-
Notifications
You must be signed in to change notification settings - Fork 23
BB-764: Add OpenTelemetry tracing to backbeat replication pipeline #2733
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: development/9.3
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,5 @@ | ||
| require('../lib/otel'); | ||
|
|
||
| const async = require('async'); | ||
| const schedule = require('node-schedule'); | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,5 @@ | ||
| 'use strict'; | ||
| require('../../../lib/otel'); | ||
|
|
||
| const werelogs = require('werelogs'); | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -23,6 +23,26 @@ const { | |||||||||||
| } | ||||||||||||
| } = require('./constants'); | ||||||||||||
|
|
||||||||||||
| const { context: otelContext, SpanStatusCode } = require('@opentelemetry/api'); | ||||||||||||
| const { | ||||||||||||
| startSpanFromKafkaEntry, | ||||||||||||
| startLinkedSpanFromKafkaEntry, | ||||||||||||
| } = require('./tracing/kafkaTraceContext'); | ||||||||||||
|
|
||||||||||||
| /** | ||||||||||||
| * Detect whether a Kafka entry carries link-traceparent (fan-out break) | ||||||||||||
| * rather than standard traceparent. Link semantics mean the consumer | ||||||||||||
| * starts a NEW root trace that references the upstream via OTEL Link, | ||||||||||||
| * instead of becoming a child of it. | ||||||||||||
| * | ||||||||||||
| * Returns false when `entry` is falsy or `entry.headers` is not an array. | ||||||||||||
| * Otherwise every element of `entry.headers` is inspected and the scan | ||||||||||||
| * stops at the first one exposing a truthy 'link-traceparent' property. | ||||||||||||
| * | ||||||||||||
| * @param {Object} entry - Kafka entry; only its `headers` field is read here | ||||||||||||
| * @returns {boolean} true if at least one header element has a truthy | ||||||||||||
| * 'link-traceparent' property, false otherwise | ||||||||||||
| */ | ||||||||||||
| function entryHasLinkHeaders(entry) { | ||||||||||||
| if (!entry || !Array.isArray(entry.headers)) return false; | ||||||||||||
| for (const h of entry.headers) { | ||||||||||||
| if (h['link-traceparent']) return true; | ||||||||||||
| } | ||||||||||||
| return false; | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| const CLIENT_ID = 'BackbeatConsumer'; | ||||||||||||
| const { withTopicPrefix } = require('./util/topic'); | ||||||||||||
|
|
||||||||||||
|
|
@@ -514,7 +534,22 @@ class BackbeatConsumer extends EventEmitter { | |||||||||||
| const { topic, partition } = entry; | ||||||||||||
| KafkaBacklogMetrics.onTaskStarted(topic, partition, this._groupId); | ||||||||||||
|
|
||||||||||||
| this._queueProcessor(entry, (err, completionArgs) => done(err, completionArgs, finishProcessingTask)); | ||||||||||||
| const { ctx, span } = entryHasLinkHeaders(entry) | ||||||||||||
| ? startLinkedSpanFromKafkaEntry(entry, `${topic}.process`) | ||||||||||||
| : startSpanFromKafkaEntry(entry, `${topic}.process`); | ||||||||||||
| span.setAttribute('messaging.kafka.topic', topic); | ||||||||||||
| span.setAttribute('messaging.kafka.partition', partition); | ||||||||||||
|
|
||||||||||||
| otelContext.with(ctx, () => { | ||||||||||||
| this._queueProcessor(entry, (err, completionArgs) => { | ||||||||||||
| if (err) { | ||||||||||||
| span.recordException(err); | ||||||||||||
| span.setStatus({ code: SpanStatusCode.ERROR }); | ||||||||||||
| } | ||||||||||||
| span.end(); | ||||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. `recordException` only adds an event to the span — it does not set the span status. Without calling `span.setStatus({ code: SpanStatusCode.ERROR })`, failed spans will still appear as OK in Jaeger. Add `span.setStatus({ code: 2 })` (`SpanStatusCode.ERROR`) alongside `recordException`. — Claude Code There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
— Claude Code |
||||||||||||
| done(err, completionArgs, finishProcessingTask); | ||||||||||||
| }); | ||||||||||||
| }); | ||||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If |
||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| /** | ||||||||||||
|
|
||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider adding messaging.kafka.consumer_group — it helps distinguish spans from different consumer groups in Jaeger (e.g. replication vs lifecycle processors running in the same cluster).
```suggestion
span.setAttribute('messaging.kafka.topic', topic);
span.setAttribute('messaging.kafka.partition', partition);
span.setAttribute('messaging.kafka.consumer_group', this._groupId);