Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
- Playstation: Do not upload attachments if quota is 0. ([#5770](https://github.com/getsentry/relay/pull/5770))
- Add payload byte size to trace metrics. ([#5764](https://github.com/getsentry/relay/pull/5764))
- Mix kafka partition key with org id. ([#5772](https://github.com/getsentry/relay/pull/5772))
- Set a trace_id on all events by default for internal use. ([#5759](https://github.com/getsentry/relay/pull/5759))

## 26.3.1

Expand Down
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -4404,6 +4404,7 @@ dependencies = [
"insta",
"minidump",
"opentelemetry-proto",
"rand 0.9.2",
"relay-base-schema",
"relay-common",
"relay-conventions",
Expand Down
1 change: 1 addition & 0 deletions relay-cabi/src/processing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ pub unsafe extern "C" fn relay_store_normalizer_normalize_event(
span_allowed_hosts: &[], // only supported in relay
span_op_defaults: Default::default(), // only supported in relay
performance_issues_spans: Default::default(),
derive_trace_id: Default::default(),
};
normalize_event(&mut event, &normalization_config);

Expand Down
3 changes: 3 additions & 0 deletions relay-dynamic-config/src/feature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,9 @@ pub enum Feature {
/// Upload non-prosperodmp playstation attachments via the upload-endpoint.
#[serde(rename = "projects:relay-playstation-uploads")]
PlaystationUploads,
/// Add a random trace ID to events that lack one.
#[serde(rename = "organizations:relay-default-trace-id")]
AddDefaultTraceID,

/// Enables OTLP spans to use the Span V2 processing pipeline in Relay.
///
Expand Down
143 changes: 141 additions & 2 deletions relay-event-normalization/src/event.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,9 @@ pub struct NormalizationConfig<'a> {

/// Set a flag to enable performance issue detection on spans.
pub performance_issues_spans: bool,

/// Should add a random trace ID to events that lack one.
pub derive_trace_id: bool,
}

impl Default for NormalizationConfig<'_> {
Expand Down Expand Up @@ -201,6 +204,7 @@ impl Default for NormalizationConfig<'_> {
span_allowed_hosts: Default::default(),
span_op_defaults: Default::default(),
performance_issues_spans: Default::default(),
derive_trace_id: Default::default(),
}
}
}
Expand Down Expand Up @@ -333,8 +337,11 @@ fn normalize(event: &mut Event, meta: &mut Meta, config: &NormalizationConfig) {
Ok(())
});

// We know this exists thanks to normalize_default_attributes.
let event_id = event.id.value().unwrap().0;

// Some contexts need to be normalized before metrics extraction takes place.
normalize_contexts(&mut event.contexts);
normalize_contexts(&mut event.contexts, event_id, config);

if config.normalize_spans && event.ty.value() == Some(&EventType::Transaction) {
span::reparent_broken_spans::reparent_broken_spans(event);
Expand Down Expand Up @@ -1307,13 +1314,29 @@ fn remove_logger_word(tokens: &mut Vec<&str>) {
}

/// Normalizes incoming contexts for the downstream metric extraction.
fn normalize_contexts(contexts: &mut Annotated<Contexts>) {
fn normalize_contexts(
contexts: &mut Annotated<Contexts>,
event_id: Uuid,
config: &NormalizationConfig,
) {
if config.derive_trace_id {
// We will always need a TraceContext.
let _ = contexts.get_or_insert_with(Contexts::new);
}

let _ = processor::apply(contexts, |contexts, _meta| {
// Reprocessing context sent from SDKs must not be accepted, it is a Sentry-internal
// construct.
// [`normalize`] does not run on renormalization anyway.
contexts.0.remove("reprocessing");

// We need a TraceId to ingest the event into EAP.
// If the event lacks a TraceContext, add a random one.

if config.derive_trace_id && !contexts.contains::<TraceContext>() {
contexts.add(TraceContext::random(event_id))
}

Comment on lines +1333 to +1339
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is one of two non-test changes.

for annotated in &mut contexts.0.values_mut() {
if let Some(ContextInner(Context::Trace(context))) = annotated.value_mut() {
context.status.get_or_insert_with(|| SpanStatus::Unknown);
Expand Down Expand Up @@ -4003,6 +4026,122 @@ mod tests {
"###);
}

#[test]
fn test_normalize_adds_trace_id() {
let json = r#"
{
"type": "transaction",
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we actually want this normalization to also apply to transactions? If not, we should probably filter on the event type in normalize_contexts.

"timestamp": "2021-04-26T08:00:05+0100",
"start_timestamp": "2021-04-26T08:00:00+0100",
"measurements": {
"inp": {"value": 120.0}
}
}
"#;

let mut event = Annotated::<Event>::from_json(json).unwrap().0.unwrap();

let performance_score: PerformanceScoreConfig = serde_json::from_value(json!({
"profiles": [
{
"name": "Desktop",
"scoreComponents": [
{
"measurement": "inp",
"weight": 1.0,
"p10": 0.1,
"p50": 0.25
},
],
"condition": {
"op":"and",
"inner": []
},
"version": "beta"
}
]
}))
.unwrap();

normalize(
&mut event,
&mut Meta::default(),
&NormalizationConfig {
performance_score: Some(&performance_score),
derive_trace_id: true,
..Default::default()
},
);

insta::assert_ron_snapshot!(SerializableAnnotated(&event.contexts), {
".event_id" => "[event-id]",
".trace.trace_id" => "[trace-id]",
".trace.span_id" => "[span-id]"
}, @r#"
{
"performance_score": {
"score_profile_version": "beta",
"type": "performancescore",
},
"trace": {
"trace_id": "[trace-id]",
"span_id": "[span-id]",
"status": "unknown",
"exclusive_time": 5000.0,
"type": "trace",
},
"_meta": {
"trace": {
"span_id": {
"": Meta(Some(MetaInner(
rem: [
[
"span_id.missing",
s,
],
],
))),
},
"trace_id": {
"": Meta(Some(MetaInner(
rem: [
[
"trace_id.missing",
s,
],
],
))),
},
},
},
}
"#);
insta::assert_ron_snapshot!(SerializableAnnotated(&event.measurements), {}, @r###"
{
"inp": {
"value": 120.0,
"unit": "millisecond",
},
"score.inp": {
"value": 0.0,
"unit": "ratio",
},
"score.ratio.inp": {
"value": 0.0,
"unit": "ratio",
},
"score.total": {
"value": 0.0,
"unit": "ratio",
},
"score.weight.inp": {
"value": 1.0,
"unit": "ratio",
},
}
"###);
}

#[test]
fn test_computes_standalone_cls_performance_score() {
let json = r#"
Expand Down
1 change: 1 addition & 0 deletions relay-event-schema/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ debugid = { workspace = true, features = ["serde"] }
enumset = { workspace = true }
minidump = { workspace = true }
opentelemetry-proto = { workspace = true, features = ["gen-tonic", "trace"] }
rand = { workspace = true }
relay-common = { workspace = true }
relay-conventions = { workspace = true }
relay-base-schema = { workspace = true }
Expand Down
51 changes: 51 additions & 0 deletions relay-event-schema/src/protocol/contexts/trace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,13 @@ pub struct SpanId([u8; 8]);

relay_common::impl_str_serde!(SpanId, "a span identifier");

impl SpanId {
pub fn random() -> Self {
let value: u64 = rand::random_range(1..=u64::MAX);
Self(value.to_ne_bytes())
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: SpanId::random() uses native-endian bytes (to_ne_bytes), but serialization/deserialization (Display/FromStr) expect big-endian. This causes incorrect roundtripping on little-endian systems.
Severity: MEDIUM

Suggested Fix

In SpanId::random(), change the call from rng.gen::<u64>().to_ne_bytes() to rng.gen::<u64>().to_be_bytes(). This will align the byte order during generation with the big-endian order expected by the Display and FromStr implementations, ensuring correct serialization and deserialization roundtrips.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent.
Verify if this is a real issue. If it is, propose a fix; if not, explain why it's not
valid.

Location: relay-event-schema/src/protocol/contexts/trace.rs#L174

Potential issue: A byte-ordering inconsistency exists in `SpanId` handling. The
`SpanId::random()` function generates an ID using native-endian byte order
(`to_ne_bytes()`), while the `Display` and `FromStr` implementations, used for
serialization and deserialization, assume big-endian byte order. On common little-endian
architectures like x86_64, this causes a randomly generated `SpanId` to fail a
serialization-deserialization roundtrip. For example, a generated ID will be displayed
with its bytes reversed, and parsing that string back will result in a different ID
value. This can lead to data integrity issues and broken trace continuity if these
randomly generated IDs are ever stored and re-ingested.

}
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SpanId byte order inconsistency between random and from_str

Low Severity

SpanId::random() stores the u64 using to_ne_bytes() (native endian), but SpanId::from_str() stores it using to_be_bytes() (big endian). On little-endian platforms (virtually all modern servers), these produce different byte orderings for the same u64 value. While the Display/from_str round-trip happens to preserve bytes correctly, this inconsistency means randomly-generated SpanIds follow a different internal byte convention than parsed ones. Any future code that interprets the inner [u8; 8] as a u64 via from_be_bytes (the convention implied by from_str) would get wrong results for random SpanIds. Using to_be_bytes() here would maintain consistency.

Fix in Cursor Fix in Web

Comment on lines +172 to +175
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: SpanId::random() uses native-endian bytes (to_ne_bytes), while parsing assumes big-endian, causing incorrect serialization and parsing on little-endian systems.
Severity: HIGH

Suggested Fix

Ensure consistent endianness across SpanId creation, serialization, and parsing. Modify SpanId::random() to use to_be_bytes() instead of to_ne_bytes(). This will align the byte order of randomly generated IDs with the expectations of the FromStr and Display implementations, guaranteeing correct behavior on all architectures.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent.
Verify if this is a real issue. If it is, propose a fix; if not, explain why it's not
valid.

Location: relay-event-schema/src/protocol/contexts/trace.rs#L172-L175

Potential issue: The `SpanId::random()` function generates a `u64` and converts it to
bytes using `to_ne_bytes()`. On little-endian systems, which include most modern server
architectures, this produces a little-endian byte array. However, the `Display`
implementation serializes these bytes in their storage order, creating a byte-reversed
hex string. When this string is later parsed by `FromStr`, it is interpreted as a
big-endian value, resulting in a different `SpanId` value than the one originally
generated. This will cause incorrect span IDs to be sent to downstream systems that
expect standard big-endian hex encoding.

}

impl FromStr for SpanId {
type Err = Error;

Expand Down Expand Up @@ -324,6 +331,23 @@ pub struct TraceContext {
pub other: Object<Value>,
}

impl TraceContext {
/// Generates a random [`SpanId`] and takes `[TraceId]` from the event's UUID.
/// Leaves all other fields blank.
pub fn random(event_id: Uuid) -> Self {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Method named random but trace_id is deterministic

Low Severity

TraceContext::random(event_id) derives trace_id deterministically from the event UUID, not randomly. Only the span_id is actually random. The method was originally fully random (first iteration of the PR) but was changed to derive from event_id without updating the name. This is misleading — callers seeing TraceContext::random(event_id) would reasonably assume the output is fully random, not that it embeds the event UUID as the trace ID.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 9927ecc. Configure here.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could rename to auto.

let mut trace_meta = Meta::default();
trace_meta.add_remark(Remark::new(RemarkType::Substituted, "trace_id.missing"));

let mut span_meta = Meta::default();
span_meta.add_remark(Remark::new(RemarkType::Substituted, "span_id.missing"));
Comment on lines +339 to +342
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How will these remarks show up in the UI if we merge this right now? Might be worth trying with a local relay hooked up to production sentry.io before merging (see https://github.com/getsentry/relay/?tab=readme-ov-file#building-and-running). I'm worried that UI annotates this as some kind of processing error, and then users see this on almost every error event.

An alternative would be to not set a remark at all, and rather set the origin field of the trace context to something like "relay".

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The UI doesn't show meta at all right now from the spans dataset. It's a task for a project upcoming shortly (attribute explorer).

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The UI doesn't show meta at all right now from the spans dataset

But it does for the errors data set, right? I.e. JSON ends up in nodestore, and the views in Issues will render at least some of the _meta.

TraceContext {
trace_id: Annotated(Some(TraceId::from(event_id)), trace_meta),
span_id: Annotated(Some(SpanId::random()), span_meta),
..Default::default()
}
}
}
Comment on lines +334 to +349
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is one of two non-test changes.


impl super::DefaultContext for TraceContext {
fn default_key() -> &'static str {
"trace"
Expand Down Expand Up @@ -615,4 +639,31 @@ mod tests {
let remark = annotated.meta().iter_remarks().next().unwrap();
assert_eq!(remark.rule_id(), "trace_id.invalid");
}

#[test]
fn test_random_trace_context() {
let rand_context = TraceContext::random(Uuid::new_v4());
assert!(rand_context.trace_id.value().is_some());
assert_eq!(
rand_context
.trace_id
.meta()
.iter_remarks()
.next()
.unwrap()
.rule_id(),
"trace_id.missing"
);
assert!(rand_context.span_id.value().is_some());
assert_eq!(
rand_context
.span_id
.meta()
.iter_remarks()
.next()
.unwrap()
.rule_id(),
"span_id.missing"
);
}
}
1 change: 1 addition & 0 deletions relay-server/src/processing/utils/event.rs
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ pub fn normalize(
performance_issues_spans: ctx
.project_info
.has_feature(Feature::PerformanceIssuesSpans),
derive_trace_id: project_info.has_feature(Feature::AddDefaultTraceID),
};

metric!(timer(RelayTimers::EventProcessingNormalization), {
Expand Down
Loading