Skip to content

Commit 8772bd3

Browse files
committed
feat: add uniqueRowIdExpression, use in log where query
1 parent 337ebff commit 8772bd3

10 files changed

Lines changed: 197 additions & 41 deletions

File tree

.changeset/tidy-pets-shout.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
"@hyperdx/api": patch
3+
"@hyperdx/app": patch
4+
---
5+
6+
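fix: use the unique row ID field (`__hdx_id`) when filtering log rows, so rows with otherwise identical column values can be told apart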

docker/otel-collector/schema/seed/00002_otel_logs.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ CREATE TABLE IF NOT EXISTS ${DATABASE}.otel_logs
2525
`__hdx_materialized_k8s.pod.name` LowCardinality(String) MATERIALIZED ResourceAttributes['k8s.pod.name'] CODEC(ZSTD(1)),
2626
`__hdx_materialized_k8s.pod.uid` LowCardinality(String) MATERIALIZED ResourceAttributes['k8s.pod.uid'] CODEC(ZSTD(1)),
2727
`__hdx_materialized_deployment.environment.name` LowCardinality(String) MATERIALIZED ResourceAttributes['deployment.environment.name'] CODEC(ZSTD(1)),
28+
`__hdx_id` UInt16 MATERIALIZED toUInt16(rand()),
2829
INDEX idx_trace_id TraceId TYPE bloom_filter(0.001) GRANULARITY 1,
2930
INDEX idx_res_attr_key mapKeys(ResourceAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
3031
INDEX idx_res_attr_value mapValues(ResourceAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
ALTER TABLE ${DATABASE}.otel_logs DROP COLUMN IF EXISTS `__hdx_id`;
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
ALTER TABLE ${DATABASE}.otel_logs
2+
ADD COLUMN IF NOT EXISTS `__hdx_id` UInt16
3+
MATERIALIZED toUInt16(rand());

packages/app/src/__tests__/source.test.ts

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1+
import { ColumnMeta } from '@hyperdx/common-utils/dist/clickhouse';
2+
import { Metadata } from '@hyperdx/common-utils/dist/core/metadata';
13
import { SourceKind, TTraceSource } from '@hyperdx/common-utils/dist/types';
24

3-
import { getEventBody } from '../source';
5+
import { getEventBody, inferTableSourceConfig } from '../source';
46

57
describe('getEventBody', () => {
68
// Added to prevent regression back to HDX-3361
@@ -29,3 +31,86 @@ describe('getEventBody', () => {
2931
expect(result).toBe('SpanName');
3032
});
3133
});
34+
35+
describe('inferTableSourceConfig', () => {
36+
const col = (name: string, type = 'String'): ColumnMeta => ({
37+
name,
38+
type,
39+
codec_expression: '',
40+
comment: '',
41+
default_expression: '',
42+
default_type: '',
43+
ttl_expression: '',
44+
});
45+
46+
const OTEL_LOG_COLUMNS = [
47+
col('Timestamp', "DateTime64(9, 'UTC')"),
48+
col('TimestampTime', 'DateTime'),
49+
col('Body'),
50+
col('SeverityText'),
51+
col('TraceId'),
52+
col('SpanId'),
53+
col('ServiceName'),
54+
col('LogAttributes', 'Map(String, String)'),
55+
col('ResourceAttributes', 'Map(String, String)'),
56+
];
57+
58+
const baseArgs = {
59+
databaseName: 'default',
60+
tableName: 'otel_logs',
61+
connectionId: 'test-conn',
62+
};
63+
64+
function mockMetadata(columns: ColumnMeta[]): Metadata {
65+
return {
66+
getColumns: jest.fn().mockResolvedValue(columns),
67+
getTableMetadata: jest.fn().mockResolvedValue({
68+
primary_key: 'ServiceName, TimestampTime',
69+
}),
70+
} as unknown as Metadata;
71+
}
72+
73+
it('should set uniqueRowIdExpression when __hdx_id column exists on otel log table', async () => {
74+
const columns = [...OTEL_LOG_COLUMNS, col('__hdx_id', 'UInt16')];
75+
const result = await inferTableSourceConfig({
76+
...baseArgs,
77+
kind: SourceKind.Log,
78+
metadata: mockMetadata(columns),
79+
});
80+
expect(result).toHaveProperty('uniqueRowIdExpression', '__hdx_id');
81+
});
82+
83+
it('should not set uniqueRowIdExpression when __hdx_id column is missing', async () => {
84+
const result = await inferTableSourceConfig({
85+
...baseArgs,
86+
kind: SourceKind.Log,
87+
metadata: mockMetadata(OTEL_LOG_COLUMNS),
88+
});
89+
expect(result).not.toHaveProperty('uniqueRowIdExpression');
90+
});
91+
92+
it('should not set uniqueRowIdExpression for trace sources even if __hdx_id exists', async () => {
93+
const OTEL_TRACE_COLUMNS = [
94+
col('Timestamp', "DateTime64(9, 'UTC')"),
95+
col('SpanName'),
96+
col('Duration', 'UInt64'),
97+
col('SpanKind'),
98+
col('TraceId'),
99+
col('SpanId'),
100+
col('ParentSpanId'),
101+
col('ServiceName'),
102+
col('SpanAttributes', 'Map(String, String)'),
103+
col('ResourceAttributes', 'Map(String, String)'),
104+
col('StatusCode'),
105+
col('StatusMessage'),
106+
col('__hdx_id', 'UInt16'),
107+
];
108+
const result = await inferTableSourceConfig({
109+
...baseArgs,
110+
tableName: 'otel_traces',
111+
kind: SourceKind.Trace,
112+
metadata: mockMetadata(OTEL_TRACE_COLUMNS),
113+
});
114+
expect(result).not.toHaveProperty('uniqueRowIdExpression');
115+
});
116+
});

packages/app/src/components/DBRowTable.tsx

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1374,14 +1374,15 @@ export const RawLogTable = memo(
13741374
},
13751375
);
13761376

1377-
export function appendSelectWithPrimaryAndPartitionKey(
1377+
export function appendSelectWithAdditionalKeys(
13781378
select: SelectList,
13791379
primaryKeys: string,
13801380
partitionKey: string,
1381+
extraKeys: string[] = [],
13811382
): { select: SelectList; additionalKeysLength: number } {
13821383
const partitionKeyArr = extractColumnReferencesFromKey(partitionKey);
13831384
const primaryKeyArr = extractColumnReferencesFromKey(primaryKeys);
1384-
const allKeys = new Set([...partitionKeyArr, ...primaryKeyArr]);
1385+
const allKeys = new Set([...partitionKeyArr, ...primaryKeyArr, ...extraKeys]);
13851386
if (typeof select === 'string') {
13861387
const selectSplit = splitAndTrimWithBracket(select);
13871388
const selectColumns = new Set(selectSplit);
@@ -1407,15 +1408,19 @@ function getSelectLength(select: SelectList): number {
14071408
}
14081409
}
14091410

1410-
export function useConfigWithPrimaryAndPartitionKey(
1411+
export function useConfigWithAdditionalSelect(
14111412
config: BuilderChartConfigWithDateRange,
1413+
sourceId?: string,
14121414
) {
14131415
const { data: tableMetadata } = useTableMetadata({
14141416
databaseName: config.from.databaseName,
14151417
tableName: config.from.tableName,
14161418
connectionId: config.connection,
14171419
});
14181420

1421+
// We're only interested in `uniqueRowIdExpression` for logs.
1422+
const { data: source } = useSource({ id: sourceId, kinds: [SourceKind.Log] });
1423+
14191424
const primaryKey = tableMetadata?.primary_key;
14201425
const partitionKey = tableMetadata?.partition_key;
14211426

@@ -1424,14 +1429,14 @@ export function useConfigWithPrimaryAndPartitionKey(
14241429
return undefined;
14251430
}
14261431

1427-
const { select, additionalKeysLength } =
1428-
appendSelectWithPrimaryAndPartitionKey(
1429-
config.select,
1430-
primaryKey,
1431-
partitionKey,
1432-
);
1432+
const { select, additionalKeysLength } = appendSelectWithAdditionalKeys(
1433+
config.select,
1434+
primaryKey,
1435+
partitionKey,
1436+
source?.uniqueRowIdExpression ? [source.uniqueRowIdExpression] : [],
1437+
);
14331438
return { ...config, select, additionalKeysLength };
1434-
}, [primaryKey, partitionKey, config]);
1439+
}, [primaryKey, partitionKey, config, source]);
14351440

14361441
return mergedConfig;
14371442
}
@@ -1564,7 +1569,7 @@ function DBSqlRowTableComponent({
15641569
return base;
15651570
}, [me, config, orderByArray]);
15661571

1567-
const mergedConfig = useConfigWithPrimaryAndPartitionKey(mergedConfigObj);
1572+
const mergedConfig = useConfigWithAdditionalSelect(mergedConfigObj, sourceId);
15681573

15691574
const { data, fetchNextPage, hasNextPage, isFetching, isError, error } =
15701575
useOffsetPaginatedQuery(mergedConfig ?? config, {

packages/app/src/components/Sources/SourceForm.tsx

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1214,6 +1214,20 @@ function LogTableModelForm(props: TableModelProps) {
12141214
disableKeywordAutocomplete
12151215
/>
12161216
</FormRow>
1217+
<FormRow
1218+
label={'Unique Row Identifier Expression'}
1219+
helpText="Expression used to disambiguate rows with identical visible column values."
1220+
>
1221+
<SQLInlineEditorControlled
1222+
tableConnection={{
1223+
databaseName,
1224+
tableName,
1225+
connectionId,
1226+
}}
1227+
control={control}
1228+
name="uniqueRowIdExpression"
1229+
/>
1230+
</FormRow>
12171231
<Divider />
12181232
<FormRow
12191233
label={'Correlated Metric Source'}
@@ -1254,21 +1268,6 @@ function LogTableModelForm(props: TableModelProps) {
12541268
</FormRow>
12551269

12561270
<Divider />
1257-
{/* <FormRow
1258-
label={'Unique Row ID Expression'}
1259-
helpText="Unique identifier for a given row, will be primary key if not specified. Used for showing full row details in search results."
1260-
>
1261-
<SQLInlineEditorControlled
1262-
tableConnection={{
1263-
databaseName,
1264-
tableName,
1265-
connectionId,
1266-
}}
1267-
control={control}
1268-
name="uniqueRowIdExpression"
1269-
placeholder="Timestamp, ServiceName, Body"
1270-
/>
1271-
</FormRow> */}
12721271
{/* <FormRow label={'Table Filter Expression'}>
12731272
<SQLInlineEditorControlled
12741273
tableConnection={{

packages/app/src/components/__tests__/DBRowTable.test.tsx

Lines changed: 61 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { screen } from '@testing-library/react';
22
import userEvent from '@testing-library/user-event';
33

44
import {
5-
appendSelectWithPrimaryAndPartitionKey,
5+
appendSelectWithAdditionalKeys,
66
RawLogTable,
77
} from '@/components/DBRowTable';
88
import { RowWhereResult } from '@/hooks/useRowWhere';
@@ -145,9 +145,9 @@ describe('RawLogTable', () => {
145145
});
146146
});
147147

148-
describe('appendSelectWithPrimaryAndPartitionKey', () => {
148+
describe('appendSelectWithAdditionalKeys', () => {
149149
it('should extract columns from partition key with nested function call', () => {
150-
const result = appendSelectWithPrimaryAndPartitionKey(
150+
const result = appendSelectWithAdditionalKeys(
151151
'col1, col2',
152152
'id, created_at',
153153
' toStartOfInterval(timestamp, toIntervalDay(3))',
@@ -159,15 +159,15 @@ describe('appendSelectWithPrimaryAndPartitionKey', () => {
159159
});
160160

161161
it('should extract no columns from empty primary key and partition key', () => {
162-
const result = appendSelectWithPrimaryAndPartitionKey('col1, col2', '', '');
162+
const result = appendSelectWithAdditionalKeys('col1, col2', '', '', []);
163163
expect(result).toEqual({
164164
additionalKeysLength: 0,
165165
select: 'col1,col2',
166166
});
167167
});
168168

169169
it('should extract columns from complex primary key', () => {
170-
const result = appendSelectWithPrimaryAndPartitionKey(
170+
const result = appendSelectWithAdditionalKeys(
171171
'col1, col2',
172172
'id, timestamp, toStartOfInterval(timestamp2, toIntervalDay(3))',
173173
"toStartOfInterval(timestamp, toIntervalDay(3)), date_diff('DAY', col3, col4), now(), toDate(col5 + INTERVAL 1 DAY)",
@@ -179,7 +179,7 @@ describe('appendSelectWithPrimaryAndPartitionKey', () => {
179179
});
180180

181181
it('should extract map columns', () => {
182-
const result = appendSelectWithPrimaryAndPartitionKey(
182+
const result = appendSelectWithAdditionalKeys(
183183
'col1, col2',
184184
`map['key']`,
185185
`map2['key'], map1['key3 ']`,
@@ -191,7 +191,7 @@ describe('appendSelectWithPrimaryAndPartitionKey', () => {
191191
});
192192

193193
it('should extract map columns', () => {
194-
const result = appendSelectWithPrimaryAndPartitionKey(
194+
const result = appendSelectWithAdditionalKeys(
195195
'col1, col2',
196196
``,
197197
`map2['key.2']`,
@@ -203,7 +203,7 @@ describe('appendSelectWithPrimaryAndPartitionKey', () => {
203203
});
204204

205205
it('should extract array columns', () => {
206-
const result = appendSelectWithPrimaryAndPartitionKey(
206+
const result = appendSelectWithAdditionalKeys(
207207
'col1, col2',
208208
`array[1]`,
209209
`array[2], array[3]`,
@@ -215,7 +215,7 @@ describe('appendSelectWithPrimaryAndPartitionKey', () => {
215215
});
216216

217217
it('should extract json columns', () => {
218-
const result = appendSelectWithPrimaryAndPartitionKey(
218+
const result = appendSelectWithAdditionalKeys(
219219
'col1, col2',
220220
`json.b`,
221221
`json.a, json.b.c, toStartOfDay(timestamp, json_2.d)`,
@@ -227,7 +227,7 @@ describe('appendSelectWithPrimaryAndPartitionKey', () => {
227227
});
228228

229229
it('should extract json columns with type specifiers', () => {
230-
const result = appendSelectWithPrimaryAndPartitionKey(
230+
const result = appendSelectWithAdditionalKeys(
231231
'col1, col2',
232232
`json.b.:Int64`,
233233
`toStartOfDay(json.a.b.:DateTime)`,
@@ -239,7 +239,7 @@ describe('appendSelectWithPrimaryAndPartitionKey', () => {
239239
});
240240

241241
it('should skip json columns with hard-to-parse type specifiers', () => {
242-
const result = appendSelectWithPrimaryAndPartitionKey(
242+
const result = appendSelectWithAdditionalKeys(
243243
'col1, col2',
244244
`json.b.:Array(String), col3`,
245245
``,
@@ -251,7 +251,7 @@ describe('appendSelectWithPrimaryAndPartitionKey', () => {
251251
});
252252

253253
it('should skip nested map references', () => {
254-
const result = appendSelectWithPrimaryAndPartitionKey(
254+
const result = appendSelectWithAdditionalKeys(
255255
'col1, col2',
256256
`map['key']['key2'], col3`,
257257
``,
@@ -261,4 +261,53 @@ describe('appendSelectWithPrimaryAndPartitionKey', () => {
261261
select: `col1,col2,col3`,
262262
});
263263
});
264+
265+
it('should append extraKeys to string select', () => {
266+
const result = appendSelectWithAdditionalKeys('col1, col2', 'id', '', [
267+
'__hdx_id',
268+
]);
269+
expect(result).toEqual({
270+
additionalKeysLength: 2,
271+
select: 'col1,col2,id,__hdx_id',
272+
});
273+
});
274+
275+
it('should not duplicate extraKeys already in select', () => {
276+
const result = appendSelectWithAdditionalKeys('col1, __hdx_id', 'id', '', [
277+
'__hdx_id',
278+
]);
279+
expect(result).toEqual({
280+
additionalKeysLength: 1,
281+
select: 'col1,__hdx_id,id',
282+
});
283+
});
284+
285+
it('should deduplicate extraKeys that overlap with primary/partition keys', () => {
286+
const result = appendSelectWithAdditionalKeys('col1, col2', 'id', '', [
287+
'id',
288+
'__hdx_id',
289+
]);
290+
expect(result).toEqual({
291+
additionalKeysLength: 2,
292+
select: 'col1,col2,id,__hdx_id',
293+
});
294+
});
295+
296+
it('should append extraKeys to array-style select', () => {
297+
const result = appendSelectWithAdditionalKeys(
298+
[{ valueExpression: 'col1' }, { valueExpression: 'col2' }],
299+
'id',
300+
'',
301+
['__hdx_id'],
302+
);
303+
expect(result).toEqual({
304+
additionalKeysLength: 2,
305+
select: [
306+
{ valueExpression: 'col1' },
307+
{ valueExpression: 'col2' },
308+
{ valueExpression: 'id' },
309+
{ valueExpression: '__hdx_id' },
310+
],
311+
});
312+
});
264313
});

0 commit comments

Comments
 (0)