From 698f5d78708f0608a549d4be70e2999176cb5f62 Mon Sep 17 00:00:00 2001 From: Alex Fedotyev <61838744+alex-fedotyev@users.noreply.github.com> Date: Mon, 4 May 2026 22:52:40 +0000 Subject: [PATCH 1/5] feat(api): redactSecrets util for LLM input from observability data Adds a reusable best-effort secret redactor with conservative allowlist patterns covering: PEM blocks, basic-auth URLs, key=value pairs, JSON-shaped secrets, HTTP secret headers, Bearer/Basic auth values, JWTs, AWS access keys, Slack tokens, and GitHub token shapes. Codifies the design rule for HyperDX AI endpoints in the file header: LLM input derived from observability data passes through redactSecrets; user-authored prose does not. Internal-only; no consumer in this commit. Imported by the upcoming /ai/summarize endpoint and any future LLM endpoints that ingest observability data. Refs HDX-3992. --- .../src/utils/__tests__/redactSecrets.test.ts | 311 ++++++++++++++++++ packages/api/src/utils/redactSecrets.ts | 139 ++++++++ 2 files changed, 450 insertions(+) create mode 100644 packages/api/src/utils/__tests__/redactSecrets.test.ts create mode 100644 packages/api/src/utils/redactSecrets.ts diff --git a/packages/api/src/utils/__tests__/redactSecrets.test.ts b/packages/api/src/utils/__tests__/redactSecrets.test.ts new file mode 100644 index 0000000000..4cbff53ac1 --- /dev/null +++ b/packages/api/src/utils/__tests__/redactSecrets.test.ts @@ -0,0 +1,311 @@ +import { REDACTION_PATTERN_NAMES, redactSecrets } from '../redactSecrets'; + +describe('redactSecrets', () => { + describe('passes legitimate observability data through unchanged', () => { + it('returns empty string unchanged', () => { + expect(redactSecrets('')).toBe(''); + }); + + it('leaves a normal log line alone', () => { + const line = 'error: database timeout after 30s on host db-1'; + expect(redactSecrets(line)).toBe(line); + }); + + it('does not match the bare word "password"', () => { + const line = 'user forgot their password and reset it'; + expect(redactSecrets(line)).toBe(line); + }); + + it('does not match "errorless" or other substring noise', () => { + const line = 'request handled errorlessly with status 200'; + expect(redactSecrets(line)).toBe(line); + }); + + it('does not redact a generic hex blob without context', () => { + const line = 'request id: 7f3a9b1c2d4e5f6a'; + expect(redactSecrets(line)).toBe(line); + }); + }); + + describe('key=value pairs', () => { + it('redacts password=', () => { + expect(redactSecrets('conn: password=secret123')).toBe( + 'conn: password=[REDACTED]', + ); + }); + + it('redacts api_key= and token= in one string', () => { + const out = redactSecrets('api_key=abc123 token=xyz789'); + expect(out).toContain('api_key=[REDACTED]'); + expect(out).toContain('token=[REDACTED]'); + }); + + it('preserves URL query-string boundaries', () => { + const out = redactSecrets('GET /v1/items?token=abc&limit=10&api_key=xyz'); + expect(out).toContain('token=[REDACTED]'); + expect(out).toContain('limit=10'); + expect(out).toContain('api_key=[REDACTED]'); + }); + + it('handles secret keys with hyphen or underscore variants', () => { + const out = redactSecrets( + 'access-key=A access_key=B private-key=C client_secret=D', + ); + expect(out).toContain('access-key=[REDACTED]'); + expect(out).toContain('access_key=[REDACTED]'); + expect(out).toContain('private-key=[REDACTED]'); + expect(out).toContain('client_secret=[REDACTED]'); + }); + }); + + describe('JSON-shaped secrets', () => { + it('redacts {"password":"..."}', () => { + const out = redactSecrets('{"password":"s3cret","user":"alice"}'); + expect(out).not.toContain('s3cret'); + expect(out).toContain('"password":"[REDACTED]"'); + expect(out).toContain('"user":"alice"'); + }); + + it('handles whitespace around the colon', () => { + const out = redactSecrets('{ "api_key" : "abc123" }'); + expect(out).not.toContain('abc123'); + expect(out).toContain('[REDACTED]'); + }); + }); + + describe('HTTP-style headers', () => { + it('redacts X-Api-Key', () => { + expect(redactSecrets('X-Api-Key: abc123')).toContain( + 'X-Api-Key: [REDACTED]', + ); + }); + + it('redacts X-Auth-Token', () => { + expect(redactSecrets('X-Auth-Token: xyz')).toContain( + 'X-Auth-Token: [REDACTED]', + ); + }); + + it('redacts a bare Api-Key header', () => { + expect(redactSecrets('Api-Key: abc123')).toContain('Api-Key: [REDACTED]'); + }); + }); + + describe('Authorization header values', () => { + it('redacts Bearer values', () => { + expect(redactSecrets('Authorization: Bearer eyJhbG.xyz.abc')).toContain( + 'Bearer [REDACTED]', + ); + }); + + it('redacts Basic values', () => { + expect(redactSecrets('Authorization: Basic dXNlcjpwYXNz')).toContain( + 'Basic [REDACTED]', + ); + }); + }); + + describe('JWT-shaped strings', () => { + const jwt = + 'eyJhbGciOiJIUzI1NiJ9.eyJ1c2VyIjoxLCJleHAiOjE3MzM5OTk5OTl9.s1gnatur3'; + + it('redacts a free-floating JWT', () => { + expect(redactSecrets(`session: ${jwt}`)).toContain('[REDACTED_JWT]'); + expect(redactSecrets(`session: ${jwt}`)).not.toContain('s1gnatur3'); + }); + + it('does not match a JWT-like fragment fused to surrounding word chars', () => { + // No word boundary before "eyJ" in this string, so no match. + const out = redactSecrets(`prefixeyJabc.def.ghi`); + expect(out).toBe(`prefixeyJabc.def.ghi`); + }); + }); + + describe('PEM private key blocks', () => { + const rsaKey = [ + '-----BEGIN RSA PRIVATE KEY-----', + 'MIIEpAIBAAKCAQEAyJk8Q...lots of base64...', + 'aBcDeFgHiJkLmNoPqRsTuVwXyZ0123456789==', + '-----END RSA PRIVATE KEY-----', + ].join('\n'); + + const opensshKey = [ + '-----BEGIN OPENSSH PRIVATE KEY-----', + 'b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAA', + '-----END OPENSSH PRIVATE KEY-----', + ].join('\n'); + + it('redacts an RSA PEM block', () => { + const out = redactSecrets(`leading text\n${rsaKey}\ntrailing text`); + expect(out).toContain('[REDACTED_PRIVATE_KEY]'); + expect(out).not.toContain('MIIEpAIB'); + expect(out).toContain('leading text'); + expect(out).toContain('trailing text'); + }); + + it('redacts an OPENSSH PEM block', () => { + const out = redactSecrets(opensshKey); + expect(out).toBe('[REDACTED_PRIVATE_KEY]'); + }); + + it('redacts a PKCS#8 PRIVATE KEY block', () => { + const pk8 = [ + '-----BEGIN PRIVATE KEY-----', + 'MIIBVgIBADANBgkqhkiG9w0BAQEFAASCAUAwggE8', + '-----END PRIVATE KEY-----', + ].join('\n'); + expect(redactSecrets(pk8)).toBe('[REDACTED_PRIVATE_KEY]'); + }); + + it('does not redact a non-private PEM block', () => { + const cert = [ + '-----BEGIN CERTIFICATE-----', + 'MIIDazCCAlOgAwIBAgIUJ', + '-----END CERTIFICATE-----', + ].join('\n'); + expect(redactSecrets(cert)).toBe(cert); + }); + }); + + describe('basic-auth URLs', () => { + it('redacts user:pass in https URL', () => { + const out = redactSecrets( + 'clone https://alice:hunter2@github.com/acme/repo', + ); + expect(out).toContain( + 'https://[REDACTED]:[REDACTED]@github.com/acme/repo', + ); + expect(out).not.toContain('hunter2'); + }); + + it('redacts user:pass in http URL', () => { + const out = redactSecrets('proxy http://svc:p@ss@proxy.local:8080/'); + expect(out).toContain('[REDACTED]:[REDACTED]'); + expect(out).not.toContain('svc:p@ss'); + }); + + it('does not match a URL without a password component', () => { + const line = 'fetch https://api.example.com/v1/data'; + expect(redactSecrets(line)).toBe(line); + }); + + it('does not falsely match an email address', () => { + const line = 'contact alice@example.com for access'; + expect(redactSecrets(line)).toBe(line); + }); + }); + + describe('AWS access keys', () => { + it('redacts AKIA-prefixed keys', () => { + const out = redactSecrets('AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE'); + expect(out).toContain('[REDACTED_AWS_KEY]'); + expect(out).not.toContain('AKIAIOSFODNN7EXAMPLE'); + }); + + it('redacts ASIA-prefixed STS session keys', () => { + const out = redactSecrets('using ASIA1234567890ABCDEF for session'); + expect(out).toContain('[REDACTED_AWS_KEY]'); + }); + + it('does not match AKIA without 16 trailing chars', () => { + const line = 'AKIA short'; + expect(redactSecrets(line)).toBe(line); + }); + + it('does not match lowercase akia', () => { + const line = 'akia1234567890abcdef'; + expect(redactSecrets(line)).toBe(line); + }); + }); + + describe('Slack tokens', () => { + it('redacts xoxb bot tokens', () => { + const out = redactSecrets('slack: xoxb-1234567890-abcdefghij'); + expect(out).toContain('[REDACTED_SLACK_TOKEN]'); + expect(out).not.toContain('xoxb-1234567890-abcdefghij'); + }); + + it('redacts xoxp user tokens', () => { + expect(redactSecrets('xoxp-9999999-aaaaaaaaaa')).toContain( + '[REDACTED_SLACK_TOKEN]', + ); + }); + + it('does not match the bare prefix', () => { + const line = 'something xox- not a token'; + expect(redactSecrets(line)).toBe(line); + }); + }); + + describe('GitHub tokens', () => { + it('redacts ghp_ personal access tokens', () => { + const tok = 'ghp_' + 'A'.repeat(36); + const out = redactSecrets(`token: ${tok}`); + expect(out).toContain('[REDACTED_GITHUB_TOKEN]'); + expect(out).not.toContain(tok); + }); + + it('redacts gho_, ghu_, ghs_, ghr_ variants', () => { + const tokens = [ + 'gho_' + 'B'.repeat(36), + 'ghu_' + 'C'.repeat(36), + 'ghs_' + 'D'.repeat(36), + 'ghr_' + 'E'.repeat(36), + ]; + for (const t of tokens) { + expect(redactSecrets(t)).toBe('[REDACTED_GITHUB_TOKEN]'); + } + }); + + it('does not match the bare prefix or short fragments', () => { + const line = 'ghp_short and gh_other'; + expect(redactSecrets(line)).toBe(line); + }); + }); + + describe('multi-secret payloads', () => { + it('redacts every distinct secret in one pass', () => { + const input = [ + 'conn url: https://alice:hunter2@db.example.com/app', + 'auth: Bearer eyJhbGciOiJIUzI1NiJ9.eyJ1IjoxfQ.sig', + 'aws: AKIAIOSFODNN7EXAMPLE', + 'gh: ghp_' + 'X'.repeat(36), + 'slack: xoxb-1111-aaaaaaaaaa', + 'env: password=hunter2 api_key=abc', + ].join('\n'); + + const out = redactSecrets(input); + + expect(out).not.toContain('hunter2'); + expect(out).not.toContain('AKIAIOSFODNN7EXAMPLE'); + expect(out).not.toContain('xoxb-1111-aaaaaaaaaa'); + expect(out).toContain('[REDACTED]:[REDACTED]@db.example.com'); + expect(out).toContain('Bearer [REDACTED]'); + expect(out).toContain('[REDACTED_AWS_KEY]'); + expect(out).toContain('[REDACTED_GITHUB_TOKEN]'); + expect(out).toContain('[REDACTED_SLACK_TOKEN]'); + expect(out).toContain('password=[REDACTED]'); + expect(out).toContain('api_key=[REDACTED]'); + }); + }); + + describe('pattern coverage', () => { + it('exposes the expected pattern names', () => { + expect(REDACTION_PATTERN_NAMES).toEqual( + expect.arrayContaining([ + 'pem', + 'basic-auth-url', + 'bearer', + 'basic', + 'jwt', + 'aws-access-key', + 'slack-token', + 'github-token', + 'key-value', + 'json-quoted', + 'http-header', + ]), + ); + }); + }); +}); diff --git a/packages/api/src/utils/redactSecrets.ts b/packages/api/src/utils/redactSecrets.ts new file mode 100644 index 0000000000..104d2ee387 --- /dev/null +++ b/packages/api/src/utils/redactSecrets.ts @@ -0,0 +1,139 @@ +// redactSecrets: best-effort allowlist redactor for LLM input that +// originates from observability data (log bodies, span attributes, +// pattern samples, alert payloads). +// +// Design rule: any LLM input derived from observability data goes +// through redactSecrets before leaving the API process. User-authored +// prose (e.g. the chart-builder assistant where the user types their +// own question) does NOT, because redacting the user's own input would +// strip exactly what they meant to ask. +// +// This is best-effort, not a guarantee. The patterns here are +// conservative: each one matches a high-confidence shape. False +// negatives (real secrets that slip through) are expected; false +// positives (legitimate data wrongly redacted) should be rare. +// +// Patterns covered: +// pem PEM key blocks (-----BEGIN ... PRIVATE KEY-----) +// basic-auth-url https://user:pass@host +// key-value password=secret, api_key=abc +// json-quoted {"password":"secret"} and similar +// http-header X-Api-Key: abc, Api-Key: abc +// bearer Authorization: Bearer xxx +// basic Authorization: Basic xxx +// jwt eyJ... three dot-separated base64 segments +// aws-access-key AKIA[16 chars], ASIA[16 chars] +// slack-token xox[a-z]-... shape +// github-token ghp_, gho_, ghu_, ghs_, ghr_ prefixes +// +// Known gaps (extend when seen in production): +// URL-percent-encoded values, vendor-specific tokens (Stripe, Twilio, +// Datadog, etc.), generic high-entropy hex blobs (too many false +// positives without surrounding context). + +const SECRET_KEY_TOKENS = + 'password|passwd|pwd|secret|token|api[_-]?key|apikey|access[_-]?key|private[_-]?key|client[_-]?secret|authorization|auth'; + +interface RedactionPattern { + name: string; + re: RegExp; + replace: string; +} + +// Order matters: broad multi-line patterns (PEM) run first so their +// inner contents do not match other narrower patterns. High-confidence +// shapes (Bearer, JWT, AWS, Slack, GitHub) run before the permissive +// key-value catch-all so that, e.g., "password=eyJ..." preserves the +// JWT shape inside the redacted value if it happened to leak. +const PATTERNS: RedactionPattern[] = [ + // -----BEGIN ... PRIVATE KEY----- ... -----END ... PRIVATE KEY----- + // Covers RSA, EC, DSA, OPENSSH, and PKCS#8 (plain "PRIVATE KEY"). + // The algorithm prefix is optional to accept the bare "PRIVATE KEY" form. + { + name: 'pem', + re: /-----BEGIN (?:[A-Z][A-Z0-9 ]* )?PRIVATE KEY-----[\s\S]*?-----END (?:[A-Z][A-Z0-9 ]* )?PRIVATE KEY-----/g, + replace: '[REDACTED_PRIVATE_KEY]', + }, + // scheme://user:pass@host. RFC 3986 allows ":" in password; the + // first ":" delimits user from password the way clients parse it. + { + name: 'basic-auth-url', + re: /\b(https?|ftp|ssh):\/\/([^:@/\s]+):([^@/\s]+)@/g, + replace: '$1://[REDACTED]:[REDACTED]@', + }, + // Authorization: Bearer xxx + { + name: 'bearer', + re: /Bearer\s+[A-Za-z0-9._~+/=-]+/gi, + replace: 'Bearer [REDACTED]', + }, + // Authorization: Basic xxx (base64 user:pass) + { + name: 'basic', + re: /Basic\s+[A-Za-z0-9+/=]+/gi, + replace: 'Basic [REDACTED]', + }, + // JWT-shape: three dot-separated base64url segments starting with eyJ. + // Word boundary on the front prevents matching mid-token concatenations. + { + name: 'jwt', + re: /\beyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\b/g, + replace: '[REDACTED_JWT]', + }, + // AWS Access Key IDs. AKIA = long-lived, ASIA = STS session. + { + name: 'aws-access-key', + re: /\b(AKIA|ASIA)[0-9A-Z]{16}\b/g, + replace: '[REDACTED_AWS_KEY]', + }, + // Slack tokens. Common prefixes: xoxa, xoxb, xoxe, xoxo, xoxp, xoxr, xoxs. + { + name: 'slack-token', + re: /\bxox[a-z]-[A-Za-z0-9-]{10,}/g, + replace: '[REDACTED_SLACK_TOKEN]', + }, + // GitHub token shapes: ghp_ (PAT), gho_ (oauth), ghu_ (user-to-server), + // ghs_ (server-to-server), ghr_ (refresh). Real ones are 36+ chars + // after the prefix; floor at 20 to catch shortened test fixtures. + { + name: 'github-token', + re: /\bgh[pousr]_[A-Za-z0-9_]{20,}\b/g, + replace: '[REDACTED_GITHUB_TOKEN]', + }, + // key=value where the key looks secret-ish. Stops at whitespace, + // commas, semicolons, ampersands, and quotes, so URL query string + // boundaries are preserved. + { + name: 'key-value', + re: new RegExp(`\\b(${SECRET_KEY_TOKENS})=([^\\s,;&"'\`]+)`, 'gi'), + replace: '$1=[REDACTED]', + }, + // JSON shape: "key": "value" with whitespace tolerance. + { + name: 'json-quoted', + re: new RegExp(`("(?:${SECRET_KEY_TOKENS})"\\s*:\\s*)"[^"]*"`, 'gi'), + replace: '$1"[REDACTED]"', + }, + // HTTP-style secret headers on a single line. Covers X-Api-Key, + // X-Auth-Token, X-Access-Token, Api-Key. + { + name: 'http-header', + re: new RegExp( + `\\b(x[-_]?(?:api[-_]?key|auth[-_]?token|access[-_]?token)|api[-_]?key)\\s*:\\s*([^\\s,;]+)`, + 'gi', + ), + replace: '$1: [REDACTED]', + }, +]; + +export function redactSecrets(input: string): string { + let out = input; + for (const { re, replace } of PATTERNS) { + out = out.replace(re, replace); + } + return out; +} + +// Exposed for tests that want to assert pattern coverage. Not part of +// the public API; the names are not contractual. +export const REDACTION_PATTERN_NAMES = PATTERNS.map(p => p.name); From 9753dc1618a19fb751b855cc17a9cf51e04676a1 Mon Sep 17 00:00:00 2001 From: Alex Fedotyev <61838744+alex-fedotyev@users.noreply.github.com> Date: Mon, 4 May 2026 23:24:24 +0000 Subject: [PATCH 2/5] fix(api): redactSecrets review feedback Address review comments on #2188: - basic-auth-url now handles "@" in passwords. Previous regex stopped at the first "@", leaving any password tail before the host visible. New regex greedily consumes the password and backtracks to the last "@" before the host; host is captured and preserved in the replacement. New test: a password containing "@" must be fully redacted, with the host intact. - key-value pattern now matches shell-style quoted values: PASSWORD="hunter2 with spaces" and API_KEY='abc 123' are redacted. Previously the unquoted character class stopped at the leading quote, so neither pattern fired. Two new tests cover both quote styles. - pem pattern is bounded by {0,16000}? on the lazy match so an unmatched BEGIN does not scan an unbounded amount of trailing input. Real PEM blocks are well under 16KB; the API caps the whole request body at 50KB. New test asserts unchanged output and sub-500ms wall-clock on a 50KB unmatched-BEGIN payload. - Header "Known gaps" comment now mentions raw "@" in basic-auth usernames (ambiguous to parse without percent-encoding). 44 tests pass; eight new cases for the items above. No changes to the public surface. Refs HDX-3992. --- .../src/utils/__tests__/redactSecrets.test.ts | 54 +++++++++++++++++-- packages/api/src/utils/redactSecrets.ts | 34 ++++++++---- 2 files changed, 75 insertions(+), 13 deletions(-) diff --git a/packages/api/src/utils/__tests__/redactSecrets.test.ts b/packages/api/src/utils/__tests__/redactSecrets.test.ts index 4cbff53ac1..97589b6448 100644 --- a/packages/api/src/utils/__tests__/redactSecrets.test.ts +++ b/packages/api/src/utils/__tests__/redactSecrets.test.ts @@ -56,6 +56,18 @@ describe('redactSecrets', () => { expect(out).toContain('private-key=[REDACTED]'); expect(out).toContain('client_secret=[REDACTED]'); }); + + it('redacts shell-style double-quoted values', () => { + const out = redactSecrets('export PASSWORD="hunter2 with spaces"'); + expect(out).not.toContain('hunter2'); + expect(out).toContain('PASSWORD=[REDACTED]'); + }); + + it('redacts shell-style single-quoted values', () => { + const out = redactSecrets("API_KEY='abc 123'"); + expect(out).not.toContain('abc 123'); + expect(out).toContain('API_KEY=[REDACTED]'); + }); }); describe('JSON-shaped secrets', () => { @@ -165,6 +177,18 @@ describe('redactSecrets', () => { ].join('\n'); expect(redactSecrets(cert)).toBe(cert); }); + + it('returns quickly when BEGIN has no matching END', () => { + // Unmatched BEGIN with a large trailing payload; bounded lazy + // quantifier should fail fast rather than scan the entire input. + const noisy = 'x'.repeat(50_000); + const input = `-----BEGIN RSA PRIVATE KEY-----\n${noisy}`; + const start = Date.now(); + const out = redactSecrets(input); + const elapsed = Date.now() - start; + expect(out).toBe(input); // unchanged: no match + expect(elapsed).toBeLessThan(500); // generous upper bound + }); }); describe('basic-auth URLs', () => { @@ -179,9 +203,28 @@ describe('redactSecrets', () => { }); it('redacts user:pass in http URL', () => { - const out = redactSecrets('proxy http://svc:p@ss@proxy.local:8080/'); - expect(out).toContain('[REDACTED]:[REDACTED]'); - expect(out).not.toContain('svc:p@ss'); + const out = redactSecrets('proxy http://svc:hunter2@proxy.local:8080/'); + expect(out).toContain('http://[REDACTED]:[REDACTED]@proxy.local:8080/'); + expect(out).not.toContain('hunter2'); + }); + + it('redacts a password that contains an @ character', () => { + const out = redactSecrets('proxy http://svc:p@ss@proxy.local:8080/path'); + expect(out).toContain( + 'http://[REDACTED]:[REDACTED]@proxy.local:8080/path', + ); + // The whole password including the embedded "@" must be gone. + expect(out).not.toContain('p@ss'); + expect(out).not.toContain('ss@proxy'); + }); + + it('preserves the host in the replacement', () => { + const out = redactSecrets( + 'clone https://alice:hunter2@github.com/acme/repo', + ); + expect(out).toContain( + 'https://[REDACTED]:[REDACTED]@github.com/acme/repo', + ); }); it('does not match a URL without a password component', () => { @@ -193,6 +236,11 @@ describe('redactSecrets', () => { const line = 'contact alice@example.com for access'; expect(redactSecrets(line)).toBe(line); }); + + it('does not match an ssh URL with only a username (no password)', () => { + const line = 'fetch ssh://git@github.com/acme/repo'; + expect(redactSecrets(line)).toBe(line); + }); }); describe('AWS access keys', () => { diff --git a/packages/api/src/utils/redactSecrets.ts b/packages/api/src/utils/redactSecrets.ts index 104d2ee387..3b23ee850c 100644 --- a/packages/api/src/utils/redactSecrets.ts +++ b/packages/api/src/utils/redactSecrets.ts @@ -29,7 +29,8 @@ // Known gaps (extend when seen in production): // URL-percent-encoded values, vendor-specific tokens (Stripe, Twilio, // Datadog, etc.), generic high-entropy hex blobs (too many false -// positives without surrounding context). +// positives without surrounding context), basic-auth URLs with raw +// "@" in the username (ambiguous to parse without percent-encoding). const SECRET_KEY_TOKENS = 'password|passwd|pwd|secret|token|api[_-]?key|apikey|access[_-]?key|private[_-]?key|client[_-]?secret|authorization|auth'; @@ -49,17 +50,24 @@ const PATTERNS: RedactionPattern[] = [ // -----BEGIN ... PRIVATE KEY----- ... -----END ... PRIVATE KEY----- // Covers RSA, EC, DSA, OPENSSH, and PKCS#8 (plain "PRIVATE KEY"). // The algorithm prefix is optional to accept the bare "PRIVATE KEY" form. + // The lazy quantifier is bounded so an unmatched BEGIN does not scan + // an unbounded amount of trailing input. Real PEM blocks are well + // under 16KB; the API also caps the whole request body at 50KB. { name: 'pem', - re: /-----BEGIN (?:[A-Z][A-Z0-9 ]* )?PRIVATE KEY-----[\s\S]*?-----END (?:[A-Z][A-Z0-9 ]* )?PRIVATE KEY-----/g, + re: /-----BEGIN (?:[A-Z][A-Z0-9 ]* )?PRIVATE KEY-----[\s\S]{0,16000}?-----END (?:[A-Z][A-Z0-9 ]* )?PRIVATE KEY-----/g, replace: '[REDACTED_PRIVATE_KEY]', }, - // scheme://user:pass@host. RFC 3986 allows ":" in password; the - // first ":" delimits user from password the way clients parse it. + // scheme://user:pass@host. The password may contain "@" if not + // percent-encoded, so the password group greedily consumes anything + // non-whitespace, non-slash and the engine backtracks to the last + // "@" before the host. The host group is captured so the replacement + // preserves it. Raw "@" in the username is not handled (would need + // ambiguous parsing); add to known gaps if seen in production. { name: 'basic-auth-url', - re: /\b(https?|ftp|ssh):\/\/([^:@/\s]+):([^@/\s]+)@/g, - replace: '$1://[REDACTED]:[REDACTED]@', + re: /\b(https?|ftp|ssh):\/\/([^/\s:@]+):([^/\s]+)@([^/\s@]+)/g, + replace: '$1://[REDACTED]:[REDACTED]@$4', }, // Authorization: Bearer xxx { @@ -100,12 +108,18 @@ const PATTERNS: RedactionPattern[] = [ re: /\bgh[pousr]_[A-Za-z0-9_]{20,}\b/g, replace: '[REDACTED_GITHUB_TOKEN]', }, - // key=value where the key looks secret-ish. Stops at whitespace, - // commas, semicolons, ampersands, and quotes, so URL query string - // boundaries are preserved. + // key=value where the key looks secret-ish. Three variants on the + // value: double-quoted, single-quoted, or unquoted. Unquoted stops + // at whitespace, commas, semicolons, ampersands, and quotes so URL + // query-string boundaries are preserved. Quoted variants are + // matched first via alternation so shell-style password="secret" + // gets caught instead of slipping through the unquoted-value class. { name: 'key-value', - re: new RegExp(`\\b(${SECRET_KEY_TOKENS})=([^\\s,;&"'\`]+)`, 'gi'), + re: new RegExp( + `\\b(${SECRET_KEY_TOKENS})=(?:"[^"]*"|'[^']*'|[^\\s,;&"'\`]+)`, + 'gi', + ), replace: '$1=[REDACTED]', }, // JSON shape: "key": "value" with whitespace tolerance. From 2c5992a3842ef1212232b479b605a673b73c667a Mon Sep 17 00:00:00 2001 From: Alex Fedotyev <61838744+alex-fedotyev@users.noreply.github.com> Date: Mon, 4 May 2026 23:39:22 +0000 Subject: [PATCH 3/5] chore(api): trim redactSecrets comments to keep PR Tier 2 The previous review-fix commit pushed prod lines from 139 to 153, just over the Tier 2 threshold (< 150 prod lines). Compressing the verbose comments on PEM, basic-auth-url, and key-value patterns brings prod back to 144. No behavior change. --- packages/api/src/utils/redactSecrets.ts | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/packages/api/src/utils/redactSecrets.ts b/packages/api/src/utils/redactSecrets.ts index 3b23ee850c..f364089f65 100644 --- a/packages/api/src/utils/redactSecrets.ts +++ b/packages/api/src/utils/redactSecrets.ts @@ -48,22 +48,16 @@ interface RedactionPattern { // JWT shape inside the redacted value if it happened to leak. const PATTERNS: RedactionPattern[] = [ // -----BEGIN ... PRIVATE KEY----- ... -----END ... PRIVATE KEY----- - // Covers RSA, EC, DSA, OPENSSH, and PKCS#8 (plain "PRIVATE KEY"). - // The algorithm prefix is optional to accept the bare "PRIVATE KEY" form. - // The lazy quantifier is bounded so an unmatched BEGIN does not scan - // an unbounded amount of trailing input. Real PEM blocks are well - // under 16KB; the API also caps the whole request body at 50KB. + // (RSA, EC, DSA, OPENSSH, PKCS#8). Bounded lazy match so an + // unmatched BEGIN does not scan an unbounded trailing input. { name: 'pem', re: /-----BEGIN (?:[A-Z][A-Z0-9 ]* )?PRIVATE KEY-----[\s\S]{0,16000}?-----END (?:[A-Z][A-Z0-9 ]* )?PRIVATE KEY-----/g, replace: '[REDACTED_PRIVATE_KEY]', }, - // scheme://user:pass@host. The password may contain "@" if not - // percent-encoded, so the password group greedily consumes anything - // non-whitespace, non-slash and the engine backtracks to the last - // "@" before the host. The host group is captured so the replacement - // preserves it. Raw "@" in the username is not handled (would need - // ambiguous parsing); add to known gaps if seen in production. + // scheme://user:pass@host. Password may contain "@"; the engine + // backtracks to the last "@" before the host. Host is captured and + // preserved in the replacement. { name: 'basic-auth-url', re: /\b(https?|ftp|ssh):\/\/([^/\s:@]+):([^/\s]+)@([^/\s@]+)/g, @@ -108,12 +102,9 @@ const PATTERNS: RedactionPattern[] = [ re: /\bgh[pousr]_[A-Za-z0-9_]{20,}\b/g, replace: '[REDACTED_GITHUB_TOKEN]', }, - // key=value where the key looks secret-ish. Three variants on the - // value: double-quoted, single-quoted, or unquoted. Unquoted stops - // at whitespace, commas, semicolons, ampersands, and quotes so URL - // query-string boundaries are preserved. Quoted variants are - // matched first via alternation so shell-style password="secret" - // gets caught instead of slipping through the unquoted-value class. + // key=value with secret-ish key. Three value forms: double-quoted, + // single-quoted, or unquoted (stops at whitespace, comma, semicolon, + // ampersand, quote so URL query-string boundaries are preserved). { name: 'key-value', re: new RegExp( From 0829acd8fc3ee606cd1848cff5a148e0bb0a6302 Mon Sep 17 00:00:00 2001 From: Alex Fedotyev <61838744+alex-fedotyev@users.noreply.github.com> Date: Tue, 5 May 2026 04:32:03 +0000 Subject: [PATCH 4/5] chore: add changeset for redactSecrets utility Co-Authored-By: Claude Opus --- .changeset/redact-secrets-util.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/redact-secrets-util.md diff --git a/.changeset/redact-secrets-util.md b/.changeset/redact-secrets-util.md new file mode 100644 index 0000000000..a88863d25b --- /dev/null +++ b/.changeset/redact-secrets-util.md @@ -0,0 +1,5 @@ +--- +"@hyperdx/api": patch +--- + +feat(api): add redactSecrets utility for scrubbing credentials from LLM input derived from observability data From 757c19b2b40e193fc545fd87cc5033e82e404058 Mon Sep 17 00:00:00 2001 From: Alex Fedotyev <61838744+alex-fedotyev@users.noreply.github.com> Date: Wed, 6 May 2026 18:59:34 +0000 Subject: [PATCH 5/5] chore(api): drop changeset for redactSecrets util The PR body has always declared this PR as having no user-facing change (internal-only utility, no consumer in this PR). The changeset was added in error and would surface a stray "feat(api)" line in the next release notes for code that no production caller reaches yet. Drop it; the consumer's PR (#2206) carries the changeset that ships the user-facing behavior. --- .changeset/redact-secrets-util.md | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 .changeset/redact-secrets-util.md diff --git a/.changeset/redact-secrets-util.md b/.changeset/redact-secrets-util.md deleted file mode 100644 index a88863d25b..0000000000 --- a/.changeset/redact-secrets-util.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"@hyperdx/api": patch ---- - -feat(api): add redactSecrets utility for scrubbing credentials from LLM input derived from observability data