From 7fdfd8b2f9a7de442d94b5ea8e1e83cff23cb28f Mon Sep 17 00:00:00 2001 From: Brian Harrington Date: Thu, 11 Jun 2026 21:30:02 -0500 Subject: [PATCH] Defer unescaping in Parser until tokens are used as values The query/data expression parser unescaped each comma-separated token up front, before deciding whether it was a structural token (a parenthesis or a `:operator`). As a result an escaped special character that was intended as data, such as `(` or `:in`, was unescaped to `(` or `:in` and then mistaken for syntax. Depending on the expression this either threw `invalid query expression` (a lone escaped paren or a top-level escaped operator) or produced a structurally different query. In a streaming context a subscription whose expression fails to parse silently matches nothing, so streaming results could diverge from the backend. Match structural tokens against the raw token and only unescape when a token is stored as a value (the scalar push and list members). This mirrors the Atlas server parser (Interpreter.nextStep / popAndPushList). Also align escaping with the server so values round-trip: escape `:` in addition to comma and whitespace, and escape a standalone `(`/`)` token. Add a fast path to unescape to avoid allocating when there is nothing to do. --- .../netflix/spectator/atlas/impl/Parser.java | 33 +++++++++-- .../spectator/atlas/impl/ParserTest.java | 10 +++- .../spectator/atlas/impl/QueryTest.java | 57 +++++++++++++++++++ 3 files changed, 95 insertions(+), 5 deletions(-) diff --git a/spectator-reg-atlas/src/main/java/com/netflix/spectator/atlas/impl/Parser.java b/spectator-reg-atlas/src/main/java/com/netflix/spectator/atlas/impl/Parser.java index f7245ebf9..097d19c72 100644 --- a/spectator-reg-atlas/src/main/java/com/netflix/spectator/atlas/impl/Parser.java +++ b/spectator-reg-atlas/src/main/java/com/netflix/spectator/atlas/impl/Parser.java @@ -70,7 +70,12 @@ private static Object parse(String expr) { String[] parts = expr.split(","); Deque stack = new ArrayDeque<>(parts.length); for (String p : parts) { - String token = unescape(p.trim()); + // Structural tokens (parentheses and `:operators`) are matched against the raw + // token. Unescaping is deferred until a token is consumed as a literal value (see + // the default case below) so that escaped special characters such as `(` or + // `:in` are treated as data rather than as syntax. This mirrors the behavior of + // the Atlas server parser (Interpreter.nextStep / popAndPushList). + String token = p.trim(); if (token.isEmpty()) { continue; } @@ -80,7 +85,9 @@ private static Object parse(String expr) { } else if (")".equals(token)) { --depth; } - vs.add(token); + // Structure is determined from the raw token above, but the value stored in the + // list is unescaped so that escaped special characters are treated as data. + vs.add(unescape(token)); continue; } switch (token) { @@ -213,7 +220,7 @@ private static Object parse(String expr) { if (token.startsWith(":")) { throw new IllegalArgumentException("unknown word '" + token + "'"); } - stack.push(token); + stack.push(unescape(token)); break; } } @@ -240,7 +247,10 @@ private static void pushIn(Deque stack, String k, List values) { } static boolean isSpecial(int codePoint) { - return codePoint == ',' || Character.isWhitespace(codePoint); + // The comma is used as the token separator and the colon as the prefix for operators, + // so both need to be escaped when they appear within a key or value. This matches the + // set of special characters used by the Atlas server (Interpreter.isSpecial). + return codePoint == ',' || codePoint == ':' || Character.isWhitespace(codePoint); } static void zeroPad(String str, StringBuilder builder) { @@ -262,6 +272,15 @@ private static void escapeCodePoint(int codePoint, StringBuilder builder) { */ @SuppressWarnings("PMD") public static String escape(String str) { + // A token consisting of a single parenthesis is structural in the stack language, so + // when a parenthesis is used as a value it must be escaped even though it is not + // treated as special in the middle of a larger string. This matches the handling in + // the Atlas server (Interpreter.escape). + if ("(".equals(str)) { + return "\\u0028"; + } else if (")".equals(str)) { + return "\\u0029"; + } final int length = str.length(); StringBuilder builder = new StringBuilder(length); for (int i = 0; i < length;) { @@ -282,6 +301,12 @@ public static String escape(String str) { */ @SuppressWarnings("PMD") public static String unescape(String str) { + // Fast path: escape sequences always start with a backslash, so if there is none the + // input is already unescaped and there is no need to allocate a StringBuilder. This is + // the common case as most keys and values do not contain special characters. + if (str.indexOf('\\') < 0) { + return str; + } final int length = str.length(); StringBuilder builder = new StringBuilder(length); for (int i = 0; i < length; ++i) { diff --git a/spectator-reg-atlas/src/test/java/com/netflix/spectator/atlas/impl/ParserTest.java b/spectator-reg-atlas/src/test/java/com/netflix/spectator/atlas/impl/ParserTest.java index d83d3ff5f..cbad49175 100644 --- a/spectator-reg-atlas/src/test/java/com/netflix/spectator/atlas/impl/ParserTest.java +++ b/spectator-reg-atlas/src/test/java/com/netflix/spectator/atlas/impl/ParserTest.java @@ -30,7 +30,15 @@ private static String zeroPad(int i) { public void escape() { for (char i = 0; i < Short.MAX_VALUE; ++i) { String str = Character.toString(i); - String expected = Parser.isSpecial(i) ? "\\u" + zeroPad(i) : str; + final String expected; + if ("(".equals(str)) { + // A standalone parenthesis is structural and is always escaped when used as a value. + expected = "\\u0028"; + } else if (")".equals(str)) { + expected = "\\u0029"; + } else { + expected = Parser.isSpecial(i) ? "\\u" + zeroPad(i) : str; + } Assertions.assertEquals(expected, Parser.escape(str)); } } diff --git a/spectator-reg-atlas/src/test/java/com/netflix/spectator/atlas/impl/QueryTest.java b/spectator-reg-atlas/src/test/java/com/netflix/spectator/atlas/impl/QueryTest.java index 235ca1a62..6b48cab7a 100644 --- a/spectator-reg-atlas/src/test/java/com/netflix/spectator/atlas/impl/QueryTest.java +++ b/spectator-reg-atlas/src/test/java/com/netflix/spectator/atlas/impl/QueryTest.java @@ -734,4 +734,61 @@ public void containsWithSpecialChars() { Assertions.assertEquals("foo,bar", re.key()); Assertions.assertTrue(re.matches("a,b,c")); } + + @Test + public void eqValueWithEscapedColon() { + // A value that is an escaped operator must be treated as data, not as the `:in` + // operator. Previously the early unescape caused this to be parsed as an operator and + // throw an exception. + Query q = parse("name,\\u003ain,:eq"); + Assertions.assertEquals(new Query.Equal("name", ":in"), q); + Assertions.assertTrue(q.matches(registry.createId(":in"))); + Assertions.assertFalse(q.matches(registry.createId("foo"))); + } + + @Test + public void eqValueWithEscapedParen() { + Query q = parse("name,\\u0028,:eq"); + Assertions.assertEquals(new Query.Equal("name", "("), q); + Assertions.assertTrue(q.matches(registry.createId("("))); + } + + @Test + public void inClauseWithEscapedStructuralTokens() { + // Models the result of an "odd" expression where parentheses and operators end up as + // literal values inside an `:in` list. The escaped tokens must be treated as data so + // the real values still match and the structure is preserved. + String expr = "nf.cluster,(,\\u0028,app-foo,app-bar," + + "\\u0029,\\u003ain,\\u003aand,),:in"; + Query q = parse(expr); + + Query.In in = (Query.In) q; + Assertions.assertEquals("nf.cluster", in.key()); + + // Real values match. + Assertions.assertTrue(q.matches(tags("nf.cluster", "app-foo"))); + Assertions.assertTrue(q.matches(tags("nf.cluster", "app-bar"))); + + // The escaped structural tokens are present as literal data values. + Assertions.assertTrue(q.matches(tags("nf.cluster", "("))); + Assertions.assertTrue(q.matches(tags("nf.cluster", ")"))); + Assertions.assertTrue(q.matches(tags("nf.cluster", ":in"))); + Assertions.assertTrue(q.matches(tags("nf.cluster", ":and"))); + + // An unrelated value does not match. + Assertions.assertFalse(q.matches(tags("nf.cluster", "other"))); + } + + @Test + public void inClauseWithUnbalancedEscapedParen() { + // A lone escaped parenthesis as a value must not corrupt the list depth tracking or + // consume the real closing parenthesis. Previously this threw an exception. + Query q = parse("name,(,\\u0028,foo,),:in"); + + Query.In in = (Query.In) q; + Assertions.assertEquals("name", in.key()); + Assertions.assertTrue(q.matches(registry.createId("foo"))); + Assertions.assertTrue(q.matches(registry.createId("("))); + Assertions.assertFalse(q.matches(registry.createId(")"))); + } }