diff --git a/_scripts/templates/CSharp/st.Test.cs b/_scripts/templates/CSharp/st.Test.cs index 49a34f1a9b..bc94dabcf4 100644 --- a/_scripts/templates/CSharp/st.Test.cs +++ b/_scripts/templates/CSharp/st.Test.cs @@ -68,7 +68,7 @@ public static void SetupParse2(string input, string fn, bool quiet = false) if (int.TryParse(part.Trim(), out int d)) ambig_decisions.Add(d); } - else if (d_ambig_index >= 0 && (args[d_ambig_index].StartsWith("-ambig="))) + else if (d_ambig_index >= 0 && (args[d_ambig_index].StartsWith("--ambig="))) { ambig_decisions = new HashSet\(); int prefix_len = 8; @@ -102,7 +102,7 @@ public static void SetupParse2(string input, string fn, bool quiet = false) var parser_startIndex = ai.startIndex; var parser_stopIndex = ai.stopIndex; var p = Parser.RuleNames.Select((value, index) => new { value, index }) - .Where(pair => (pair.value == "prog")) + .Where(pair => (pair.value == "")) .Select(pair => pair.index).First(); var parser_startRuleIndex = p; var parse_trees = ((MyParser)Parser).getAllPossibleParseTrees( @@ -393,7 +393,6 @@ static void DoParse(ICharStream str, string input_name, int row_number) if (token.Type == Antlr4.Runtime.TokenConstants.EOF) break; } - if (show_tokens) System.Console.Error.WriteLine(new_s.ToString()); total_count += i; if (show_tokens) System.Console.Error.WriteLine(new_s.ToString()); lexer.Reset(); diff --git a/csharp/v8-spec/Antlr4ng/CSharpParserBase.ts b/csharp/v8-spec/Antlr4ng/CSharpParserBase.ts index 66faaeb2bc..5b9e6c16fa 100644 --- a/csharp/v8-spec/Antlr4ng/CSharpParserBase.ts +++ b/csharp/v8-spec/Antlr4ng/CSharpParserBase.ts @@ -292,7 +292,26 @@ export abstract class CSharpParserBase extends Parser { return tok2 !== null && (tok2.type === CSharpLexer.Simple_Identifier || tok2.text === "_"); } - IsConstantPatternAhead(): boolean { return !this.IsDeclarationPatternAhead(); } + IsConstantPatternAhead(): boolean { + if (this.IsDeclarationPatternAhead()) return false; + const ts = this.tokenStream; + if (ts.LT(1)?.type 
=== CSharpLexer.TK_LPAREN) { + let depth = 0, i = 1; + while (true) { + const tok = ts.LT(i++); + if (!tok || tok.type < 0) break; + if (tok.type === CSharpLexer.TK_LPAREN) depth++; + else if (tok.type === CSharpLexer.TK_RPAREN) { depth--; if (depth === 0) break; } + else if (tok.type === CSharpLexer.TK_COMMA && depth === 1) return false; + } + } + // Identifier followed immediately by '(' → type-headed positional pattern. + if (ts.LT(1)?.type === CSharpLexer.Simple_Identifier + && ts.LT(2)?.type === CSharpLexer.TK_LPAREN) return false; + return true; + } + + IsPositionalPatternAhead(): boolean { return !this.IsConstantPatternAhead(); } IsImplicitlyTypedLocalVariable(): boolean { const tok = this.tokenStream.LT(1); diff --git a/csharp/v8-spec/CSharp/CSharpParserBase.cs b/csharp/v8-spec/CSharp/CSharpParserBase.cs index 2f263c963d..f1e2b2552a 100644 --- a/csharp/v8-spec/CSharp/CSharpParserBase.cs +++ b/csharp/v8-spec/CSharp/CSharpParserBase.cs @@ -426,7 +426,54 @@ public bool IsDeclarationPatternAhead() } } - public bool IsConstantPatternAhead() => !IsDeclarationPatternAhead(); + public bool IsConstantPatternAhead() + { + if (IsDeclarationPatternAhead()) return false; + + // A '(' followed by a comma at depth 1 is a tuple-positional pattern + // like ("rock", "scissors") or (0, 0). Tuple expressions are not C# + // compile-time constants, so this can only match positional_pattern + // (alt 4), not constant_pattern (alt 2). Return false to suppress + // alt 2 and eliminate the decision-31 ambiguity. 
+ var ts = (ITokenStream)InputStream; + if (ts.LT(1).Type == CSharpLexer.TK_LPAREN) + { + int depth = 0, i = 1; + while (true) + { + IToken tok = ts.LT(i++); + if (tok.Type == TokenConstants.EOF) break; + if (tok.Type == CSharpLexer.TK_LPAREN) depth++; + else if (tok.Type == CSharpLexer.TK_RPAREN) { depth--; if (depth == 0) break; } + else if (tok.Type == CSharpLexer.TK_COMMA && depth == 1) return false; + } + } + + // An identifier followed by '(' after a type_ is a type-headed positional + // pattern like Point(0, 0) or Shape.Circle(x, y). Invocation expressions + // cannot be compile-time constants, so this cannot be a constant_pattern. + IToken t1 = ts.LT(1); + if (t1 != null && t1.Type == CSharpLexer.Simple_Identifier) + { + var par = new CSharpParser((ITokenStream)InputStream); + par.RemoveErrorListeners(); + par.ErrorHandler = new BailErrorStrategy(); + int savedIndex = InputStream.Index; + try + { + par.type_(); + IToken next = ((CommonTokenStream)InputStream).LT(1); + if (next != null && next.Type == CSharpLexer.TK_LPAREN) + return false; // type_ '(' → positional_pattern + } + catch { } + finally { InputStream.Seek(savedIndex); } + } + + return true; + } + + public bool IsPositionalPatternAhead() => !IsConstantPatternAhead(); //-------------------------------------------------------------------------------------- // non_nullable_reference_type disambiguation — decision 15 diff --git a/csharp/v8-spec/CSharpParser.g4 b/csharp/v8-spec/CSharpParser.g4 index 42e7446541..175c0b5f82 100644 --- a/csharp/v8-spec/CSharpParser.g4 +++ b/csharp/v8-spec/CSharpParser.g4 @@ -307,7 +307,7 @@ pattern : {this.IsDeclarationPatternAhead()}? declaration_pattern | {this.IsConstantPatternAhead()}? constant_pattern | var_pattern - | positional_pattern + | {this.IsPositionalPatternAhead()}? 
positional_pattern | property_pattern | discard_pattern ; @@ -868,9 +868,14 @@ await_expression ; // Source: §12.10 Range operator +// Original: +//range_expression +// : unary_expression +// | unary_expression? '..' unary_expression? +// ; range_expression - : unary_expression - | unary_expression? '..' unary_expression? + : unary_expression ('..' unary_expression?)? + | '..' unary_expression? ; // Source: §12.11 Switch expression @@ -958,9 +963,14 @@ conditional_or_expression ; // Source: §12.17 The null coalescing operator +// Original: +//null_coalescing_expression +// : conditional_or_expression +// | conditional_or_expression '??' null_coalescing_expression +// | throw_expression +// ; null_coalescing_expression - : conditional_or_expression - | conditional_or_expression '??' null_coalescing_expression + : conditional_or_expression ('??' null_coalescing_expression)? | throw_expression ; @@ -983,11 +993,19 @@ declaration_expression // ; // Source: §12.20 Conditional operator +// Original rule before left-factoring: +//conditional_expression +// : null_coalescing_expression +// | null_coalescing_expression '?' expression ':' expression +// | null_coalescing_expression '?' 'ref' variable_reference ':' 'ref' variable_reference +// ; conditional_expression - : null_coalescing_expression - | null_coalescing_expression '?' expression ':' expression - | null_coalescing_expression '?' 'ref' variable_reference ':' - 'ref' variable_reference + : null_coalescing_expression ( + '?' ( + expression ':' expression + | 'ref' variable_reference ':' 'ref' variable_reference + ) + )? ; // Source: §12.21.1 General @@ -1130,9 +1148,13 @@ assignment_operator ; // Source: §12.24 Expression +// Original: +//expression +// : non_assignment_expression +// | assignment +// ; expression - : non_assignment_expression - | assignment + : non_assignment_expression (assignment_operator expression)? 
; non_assignment_expression diff --git a/csharp/v8-spec/Cpp/CSharpParserBase.cpp b/csharp/v8-spec/Cpp/CSharpParserBase.cpp index 4f83c141e7..74e5749705 100644 --- a/csharp/v8-spec/Cpp/CSharpParserBase.cpp +++ b/csharp/v8-spec/Cpp/CSharpParserBase.cpp @@ -100,9 +100,52 @@ bool CSharpParserBase::IsDeclarationPatternAhead() bool CSharpParserBase::IsConstantPatternAhead() { - return !IsDeclarationPatternAhead(); + if (IsDeclarationPatternAhead()) return false; + antlr4::Token *first = _input->LT(1); + if (first && static_cast(first->getType()) == CSharpLexer::TK_LPAREN) + { + int depth = 0, i = 1; + while (true) + { + antlr4::Token *tok = _input->LT(i++); + if (!tok) break; + int tt = static_cast(tok->getType()); + if (tt < 0) break; // EOF + if (tt == CSharpLexer::TK_LPAREN) depth++; + else if (tt == CSharpLexer::TK_RPAREN) { depth--; if (depth == 0) break; } + else if (tt == CSharpLexer::TK_COMMA && depth == 1) return false; + } + } + // Type-headed positional pattern: speculative parse of type_() followed by '(' + // e.g. Point(0, 0) — LT(1) is an identifier, not '(', so the tuple scan above + // was skipped. A successful type_() parse whose next token is '(' means this + // is a positional_pattern, not a constant_pattern. + if (first && static_cast(first->getType()) == CSharpLexer::Simple_Identifier) + { + size_t savedIndex = _input->index(); + auto *par = new CSharpParser(_input); + par->removeErrorListeners(); + par->setErrorHandler(std::make_shared()); + try + { + par->type_(); + antlr4::Token *next = _input->LT(1); + if (next && static_cast(next->getType()) == CSharpLexer::TK_LPAREN) + { + _input->seek(savedIndex); + delete par; + return false; + } + } + catch (...) 
{ } + _input->seek(savedIndex); + delete par; + } + return true; } +bool CSharpParserBase::IsPositionalPatternAhead() { return !IsConstantPatternAhead(); } + bool CSharpParserBase::IsTypeParameterName() { antlr4::Token *t = _input->LT(1); diff --git a/csharp/v8-spec/Cpp/CSharpParserBase.h b/csharp/v8-spec/Cpp/CSharpParserBase.h index c782e27ffe..76544bcee6 100644 --- a/csharp/v8-spec/Cpp/CSharpParserBase.h +++ b/csharp/v8-spec/Cpp/CSharpParserBase.h @@ -19,6 +19,7 @@ class CSharpParserBase : public antlr4::Parser bool IsCastExpressionAhead(); bool IsDeclarationPatternAhead(); bool IsConstantPatternAhead(); + bool IsPositionalPatternAhead(); bool IsTypeParameterName(); bool IsValueTypeName(); bool IsReferenceTypeName(); diff --git a/csharp/v8-spec/Dart/CSharpParserBase.dart b/csharp/v8-spec/Dart/CSharpParserBase.dart index db36ce8f83..70ee4e1a45 100644 --- a/csharp/v8-spec/Dart/CSharpParserBase.dart +++ b/csharp/v8-spec/Dart/CSharpParserBase.dart @@ -309,7 +309,35 @@ abstract class CSharpParserBase extends Parser { (tok2.type == CSharpLexer.TOKEN_Simple_Identifier || tok2.text == '_'); } - bool IsConstantPatternAhead() => !IsDeclarationPatternAhead(); + bool IsConstantPatternAhead() { + if (IsDeclarationPatternAhead()) return false; + final ts = inputStream as TokenStream; + final first = ts.LT(1); + if (first != null && first.type == CSharpLexer.TOKEN_TK_LPAREN) { + int depth = 0, i = 1; + while (true) { + final tok = ts.LT(i++); + if (tok == null || tok.type < 0) break; + if (tok.type == CSharpLexer.TOKEN_TK_LPAREN) { + depth++; + } else if (tok.type == CSharpLexer.TOKEN_TK_RPAREN) { + depth--; + if (depth == 0) break; + } else if (tok.type == CSharpLexer.TOKEN_TK_COMMA && depth == 1) { + return false; + } + } + } + // Identifier followed immediately by '(' → type-headed positional pattern. 
+ final second = ts.LT(2); + if (first != null && first.type == CSharpLexer.TOKEN_Simple_Identifier && + second != null && second.type == CSharpLexer.TOKEN_TK_LPAREN) { + return false; + } + return true; + } + + bool IsPositionalPatternAhead() => !IsConstantPatternAhead(); bool IsImplicitlyTypedLocalVariable() { final tok = (inputStream as TokenStream).LT(1); diff --git a/csharp/v8-spec/Go/CSharpParserBase.go b/csharp/v8-spec/Go/CSharpParserBase.go index 50a1e2814c..36e5a20ee9 100644 --- a/csharp/v8-spec/Go/CSharpParserBase.go +++ b/csharp/v8-spec/Go/CSharpParserBase.go @@ -801,8 +801,46 @@ func (p *CSharpParserBase) IsDeclarationPatternAhead() bool { return tok2 != nil && (tok2.GetTokenType() == CSharpLexerSimple_Identifier || tok2.GetText() == "_") } -// IsConstantPatternAhead is the complement of IsDeclarationPatternAhead. -func (p *CSharpParserBase) IsConstantPatternAhead() bool { return !p.IsDeclarationPatternAhead() } +// IsConstantPatternAhead returns false for declaration patterns and for paren-with-comma +// (tuple-positional) patterns, routing the latter exclusively to positional_pattern. +func (p *CSharpParserBase) IsConstantPatternAhead() bool { + if p.IsDeclarationPatternAhead() { + return false + } + ts := p.GetTokenStream() + tok1 := ts.LT(1) + if tok1 != nil && tok1.GetTokenType() == CSharpLexerTK_LPAREN { + depth, i := 0, 1 + for { + tok := ts.LT(i) + i++ + if tok == nil || tok.GetTokenType() == antlr.TokenEOF { + break + } + tt := tok.GetTokenType() + if tt == CSharpLexerTK_LPAREN { + depth++ + } else if tt == CSharpLexerTK_RPAREN { + depth-- + if depth == 0 { + break + } + } else if tt == CSharpLexerTK_COMMA && depth == 1 { + return false + } + } + } + // Identifier followed immediately by '(' → type-headed positional pattern. 
+ tok2 := ts.LT(2) + if tok1 != nil && tok1.GetTokenType() == CSharpLexerSimple_Identifier && + tok2 != nil && tok2.GetTokenType() == CSharpLexerTK_LPAREN { + return false + } + return true +} + +// IsPositionalPatternAhead is the complement of IsConstantPatternAhead. +func (p *CSharpParserBase) IsPositionalPatternAhead() bool { return !p.IsConstantPatternAhead() } // IsImplicitlyTypedLocalVariable returns true when LT(1)='var' and context implies implicit typing. func (p *CSharpParserBase) IsImplicitlyTypedLocalVariable() bool { diff --git a/csharp/v8-spec/Java/CSharpParserBase.java b/csharp/v8-spec/Java/CSharpParserBase.java index f2450c8f24..98b6b140b8 100644 --- a/csharp/v8-spec/Java/CSharpParserBase.java +++ b/csharp/v8-spec/Java/CSharpParserBase.java @@ -389,7 +389,43 @@ public boolean IsDeclarationPatternAhead() finally { _input.seek(savedIndex); } } - public boolean IsConstantPatternAhead() { return !IsDeclarationPatternAhead(); } + public boolean IsConstantPatternAhead() + { + if (IsDeclarationPatternAhead()) return false; + Token t1 = ((CommonTokenStream)_input).LT(1); + if (t1 != null && t1.getType() == CSharpLexer.TK_LPAREN) + { + int depth = 0, i = 1; + while (true) + { + Token tok = ((CommonTokenStream)_input).LT(i++); + if (tok == null || tok.getType() == Token.EOF) break; + int tt = tok.getType(); + if (tt == CSharpLexer.TK_LPAREN) depth++; + else if (tt == CSharpLexer.TK_RPAREN) { depth--; if (depth == 0) break; } + else if (tt == CSharpLexer.TK_COMMA && depth == 1) return false; + } + } + // Identifier followed by '(' after type_ → type-headed positional pattern. 
+ if (t1 != null && t1.getType() == CSharpLexer.Simple_Identifier) + { + int savedIndex = _input.index(); + CSharpParser par = new CSharpParser(_input); + par.removeErrorListeners(); + par.setErrorHandler(new BailErrorStrategy()); + try + { + par.type_(); + Token next = ((CommonTokenStream)_input).LT(1); + if (next != null && next.getType() == CSharpLexer.TK_LPAREN) return false; + } + catch (Exception e) { } + finally { _input.seek(savedIndex); } + } + return true; + } + + public boolean IsPositionalPatternAhead() { return !IsConstantPatternAhead(); } public boolean IsImplicitlyTypedLocalVariable() { diff --git a/csharp/v8-spec/JavaScript/CSharpParserBase.js b/csharp/v8-spec/JavaScript/CSharpParserBase.js index 30682da11c..a6ed630a4e 100644 --- a/csharp/v8-spec/JavaScript/CSharpParserBase.js +++ b/csharp/v8-spec/JavaScript/CSharpParserBase.js @@ -322,7 +322,25 @@ export default class CSharpParserBase extends antlr4.Parser { return tok2 !== null && (tok2.type === CSharpLexer.Simple_Identifier || tok2.text === '_'); } - IsConstantPatternAhead() { return !this.IsDeclarationPatternAhead(); } + IsConstantPatternAhead() { + if (this.IsDeclarationPatternAhead()) return false; + if (this._input.LT(1)?.type === CSharpLexer.TK_LPAREN) { + let depth = 0, i = 1; + while (true) { + const tok = this._input.LT(i++); + if (!tok || tok.type < 0) break; + if (tok.type === CSharpLexer.TK_LPAREN) depth++; + else if (tok.type === CSharpLexer.TK_RPAREN) { depth--; if (depth === 0) break; } + else if (tok.type === CSharpLexer.TK_COMMA && depth === 1) return false; + } + } + // Identifier followed immediately by '(' → type-headed positional pattern. 
+ if (this._input.LT(1)?.type === CSharpLexer.Simple_Identifier + && this._input.LT(2)?.type === CSharpLexer.TK_LPAREN) return false; + return true; + } + + IsPositionalPatternAhead() { return !this.IsConstantPatternAhead(); } IsImplicitlyTypedLocalVariable() { const tok = this._input.LT(1); diff --git a/csharp/v8-spec/Python3/CSharpParserBase.py b/csharp/v8-spec/Python3/CSharpParserBase.py index 20431b6b9b..152cac262d 100644 --- a/csharp/v8-spec/Python3/CSharpParserBase.py +++ b/csharp/v8-spec/Python3/CSharpParserBase.py @@ -402,7 +402,51 @@ def IsDeclarationPatternAhead(self): self._input.seek(saved_index) def IsConstantPatternAhead(self): - return not self.IsDeclarationPatternAhead() + if self.IsDeclarationPatternAhead(): + return False + if "." in __name__: + from .CSharpLexer import CSharpLexer + from .CSharpParser import CSharpParser + else: + from CSharpLexer import CSharpLexer + from CSharpParser import CSharpParser + from antlr4 import BailErrorStrategy + tok1 = self._input.LT(1) + if tok1 is not None and tok1.type == CSharpLexer.TK_LPAREN: + depth = 0 + i = 1 + while True: + tok = self._input.LT(i) + i += 1 + if tok is None or tok.type < 0: + break + if tok.type == CSharpLexer.TK_LPAREN: + depth += 1 + elif tok.type == CSharpLexer.TK_RPAREN: + depth -= 1 + if depth == 0: + break + elif tok.type == CSharpLexer.TK_COMMA and depth == 1: + return False + # Identifier followed by '(' after type_ → type-headed positional pattern. 
+ if tok1 is not None and tok1.type == CSharpLexer.Simple_Identifier: + saved_index = self._input.index + par = CSharpParser(self._input) + par.removeErrorListeners() + par._errHandler = BailErrorStrategy() + try: + par.type_() + next_tok = self._input.LT(1) + if next_tok is not None and next_tok.type == CSharpLexer.TK_LPAREN: + return False + except Exception: + pass + finally: + self._input.seek(saved_index) + return True + + def IsPositionalPatternAhead(self): + return not self.IsConstantPatternAhead() def IsImplicitlyTypedLocalVariable(self): if "." in __name__: diff --git a/csharp/v8-spec/TypeScript/CSharpParserBase.ts b/csharp/v8-spec/TypeScript/CSharpParserBase.ts index c6dbf7fe01..0a8a2e7029 100644 --- a/csharp/v8-spec/TypeScript/CSharpParserBase.ts +++ b/csharp/v8-spec/TypeScript/CSharpParserBase.ts @@ -292,7 +292,26 @@ export default abstract class CSharpParserBase extends Parser { return tok2 !== null && (tok2.type === CSharpLexer.Simple_Identifier || tok2.text === "_"); } - IsConstantPatternAhead(): boolean { return !this.IsDeclarationPatternAhead(); } + IsConstantPatternAhead(): boolean { + if (this.IsDeclarationPatternAhead()) return false; + const ts = this._input as any; + if (ts.LT(1)?.type === CSharpLexer.TK_LPAREN) { + let depth = 0, i = 1; + while (true) { + const tok = ts.LT(i++); + if (!tok || tok.type < 0) break; + if (tok.type === CSharpLexer.TK_LPAREN) depth++; + else if (tok.type === CSharpLexer.TK_RPAREN) { depth--; if (depth === 0) break; } + else if (tok.type === CSharpLexer.TK_COMMA && depth === 1) return false; + } + } + // Identifier followed immediately by '(' → type-headed positional pattern. 
+ if (ts.LT(1)?.type === CSharpLexer.Simple_Identifier + && ts.LT(2)?.type === CSharpLexer.TK_LPAREN) return false; + return true; + } + + IsPositionalPatternAhead(): boolean { return !this.IsConstantPatternAhead(); } IsImplicitlyTypedLocalVariable(): boolean { const tok = (this._input as any).LT(1); diff --git a/csharp/v8-spec/desc.xml b/csharp/v8-spec/desc.xml index 6ae83335c8..b9f4d54116 100644 --- a/csharp/v8-spec/desc.xml +++ b/csharp/v8-spec/desc.xml @@ -6,7 +6,6 @@ examples/**/*.cs Antlr4ng;Cpp;CSharp;Dart;Go;Java;JavaScript;TypeScript - examples/CSharp8*.cs Python3 diff --git a/csharp/v8-spec/design/class_base.md b/csharp/v8-spec/design/class_base.md index 53adce39b0..f4a8b22a08 100644 --- a/csharp/v8-spec/design/class_base.md +++ b/csharp/v8-spec/design/class_base.md @@ -38,8 +38,8 @@ clause: ## The problem -All three alternatives begin with the token `':'`. After the colon, all three -continue with a `type_name` — which reduces to an identifier. ANTLR4 therefore +All three alternatives begin with the token `':'`. After the colon, all three +continue with a `type_name`, which reduces to an identifier. ANTLR4 therefore sees the same token prefix `':' IDENTIFIER` for alts 1, 2, and 3 and cannot choose among them by token lookahead alone. @@ -47,17 +47,17 @@ The specific ambiguities are: ### Single type after the colon (e.g., `: Foo`) -Both alt 1 and alt 2 match `':' type_name`. Without the symbol table, ANTLR4 +Both alt 1 and alt 2 match `':' type_name`. Without the symbol table, ANTLR4 cannot tell whether `Foo` is an interface (→ alt 1) or a class (→ alt 2). -Without predicates it would always choose alt 1 (`interface_type_list`), +Without predicates, it would always choose alt 1 (`interface_type_list`), misclassifying every single-base-class declaration as an interface list. 
### Multiple types after the colon (e.g., `: Foo, Bar`) Both alt 1 (`interface_type_list = Foo, Bar`) and alt 3 (`class_type = Foo ',' interface_type_list = Bar`) match `':' type_name ',' -type_name`. Without the symbol table, ANTLR4 cannot tell whether the first -type is a class (→ alt 3) or an interface (→ alt 1). Without predicates it +type_name`. Without the symbol table, ANTLR4 cannot tell whether the first +type is a class (→ alt 3) or an interface (→ alt 1). Without predicates, it would always choose alt 1, misclassifying every class-plus-interfaces base clause. @@ -144,7 +144,7 @@ selects alt 2. ## Open-world limitation Accurate alt 1 classification requires interface declarations to be registered -in the symbol table before the class that implements them is parsed. In the +in the symbol table before the class that implements them is parsed. In the grammar test harness the declarations are **not** pre-registered, so all user-defined names default to `IsClassBaseClassType() = true` and parse as alt 2 or alt 3. diff --git a/csharp/v8-spec/design/expression.md b/csharp/v8-spec/design/expression.md new file mode 100644 index 0000000000..b12ffaed04 --- /dev/null +++ b/csharp/v8-spec/design/expression.md @@ -0,0 +1,74 @@ +# `expression` — SLL Lookahead Problem and Fix + +## The Problem + +After fixing `null_coalescing_expression`, `trperf` on `AllInOneNoPreprocessor.cs` +reported **max-k = 30** on the `expression` rule, at the same position in the +input: `list.Select(c => (c.f1, f3: c.f2)).Where(t => t.f2 == 1);`, line 771, column 21. +This is the token length of the longest common prefix between +two alternatives of `expression` in that file. 
+ +### Root cause: shared common prefix across alternatives + +The original rule was: + +```antlr +// Source: §12.24 Expression +expression + : non_assignment_expression + | assignment + ; + +assignment + : unary_expression assignment_operator expression + ; +``` + +`non_assignment_expression` expands (via `conditional_expression → +null_coalescing_expression → conditional_or_expression → … → unary_expression`) +to a sequence that can begin with exactly the same tokens as `assignment`. +`assignment` itself begins with `unary_expression`. Since every `unary_expression` +is a valid prefix for both alternatives, ANTLR4's SLL simulation must scan through +the entire `unary_expression` — up to 30 tokens — to reach the distinguishing +`assignment_operator` token (or end of input) before it can select an alternative. + +--- + +## The Fix: Left-factoring + +The `assignment` alternative is inlined into `expression` and the +`assignment_operator expression` tail is made optional: + +```antlr +// Source: §12.24 Expression +expression + : non_assignment_expression (assignment_operator expression)? + ; +``` + +The parser now unconditionally parses `non_assignment_expression` first. After that +sub-rule returns, only **k = 1** is needed to decide whether an `assignment_operator` +follows. The max-k for this decision drops to 1. + +The separate `assignment` rule is no longer referenced by `expression`. It remains +in the grammar because it is referenced elsewhere (e.g. `statement_expression`). + +--- + +## Semantic equivalence and the grammar's permissiveness + +The rewritten rule is slightly **more permissive** than the original. The original +`assignment` required the left-hand side to be specifically a `unary_expression`; +the rewritten rule allows any `non_assignment_expression` to serve as the LHS of +an assignment (e.g. `a + b = c` is now grammatically accepted). 
+ +This is acceptable: the C# specification already relies on semantic analysis (not +the grammar) to enforce that only certain expression forms are valid assignment +targets (§12.23.1). Invalid LHS forms are caught by the type checker, not the +parser. The same permissiveness is present in many other ANTLR4 C# grammar +implementations. + +| Input form | Original alt | Rewritten form | +|---|---|---| +| `expr` (no assignment) | alt 1 (`non_assignment_expression`) | alt 1, optional suffix absent | +| `lhs = rhs` | alt 2 (`assignment`) | alt 1, optional suffix present | diff --git a/csharp/v8-spec/design/non_nullable_reference_type.md b/csharp/v8-spec/design/non_nullable_reference_type.md index 34605e06d6..edb684f36d 100644 --- a/csharp/v8-spec/design/non_nullable_reference_type.md +++ b/csharp/v8-spec/design/non_nullable_reference_type.md @@ -32,14 +32,14 @@ interface_type : type_name ; class_type : type_name | 'object' | 'string' ; ``` -and `type_name` starts with an identifier. So for any input like `Foo bar`, +and `type_name` starts with an identifier. So for any input like `Foo bar`, ANTLR4 cannot distinguish between: * `Foo` being the name of a delegate (→ `delegate_type`) * `Foo` being the name of an interface (→ `interface_type`) * `Foo` being the name of a class (→ `class_type`) -by token lookahead alone. Without predicates it would always choose the first +by token lookahead alone. Without predicates, it would always choose the first alternative (`delegate_type`), misclassifying every class and interface as a delegate. @@ -80,7 +80,7 @@ return ts != null && ts.TypeKind == CSharpTypeKind.Interface; ### `IsClassTypeName()` -The default / open-world predicate. Returns `true` for: +The default, open-world predicate. Returns `true` for: * The keywords `object` and `string` (always class types). * Any **unknown identifier** (not in the symbol table) — open-world assumption. 
@@ -100,7 +100,7 @@ return ts.TypeKind != CSharpTypeKind.Interface ## Decision ordering and mutual exclusivity ANTLR4 evaluates the predicates in declaration order and stops at the first -`true` result. The three predicates are mutually exclusive for any +`true` result. The three predicates are mutually exclusive for any **registered** type name: | Predicate | Returns `true` | @@ -116,7 +116,7 @@ both return `false`, and `IsClassTypeName` returns `true` (open-world default). Accurate classification of user-defined delegate and interface types requires those declarations to have been registered in the symbol table before the -type reference is parsed. When the symbol table has not been populated — +type reference is parsed. When the symbol table has not been populated — as is the case in the grammar test harness for declarations made in the same file — all user-defined type names fall through to the `class_type` default. A post-parse binding pass would correct the classification. diff --git a/csharp/v8-spec/design/null_coalescing_expression.md b/csharp/v8-spec/design/null_coalescing_expression.md new file mode 100644 index 0000000000..42b2ae85ce --- /dev/null +++ b/csharp/v8-spec/design/null_coalescing_expression.md @@ -0,0 +1,71 @@ +# `null_coalescing_expression` — SLL Lookahead Problem and Fix + +## The Problem + +During performance profiling with `trperf` on `AllInOneNoPreprocessor.cs`, the rule +`null_coalescing_expression` was found to have **max-k = 30**. The 30 reflects the +length (in tokens) of the longest `conditional_or_expression` that preceded a `??` +operator in the test file. For inputs with longer expressions, the max-k would grow +proportionally. + +### Root cause: shared common prefix across alternatives + +The original rule was: + +```antlr +// Source: §12.17 The null coalescing operator +null_coalescing_expression + : conditional_or_expression + | conditional_or_expression '??' 
null_coalescing_expression + | throw_expression + ; +``` + +Alternatives 1 and 2 both begin with `conditional_or_expression`. ANTLR4's SLL +prediction must decide which alternative to enter **before consuming any tokens**. +It does so by speculatively simulating both alternatives in parallel through the ATN +until the token stream diverges. Since the two alternatives are identical up to +the end of `conditional_or_expression`, the simulation must consume every token of +that sub-expression before it reaches the distinguishing `??` token — or any other +following token, including end of input (confirming alt 1). + +In the test file, the longest `conditional_or_expression` before a `??` spans +30 tokens, producing **max-k = 30** for this decision. + +--- + +## The Fix: Left-factoring + +The two alternatives that share the common prefix are merged by making the `??` +suffix optional: + +```antlr +// Source: §12.17 The null coalescing operator +null_coalescing_expression + : conditional_or_expression ('??' null_coalescing_expression)? + | throw_expression + ; +``` + +Now the parser commits to consuming `conditional_or_expression` without any +upfront lookahead into its internals. After the sub-rule returns, only **k = 1** +is needed to decide whether `??` follows. The max-k for this decision drops to 1. + +The `throw_expression` alternative (alt 3 originally, alt 2 after the rewrite) begins with the `throw` keyword, which is syntactically +distinct from any token that can start `conditional_or_expression`, so it is +resolved at LT(1) with no lookahead cost. + +--- + +## Semantic equivalence + +The rewritten rule accepts exactly the same language as the original: + +| Input form | Original alt | Rewritten form | +|---|---|---| +| `expr` | alt 1 | alt 1, optional suffix absent | +| `expr ?? expr` | alt 2 | alt 1, optional suffix present | +| `throw expr` | alt 3 | alt 2 | + +No parse tree structure changes beyond the consolidation of the two +`conditional_or_expression`-headed alternatives into one.
diff --git a/csharp/v8-spec/design/pattern_positional.md b/csharp/v8-spec/design/pattern_positional.md new file mode 100644 index 0000000000..f93ac89670 --- /dev/null +++ b/csharp/v8-spec/design/pattern_positional.md @@ -0,0 +1,139 @@ +# Disambiguation of `constant_pattern` vs `positional_pattern` + +## Context + +This document describes a **follow-on fix** to the ambiguity documented in +`pattern.md`. After `IsDeclarationPatternAhead` / `IsConstantPatternAhead` +were introduced to separate `declaration_pattern` from `constant_pattern`, a +residual ambiguity remained between `constant_pattern` (alt 2) and +`positional_pattern` (alt 4) for inputs that begin with `(`. + +## The problem + +`positional_pattern` has an optional leading `type_?`: + +```antlr +positional_pattern : type_? '(' subpatterns? ')' property_subpattern? simple_designation? ; +``` + +When the `type_?` is absent, the pattern begins with `(`. A `constant_pattern` +whose `constant_expression` is a parenthesised expression also begins with `(`: + +```csharp +case ("rock", "scissors"): // positional_pattern — tuple of two string patterns +case ("hello"): // constant_pattern — parenthesised string constant +``` + +With the original `IsConstantPatternAhead() => !IsDeclarationPatternAhead()`, +a `("rock", "scissors")` input was reported as ambiguous between alt 2 +(`constant_pattern`) and alt 4 (`positional_pattern`) because: + +* `IsDeclarationPatternAhead()` returns `false` (no type + designation follows). +* `IsConstantPatternAhead()` therefore returns `true`. +* `positional_pattern` had no predicate, so it was unconditionally viable. + +Both alternatives matched and ANTLR4 flagged the ambiguity (decision 31). + +## The solution + +### Extending `IsConstantPatternAhead` + +A tuple-positional pattern is recognised by a **comma at parenthesis depth 1**. 
+Tuple expressions cannot be C# compile-time constants (a `constant_pattern` requires a +constant expression as defined in §12.25 of the C# specification, and a tuple is not one), so any +parenthesised input that contains a top-level comma must be `positional_pattern`, not +`constant_pattern`. + +`IsConstantPatternAhead` was extended with a second guard after the +declaration-pattern check (excerpt; the later type-headed check for inputs such as `Point(0, 0)` is not shown): + +```csharp +public bool IsConstantPatternAhead() +{ + if (IsDeclarationPatternAhead()) return false; + + // A '(' followed by a comma at depth 1 is a tuple-positional pattern + // like ("rock", "scissors") or (0, 0). Tuple expressions are not C# + // compile-time constants, so this can only match positional_pattern + // (alt 4), not constant_pattern (alt 2). Return false to suppress + // alt 2 and eliminate the ambiguity. + var ts = (ITokenStream)InputStream; + if (ts.LT(1).Type == CSharpLexer.TK_LPAREN) + { + int depth = 0, i = 1; + while (true) + { + IToken tok = ts.LT(i++); + if (tok.Type == TokenConstants.EOF) break; + if (tok.Type == CSharpLexer.TK_LPAREN) depth++; + else if (tok.Type == CSharpLexer.TK_RPAREN) { depth--; if (depth == 0) break; } + else if (tok.Type == CSharpLexer.TK_COMMA && depth == 1) return false; + } + } + return true; +} +``` + +### Adding `IsPositionalPatternAhead` and gating `positional_pattern` + +Because the predicates on alts 1 and 2 are no longer a complete partition of +all inputs, `positional_pattern` must be explicitly gated so that it is +suppressed when `IsConstantPatternAhead` returns `true`. The complement +predicate and the grammar change together enforce mutual exclusion: + +```csharp +public bool IsPositionalPatternAhead() => !IsConstantPatternAhead(); +``` + +```antlr +pattern + : {this.IsDeclarationPatternAhead()}? declaration_pattern + | {this.IsConstantPatternAhead()}? constant_pattern + | var_pattern + | {this.IsPositionalPatternAhead()}?
positional_pattern + | property_pattern + | discard_pattern + ; +``` + +## Why the comma-at-depth-1 test is sufficient + +The check is conservative: it only returns `false` when there is definitely a +top-level comma inside the parentheses. The table below shows how representative +`(`-headed inputs are classified (`IsConstantPatternAhead` stays `true` only when there is no such comma): + +| Input | Reason | +|---|---| +| `("hello")` | No comma → not suppressed → remains `constant_pattern` | +| `(x + y)` | No comma → not suppressed → remains `constant_pattern` | +| `("rock", "scissors")` | Comma at depth 1 → suppressed → routes to `positional_pattern` | +| `(0, 0)` | Comma at depth 1 → suppressed → routes to `positional_pattern` | +| `((a, b), c)` | Comma at depth 1 (after inner pair closes) → suppressed | + +Nested parentheses are handled correctly because the scan tracks `depth` and only +reacts to commas at `depth == 1`. + +## Decision ordering (revised) + +| Alt | Predicate | Fires when | +|---|---|---| +| 1 `declaration_pattern` | `IsDeclarationPatternAhead()` | Speculative `type_()` succeeds and is followed by `Simple_Identifier` or `_` | +| 2 `constant_pattern` | `IsConstantPatternAhead()` | Not a declaration pattern **and** not a paren-with-comma | +| 3 `var_pattern` | _(none)_ | Reachable only when alts 1 and 2 are blocked; `var` keyword | +| 4 `positional_pattern` | `IsPositionalPatternAhead()` | Complement of alt 2; paren-with-comma or type-headed `(` | +| 5 `property_pattern` | _(none)_ | Leading `{` or type followed by `{` | +| 6 `discard_pattern` | _(none)_ | Single `_` token | + +Alts 1 and 2 remain mutually exclusive by construction (negation). +Alts 2 and 4 are now mutually exclusive by construction (negation via +`IsPositionalPatternAhead`). 
+ +## Example parse-tree paths + +| C# pattern | Predicate outcome | Parse-tree path | +|---|---|---| +| `("hello")` | alt 2 true | `pattern → constant_pattern` (parenthesised string) | +| `("rock", "scissors")` | alt 2 false (comma at depth 1), alt 4 true | `pattern → positional_pattern` | +| `(0, 0)` | alt 2 false (comma at depth 1), alt 4 true | `pattern → positional_pattern` | +| `((a, b), c)` | alt 2 false (comma at depth 1), alt 4 true | `pattern → positional_pattern` | +| `Point(0, 0)` | alt 1 false, alt 2 false (speculative `type_()` followed by `(`), alt 4 true | `pattern → positional_pattern` (type-headed) | diff --git a/csharp/v8-spec/design/primary_expression_mlr.md b/csharp/v8-spec/design/primary_expression_mlr.md new file mode 100644 index 0000000000..481e5574c3 --- /dev/null +++ b/csharp/v8-spec/design/primary_expression_mlr.md @@ -0,0 +1,248 @@ +# Removal of mutual left-recursion in `primary_expression` + +## Background: mutual left-recursion (MLR) + +ANTLR4 supports **direct left recursion** within a single rule — it internally +rewrites a rule such as + +```antlr +expr : expr '+' expr | atom ; +``` + +into an iterative (non-recursive) form and produces a correct LL parser. + +ANTLR4 does **not** support **mutual left-recursion (MLR)** — a cycle involving +two or more distinct rules, e.g., + +```antlr +a : b 'x' ; // a references b as its first symbol +b : a 'y' ; // b references a as its first symbol +``` + +When MLR is present, the ANTLR4 tool reports an error and refuses to generate a +parser. The C# language specification contains exactly this pattern in the +`primary_expression` cluster. + +--- + +## The MLR group in the C# specification grammar + +The C# spec defines `primary_expression` as a union that includes ten named +sub-rules, each of which begins with `primary_expression` as its first element: + +| Sub-rule (spec §) | Left-recursive production | +|---|---| +| `member_access` (§12.8.7) | `primary_expression '.' 
identifier type_argument_list?` | +| `null_conditional_member_access` (§12.8.8) | `primary_expression '?' '.' identifier type_argument_list? …` | +| `null_forgiving_expression` (§12.8.9) | `primary_expression '!'` | +| `invocation_expression` (§12.8.10) | `primary_expression '(' argument_list? ')'` | +| `null_conditional_element_access` (§12.8.13) | `primary_expression '?' '[' argument_list ']' …` | +| `post_increment_expression` (§12.8.16) | `primary_expression '++'` | +| `post_decrement_expression` (§12.8.16) | `primary_expression '--'` | +| `element_access` (§12.8.12) | `primary_expression '[' argument_list ']'` | +| `pointer_member_access` (§24.6.3, unsafe) | `primary_expression '->' identifier type_argument_list?` | +| `pointer_element_access` (§24.6.4, unsafe) | `primary_expression '[' expression ']'` | + +Because `primary_expression` references each of these rules, and each of those +rules references `primary_expression` as its first symbol, ANTLR4 sees an +11-rule MLR cycle and cannot generate a parser. + +The grammar preserves the original spec text as a comment block at §12.8.1: + +```antlr +// ║ primary_expression +// ║ : literal +// ║ | ... +// ║ | member_access +// ║ | null_conditional_member_access +// ║ | invocation_expression +// ║ | element_access +// ║ | null_conditional_element_access +// ║ | post_increment_expression +// ║ | post_decrement_expression +// ║ | null_forgiving_expression +// ║ | ... +// ║ | pointer_member_access // unsafe code support +// ║ | pointer_element_access // unsafe code support +// ║ | stackalloc_expression +// ║ ; +``` + +--- + +## The fix: inline all left-recursive alternatives into `primary_expression` + +The solution is to replace every call to a left-recursive sub-rule with its +production body placed directly inside `primary_expression`. Because all the +alternatives now live in the same rule, ANTLR4's direct-left-recursion rewriter +can handle the self-references correctly. 
+ +The grammar comment records this change: + +``` +// [CHANGE] This removes a mutual left-recursion group which has been left in the Standard +// [CHANGE] (other uses of MLR have been removed). Without this change the grammar will fail +// [CHANGE] to verify and no sample testing can be done. +``` + +The resulting `primary_expression` rule (abbreviated): + +```antlr +primary_expression + : literal + | interpolated_string_expression + | simple_name + | tuple_expression + | parenthesized_expression + // From: member_access + | primary_expression '.' identifier type_argument_list? { this.AsMemberAccess(_localctx); } + | predefined_type '.' identifier type_argument_list? { this.AsMemberAccess(_localctx); } + | qualified_alias_member '.' identifier type_argument_list? { this.AsMemberAccess(_localctx); } + // From: null_conditional_member_access + | primary_expression '?' '.' identifier type_argument_list? + (null_forgiving_operator? dependent_access)* { this.AsNullConditionalMemberAccess(_localctx); } + // From: invocation_expression + | primary_expression '(' argument_list? ')' { this.AsInvocationExpression(_localctx); } + // From: element_access and pointer_element_access (unsafe) + | primary_expression '[' argument_list ']' { this.AsElementAccess(_localctx); } + { this.ElementAccessSemanticCheck(_localctx); } + // From: null_conditional_element_access + | primary_expression '?' '[' argument_list ']' + (null_forgiving_operator? 
dependent_access)* { this.AsNullConditionalElementAccess(_localctx); } + { this.ElementAccessSemanticCheck(_localctx); } + | this_access + | base_access + // From: post_increment_expression + | primary_expression '++' { this.AsPostIncrementExpression(_localctx); } + // From: post_decrement_expression + | primary_expression '--' { this.AsPostDecrementExpression(_localctx); } + // From: null_forgiving_expression + // | null_forgiving_expression + | primary_expression null_forgiving_operator { this.AsNullForgivingExpression(_localctx); } + | array_creation_expression + | ... + // From: pointer_member_access (unsafe) + | primary_expression '->' identifier type_argument_list? { this.AsPointerMemberAccess(_localctx); } + // From: pointer_element_access — covered by element_access replacement above + | stackalloc_expression + ; +``` + +Each `As*` action on the inlined alternatives reconstructs the semantic +context that the corresponding named rule would have established, preserving +listener/visitor behaviour. + +--- + +## Rules commented out after the refactoring + +Five named rules became unreachable after their uses were inlined or replaced. Rather +than deleting them, the grammar retains them as commented-out documentation so +that the spec section reference remains visible. + +### `null_forgiving_expression` (§12.8.9.1) + +```antlr +//null_forgiving_expression +// : primary_expression null_forgiving_operator +// ; +``` + +Inlined into `primary_expression` as `| primary_expression null_forgiving_operator`. + +### `invocation_expression` (§12.8.10.1) + +```antlr +//invocation_expression +// : primary_expression '(' argument_list? ')' +// ; +``` + +Inlined into `primary_expression` as `| primary_expression '(' argument_list? ')'`. + +Consequence for `statement_expression`: the rule previously listed +`invocation_expression` as an alternative. 
After inlining, that alternative +became `primary_expression` (which subsumes any invocation): + +```antlr +statement_expression + : null_conditional_invocation_expression +// BUGBUG BUG | invocation_expression + | primary_expression + ... +``` + +The `BUGBUG` comment records that `primary_expression` is broader than +`invocation_expression` was — it accepts any primary expression where only a +call was valid. Tightening this would require a semantic predicate or +post-parse validation. + +### `element_access` (§12.8.12.1) + +```antlr +//element_access +// : primary_expression '[' argument_list ']' +// ; +``` + +Inlined into `primary_expression` as `| primary_expression '[' argument_list ']'`. +The same inlined alternative also replaces `pointer_element_access` (see below). + +### `pointer_member_access` (§24.6.3, unsafe) + +```antlr +//pointer_member_access +// : primary_expression '->' identifier type_argument_list? +// ; +``` + +Inlined into `primary_expression` as `| primary_expression '->' identifier type_argument_list?`. + +### `pointer_element_access` (§24.6.4, unsafe) + +```antlr +//pointer_element_access +// : primary_expression '[' expression ']' +// ; +``` + +Covered by the inlined `element_access` alternative (`'[' argument_list ']'` +is a superset of `'[' expression ']'` in practice). 
+ +--- + +## Rules retained as named rules + +Five rules in the original MLR group were **not** commented out because they +are still referenced from other parts of the grammar: + +| Rule | Retained references | +|---|---| +| `member_access` | `member_declarator` (anonymous-object-creation field initializers) | +| `null_conditional_member_access` | `null_conditional_invocation_expression`, `null_conditional_projection_initializer` | +| `null_conditional_element_access` | `null_conditional_invocation_expression` | +| `post_increment_expression` | `statement_expression` | +| `post_decrement_expression` | `statement_expression` | + +These rules still begin with `primary_expression`, but because `primary_expression` +no longer calls them, the MLR cycle is broken. ANTLR4 encounters them only +when the parent rule (e.g. `member_declarator`) explicitly invokes them, at +which point the parse descends into `primary_expression` without any cycle. + +--- + +## Summary of the refactoring + +| Rule | Action | Reason | +|---|---|---| +| `primary_expression` | Expanded in place with all left-recursive alternatives inlined | Converts MLR to direct left recursion, which ANTLR4 supports | +| `null_forgiving_expression` | Commented out | Sole use was in `primary_expression`; now inlined | +| `invocation_expression` | Commented out | Inlined into `primary_expression`; its other use, in `statement_expression`, was replaced by `primary_expression` | +| `element_access` | Commented out | Sole use was in `primary_expression`; now inlined | +| `pointer_member_access` | Commented out | Sole use was in `primary_expression`; now inlined | +| `pointer_element_access` | Commented out | Covered by inlined `element_access` alternative | +| `member_access` | Retained | Still used by `member_declarator` | +| `null_conditional_member_access` | Retained | Still used by `null_conditional_invocation_expression` and `null_conditional_projection_initializer` | +| `null_conditional_element_access` | Retained | Still used 
by `null_conditional_invocation_expression` | +| `post_increment_expression` | Retained | Still used by `statement_expression` | +| `post_decrement_expression` | Retained | Still used by `statement_expression` | diff --git a/csharp/v8-spec/design/range_expression.md b/csharp/v8-spec/design/range_expression.md new file mode 100644 index 0000000000..7564e4a36c --- /dev/null +++ b/csharp/v8-spec/design/range_expression.md @@ -0,0 +1,66 @@ +# `range_expression` — SLL Lookahead Problem and Fix + +## The Problem + +After fixing `null_coalescing_expression` and `expression`, `trperf` on +`AllInOneNoPreprocessor.cs` reported **max-k = 30** on `range_expression`, again +at the same position in the input. + +### Root cause: shared common prefix across alternatives + +The original rule was: + +```antlr +// Source: §12.10 Range operator +range_expression + : unary_expression + | unary_expression? '..' unary_expression? + ; +``` + +Alternatives 1 and 2 can both start with `unary_expression`. ANTLR4's SLL +simulation cannot choose between them at the decision point; it must speculatively +follow both alternatives through the ATN until the token stream diverges. The +divergence is the `..` token that follows (alt 2) or does not follow (alt 1) +the `unary_expression`. In the test file the longest `unary_expression` that +precedes `..` spans 30 tokens, giving **max-k = 30**. + +--- + +## The Fix: Left-factoring + +Alternative 2 is split on whether a leading `unary_expression` is present, and +the `..` suffix is made optional on alt 1: + +```antlr +// Source: §12.10 Range operator +range_expression + : unary_expression ('..' unary_expression?)? + | '..' unary_expression? + ; +``` + +- **Alt 1** covers: a bare `unary_expression`, and `unary_expression '..'`, + and `unary_expression '..' unary_expression`. + After parsing `unary_expression`, only **k = 1** is needed to decide whether + `..` follows. +- **Alt 2** covers the `..`-prefix form where the left operand is absent + (`.. 
unary_expression` or just `..`). This begins with `..`, which is + syntactically distinct from any token that can start `unary_expression`, + so it is resolved at LT(1) with no lookahead cost. + +The max-k for this decision drops to 1. + +--- + +## Semantic equivalence + +The rewritten rule accepts exactly the same language as the original: + +| Input form | Original alt | Rewritten form | +|---|---|---| +| `expr` | alt 1 | alt 1, optional suffix absent | +| `expr ..` | alt 2 | alt 1, optional `..` present, right operand absent | +| `expr .. expr` | alt 2 | alt 1, optional `..` present, right operand present | +| `.. expr` | alt 2 | alt 2 | +| `..` | alt 2 | alt 2, operand absent | diff --git a/csharp/v8-spec/design/type_.md b/csharp/v8-spec/design/type_.md index 1cb4fe822a..3d7008bdfb 100644 --- a/csharp/v8-spec/design/type_.md +++ b/csharp/v8-spec/design/type_.md @@ -26,7 +26,7 @@ For example, `Foo` could be: always syntactically distinguished by the trailing `*`. Because the three non-pointer alternatives are token-identical at `LT(1)`, -ANTLR4 cannot resolve the choice by lookahead alone. Without help it would +ANTLR4 cannot resolve the choice by lookahead alone. Without help, it would always pick the first alternative (`type_parameter`), misclassifying every ordinary type as a generic type variable. @@ -44,7 +44,7 @@ type_ ; ``` -ANTLR4 evaluates the predicates in order at prediction time. The first +ANTLR4 evaluates the predicates in order at prediction time. The first predicate that returns `true` selects the corresponding alternative; execution never reaches a later alternative once an earlier one is chosen. @@ -76,7 +76,7 @@ Returns `true` for two disjoint sets of inputs: ### `IsReferenceTypeName()` -The default / open-world predicate. Returns `true` for: +The default, open-world predicate. Returns `true` for: * The keywords `dynamic`, `object`, `string`. * `[` — beginning of an `array_type`.