diff --git a/rust/CSharp/RustLexerBase.cs b/rust/CSharp/RustLexerBase.cs index e47933d486..d284b1cffd 100644 --- a/rust/CSharp/RustLexerBase.cs +++ b/rust/CSharp/RustLexerBase.cs @@ -35,6 +35,11 @@ public bool next(char expect){ return _input.LA(1) == expect; } + + public bool nexti(int expect){ + return _input.LA(1) == expect; + } + public bool floatDotPossible(){ int next = _input.LA(1); // only block . _ identifier after float diff --git a/rust/CSharp/transformGrammar.py b/rust/CSharp/transformGrammar.py new file mode 100644 index 0000000000..3b6d7d5532 --- /dev/null +++ b/rust/CSharp/transformGrammar.py @@ -0,0 +1,33 @@ +import sys, os, re, shutil +from glob import glob +from pathlib import Path + +def main(argv): + for file in glob("*.g4"): + fix(file) + +def fix(file_path): + print("Altering " + file_path) + if not os.path.exists(file_path): + print(f"Could not find file: {file_path}") + sys.exit(1) + parts = os.path.split(file_path) + file_name = parts[-1] + + shutil.move(file_path, file_path + ".bak") + input_file = open(file_path + ".bak",'r') + output_file = open(file_path, 'w') + for x in input_file: + if 'this._input' in x: + x = x.replace("this._input", "this.InputStream") + + + output_file.write(x) + output_file.flush() + + print("Writing ...") + input_file.close() + output_file.close() + +if __name__ == '__main__': + main(sys.argv) diff --git a/rust/Cpp/RustLexerBase.h b/rust/Cpp/RustLexerBase.h index fd08f54024..9f0e7de583 100644 --- a/rust/Cpp/RustLexerBase.h +++ b/rust/Cpp/RustLexerBase.h @@ -31,6 +31,9 @@ class RustLexerBase : public antlr4::Lexer { bool next(char expect){ return _input->LA(1) == expect; } + bool nexti(int expect){ + return _input->LA(1) == expect; + } bool floatDotPossible(){ size_t next = _input->LA(1); diff --git a/rust/Java/RustLexerBase.java b/rust/Java/RustLexerBase.java index 008c424061..36f550acad 100644 --- a/rust/Java/RustLexerBase.java +++ b/rust/Java/RustLexerBase.java @@ -29,6 +29,10 @@ public boolean next(char expect){ return _input.LA(1) == expect; } + public boolean nexti(int expect){ + return _input.LA(1) == expect; + } + public boolean floatDotPossible(){ int next = _input.LA(1); // only block . _ identifier after float @@ -82,4 +86,4 @@ public boolean floatLiteralPossible(){ return true; } } -} \ No newline at end of file +} diff --git a/rust/Python3/RustLexerBase.py b/rust/Python3/RustLexerBase.py index ec95ffe758..6e5c5c3d41 100644 --- a/rust/Python3/RustLexerBase.py +++ b/rust/Python3/RustLexerBase.py @@ -2,9 +2,11 @@ from antlr4 import * from antlr4.InputStream import InputStream + class RustLexerBase(Lexer): flt_mp = set() RustLexer = None + def __init__(self, input: InputStream, output: TextIO = ...) -> None: super().__init__(input, output) self.token_lookbehind: tuple[Optional[Token], Optional[Token]] = (None, None) @@ -14,32 +16,33 @@ def __init__(self, input: InputStream, output: TextIO = ...) -> None: from RustLexer import RustLexer RustLexerBase.RustLexer = RustLexer RustLexerBase.flt_mp = { - RustLexer.STRING_LITERAL, - RustLexer.RAW_STRING_LITERAL, - RustLexer.BYTE_LITERAL, - RustLexer.BYTE_STRING_LITERAL, - RustLexer.RAW_BYTE_STRING_LITERAL, - RustLexer.INTEGER_LITERAL, - RustLexer.DEC_LITERAL, - RustLexer.HEX_LITERAL, - RustLexer.OCT_LITERAL, - RustLexer.BIN_LITERAL, - RustLexer.KW_SUPER, - RustLexer.KW_SELFVALUE, - RustLexer.KW_SELFTYPE, - RustLexer.KW_CRATE, - RustLexer.KW_DOLLARCRATE, - RustLexer.RCURLYBRACE, - RustLexer.RSQUAREBRACKET, - RustLexer.RPAREN, - RustLexer.KW_AWAIT, - RustLexer.NON_KEYWORD_IDENTIFIER, - RustLexer.RAW_IDENTIFIER, - RustLexer.KW_MACRORULES, - RustLexer.GT - } + RustLexer.STRING_LITERAL, + RustLexer.RAW_STRING_LITERAL, + RustLexer.BYTE_LITERAL, + RustLexer.BYTE_STRING_LITERAL, + RustLexer.RAW_BYTE_STRING_LITERAL, + RustLexer.INTEGER_LITERAL, + RustLexer.DEC_LITERAL, + RustLexer.HEX_LITERAL, + RustLexer.OCT_LITERAL, + RustLexer.BIN_LITERAL, + RustLexer.KW_SUPER, + RustLexer.KW_SELFVALUE, + RustLexer.KW_SELFTYPE, + RustLexer.KW_CRATE, + RustLexer.KW_DOLLARCRATE, + RustLexer.RCURLYBRACE, + RustLexer.RSQUAREBRACKET, + RustLexer.RPAREN, + RustLexer.KW_AWAIT, + RustLexer.NON_KEYWORD_IDENTIFIER, + RustLexer.RAW_IDENTIFIER, + RustLexer.KW_MACRORULES, + RustLexer.GT, + } """LOOK BEHIND TOKENS""" + def nextToken(self): next: Token = super().nextToken() @@ -57,6 +60,12 @@ def next(self, expect) -> bool: else: return self._input.LA(1) == expect + def nexti(self, expect) -> bool: + if isinstance(expect, str): + return chr(self._input.LA(1)) == expect + else: + return self._input.LA(1) == expect + def floatDotPossible(self): next = chr(self._input.LA(1)) # print(f'INFO: floatpossible ? {next} = {chr(next)}') @@ -81,10 +90,10 @@ def floatDotPossible(self): def floatLiteralPossible(self): prev, current = self.token_lookbehind - + if prev == None or current == None: return True elif current.type != RustLexerBase.RustLexer.DOT: return True else: - return prev.type not in RustLexerBase.flt_mp \ No newline at end of file + return prev.type not in RustLexerBase.flt_mp diff --git a/rust/RustLexer.g4 b/rust/RustLexer.g4 index afe41edfe0..71a61686e0 100644 --- a/rust/RustLexer.g4 +++ b/rust/RustLexer.g4 @@ -133,7 +133,9 @@ OUTER_BLOCK_DOC: BLOCK_COMMENT_OR_DOC: ( BLOCK_COMMENT | INNER_BLOCK_DOC | OUTER_BLOCK_DOC) -> channel (HIDDEN); -SHEBANG: {this.SOF()}? '\ufeff'? '#!' ~[\r\n]* -> channel(HIDDEN); +SHEBANG: {this.SOF()}? '\ufeff'? '#!' {this.nexti(97)}? ~[\r\n]* -> channel(HIDDEN); +// Shebang not in first line and #! cannot be followed by a '[' character shebang + //ISOLATED_CR // : '\r' {_input.LA(1)!='\n'}// not followed with \n ; diff --git a/rust/TypeScript/RustLexerBase.ts b/rust/TypeScript/RustLexerBase.ts index 32d755e7e4..ca32ba6358 100644 --- a/rust/TypeScript/RustLexerBase.ts +++ b/rust/TypeScript/RustLexerBase.ts @@ -30,6 +30,10 @@ export default abstract class RustLexerBase extends Lexer { next(expect: string): boolean { return this._input.LA(1) === expect.charCodeAt(0); } + nexti(expect: number): boolean { + return this._input.LA(1) === expect; + } + // Determine if a float dot is possible based on the next character floatDotPossible(): boolean { @@ -88,4 +92,4 @@ export default abstract class RustLexerBase extends Lexer { return true; } } -} \ No newline at end of file +}