From 09e1ca5fe4c1e122b06eb82a70f020e04951415a Mon Sep 17 00:00:00 2001 From: Ashinee-work Date: Sat, 10 Jan 2026 12:16:17 +0530 Subject: [PATCH 1/2] Fix tokenizing of backslash continuations followed by unindented lines --- src/blib2to3/pgen2/tokenize.py | 31 +++++++++++++++++ tests/test_tokenize.py | 63 ++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 4e3761f3028..d245e4bdc9f 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -147,6 +147,10 @@ def tokenize(source: str, grammar: Grammar | None = None) -> Iterator[TokenInfo] line, column = 1, 0 prev_token: pytokens.Token | None = None + in_backslash_continuation = False + # Track the balance of INDENT/DEDENT tokens skipped during continuation + # Positive means we skipped more DEDENTs than INDENTs + skipped_indent_balance = 0 try: for token in pytokens.tokenize(source): token = transform_whitespace(token, source, prev_token) @@ -163,6 +167,33 @@ def tokenize(source: str, grammar: Grammar | None = None) -> Iterator[TokenInfo] prev_token = token continue + # Track if we just saw a backslash continuation (NL token starting with \) + if token.type == TokenType.nl and token_str.startswith("\\"): + in_backslash_continuation = True + # Skip INDENT/DEDENT tokens that come during a backslash continuation + # (before the logical line ends with a NEWLINE token) + elif in_backslash_continuation and token.type == TokenType.dedent: + skipped_indent_balance += 1 + prev_token = token + continue + elif in_backslash_continuation and token.type == TokenType.indent: + skipped_indent_balance -= 1 + prev_token = token + continue + # NEWLINE ends the logical line and the continuation tracking + elif token.type == TokenType.newline: + in_backslash_continuation = False + # Don't reset balance here - we still need to skip matching tokens + # After continuation ends, skip tokens to balance what we skipped during continuation
+ elif skipped_indent_balance > 0 and token.type == TokenType.indent: + skipped_indent_balance -= 1 + prev_token = token + continue + elif skipped_indent_balance < 0 and token.type == TokenType.dedent: + skipped_indent_balance += 1 + prev_token = token + continue + source_line = lines[token.start_line - 1] if token.type == TokenType.identifier and token_str in ("async", "await"): diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py index efa7ad5e80d..fe7dc7bf490 100644 --- a/tests/test_tokenize.py +++ b/tests/test_tokenize.py @@ -98,6 +98,69 @@ def test_fstring() -> None: ) +def test_backslash_continuation() -> None: + """Test that backslash continuations with no indentation are handled correctly. + + This is a regression test for https://github.com/psf/black/issues/XXXX + where Black failed to parse code with backslash continuations followed by + unindented lines. + """ + # Simple case: backslash continuation with unindented number + assert_tokenizes( + "if True:\n foo = 1+\\\n2\n print(foo)\n", + [ + Token(type="NAME", string="if", start=(1, 0), end=(1, 2)), + Token(type="NAME", string="True", start=(1, 3), end=(1, 7)), + Token(type="OP", string=":", start=(1, 7), end=(1, 8)), + Token(type="NEWLINE", string="\n", start=(1, 8), end=(1, 9)), + Token(type="INDENT", string=" ", start=(2, 0), end=(2, 4)), + Token(type="NAME", string="foo", start=(2, 4), end=(2, 7)), + Token(type="OP", string="=", start=(2, 8), end=(2, 9)), + Token(type="NUMBER", string="1", start=(2, 10), end=(2, 11)), + Token(type="OP", string="+", start=(2, 11), end=(2, 12)), + Token(type="NL", string="\\\n", start=(2, 12), end=(2, 14)), + Token(type="NUMBER", string="2", start=(3, 0), end=(3, 1)), + Token(type="NEWLINE", string="\n", start=(3, 1), end=(3, 2)), + Token(type="NAME", string="print", start=(4, 4), end=(4, 9)), + Token(type="OP", string="(", start=(4, 9), end=(4, 10)), + Token(type="NAME", string="foo", start=(4, 10), end=(4, 13)), + Token(type="OP", string=")", start=(4, 13), 
end=(4, 14)), + Token(type="NEWLINE", string="\n", start=(4, 14), end=(4, 15)), + Token(type="DEDENT", string="", start=(5, 0), end=(5, 0)), + Token(type="ENDMARKER", string="", start=(5, 0), end=(5, 0)), + ], + ) + + # Multiple backslash continuations + assert_tokenizes( + "if True:\n result = 1+\\\n2+\\\n3\n print(result)\n", + [ + Token(type="NAME", string="if", start=(1, 0), end=(1, 2)), + Token(type="NAME", string="True", start=(1, 3), end=(1, 7)), + Token(type="OP", string=":", start=(1, 7), end=(1, 8)), + Token(type="NEWLINE", string="\n", start=(1, 8), end=(1, 9)), + Token(type="INDENT", string=" ", start=(2, 0), end=(2, 4)), + Token(type="NAME", string="result", start=(2, 4), end=(2, 10)), + Token(type="OP", string="=", start=(2, 11), end=(2, 12)), + Token(type="NUMBER", string="1", start=(2, 13), end=(2, 14)), + Token(type="OP", string="+", start=(2, 14), end=(2, 15)), + Token(type="NL", string="\\\n", start=(2, 15), end=(2, 17)), + Token(type="NUMBER", string="2", start=(3, 0), end=(3, 1)), + Token(type="OP", string="+", start=(3, 1), end=(3, 2)), + Token(type="NL", string="\\\n", start=(3, 2), end=(3, 4)), + Token(type="NUMBER", string="3", start=(4, 0), end=(4, 1)), + Token(type="NEWLINE", string="\n", start=(4, 1), end=(4, 2)), + Token(type="NAME", string="print", start=(5, 4), end=(5, 9)), + Token(type="OP", string="(", start=(5, 9), end=(5, 10)), + Token(type="NAME", string="result", start=(5, 10), end=(5, 16)), + Token(type="OP", string=")", start=(5, 16), end=(5, 17)), + Token(type="NEWLINE", string="\n", start=(5, 17), end=(5, 18)), + Token(type="DEDENT", string="", start=(6, 0), end=(6, 0)), + Token(type="ENDMARKER", string="", start=(6, 0), end=(6, 0)), + ], + ) + + # Run "echo some code | python tests/test_tokenize.py" to generate test cases. 
if __name__ == "__main__": code = sys.stdin.read() From d3b20ad8807865f399d55dbc97768ea4196508ff Mon Sep 17 00:00:00 2001 From: Ashinee-work Date: Sat, 10 Jan 2026 12:47:19 +0530 Subject: [PATCH 2/2] Strip trailing whitespace in test_tokenize.py to fix the build --- tests/test_tokenize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py index fe7dc7bf490..59bb52c8203 100644 --- a/tests/test_tokenize.py +++ b/tests/test_tokenize.py @@ -100,7 +100,7 @@ def test_fstring() -> None: def test_backslash_continuation() -> None: """Test that backslash continuations with no indentation are handled correctly. - + This is a regression test for https://github.com/psf/black/issues/XXXX where Black failed to parse code with backslash continuations followed by unindented lines. @@ -130,7 +130,7 @@ def test_backslash_continuation() -> None: Token(type="ENDMARKER", string="", start=(5, 0), end=(5, 0)), ], ) - + # Multiple backslash continuations assert_tokenizes( "if True:\n result = 1+\\\n2+\\\n3\n print(result)\n",