Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions src/blib2to3/pgen2/tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,10 @@ def tokenize(source: str, grammar: Grammar | None = None) -> Iterator[TokenInfo]
line, column = 1, 0

prev_token: pytokens.Token | None = None
in_backslash_continuation = False
# Track the balance of INDENT/DEDENT tokens skipped during continuation
# Positive means we skipped more DEDENTs than INDENTs
skipped_indent_balance = 0
try:
for token in pytokens.tokenize(source):
token = transform_whitespace(token, source, prev_token)
Expand All @@ -163,6 +167,33 @@ def tokenize(source: str, grammar: Grammar | None = None) -> Iterator[TokenInfo]
prev_token = token
continue

# Track if we just saw a backslash continuation (NL token starting with \)
if token.type == TokenType.nl and token_str.startswith("\\"):
in_backslash_continuation = True
# Skip INDENT/DEDENT tokens that come during a backslash continuation
# (before the logical line ends with a NEWLINE token)
elif in_backslash_continuation and token.type == TokenType.dedent:
skipped_indent_balance += 1
prev_token = token
continue
elif in_backslash_continuation and token.type == TokenType.indent:
skipped_indent_balance -= 1
prev_token = token
continue
# NEWLINE ends the logical line and the continuation tracking
elif token.type == TokenType.newline:
in_backslash_continuation = False
# Don't reset balance here - we still need to skip matching tokens
# After continuation ends, skip tokens to balance what we skipped during continuation
elif skipped_indent_balance > 0 and token.type == TokenType.indent:
skipped_indent_balance -= 1
prev_token = token
continue
elif skipped_indent_balance < 0 and token.type == TokenType.dedent:
skipped_indent_balance += 1
prev_token = token
continue

source_line = lines[token.start_line - 1]

if token.type == TokenType.identifier and token_str in ("async", "await"):
Expand Down
63 changes: 63 additions & 0 deletions tests/test_tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,69 @@ def test_fstring() -> None:
)


def test_backslash_continuation() -> None:
    """Test that backslash continuations with no indentation are handled correctly.

    Regression test for Black failing to parse code in which a backslash-continued
    logical line has a continuation line indented *less* than the line it belongs
    to (here: column 0 inside an indented block).  The tokenizer must not emit
    spurious INDENT/DEDENT tokens around the ``NL`` token produced for the
    backslash, and must re-balance any it skipped once the logical line ends.
    """
    # Simple case: one backslash continuation whose next physical line starts at
    # column 0.  Expected stream: no DEDENT after the "\\\n" NL token and no
    # INDENT before resuming the block on line 4 — only the final DEDENT at EOF.
    assert_tokenizes(
        "if True:\n    foo = 1+\\\n2\n    print(foo)\n",
        [
            Token(type="NAME", string="if", start=(1, 0), end=(1, 2)),
            Token(type="NAME", string="True", start=(1, 3), end=(1, 7)),
            Token(type="OP", string=":", start=(1, 7), end=(1, 8)),
            Token(type="NEWLINE", string="\n", start=(1, 8), end=(1, 9)),
            Token(type="INDENT", string="    ", start=(2, 0), end=(2, 4)),
            Token(type="NAME", string="foo", start=(2, 4), end=(2, 7)),
            Token(type="OP", string="=", start=(2, 8), end=(2, 9)),
            Token(type="NUMBER", string="1", start=(2, 10), end=(2, 11)),
            Token(type="OP", string="+", start=(2, 11), end=(2, 12)),
            Token(type="NL", string="\\\n", start=(2, 12), end=(2, 14)),
            Token(type="NUMBER", string="2", start=(3, 0), end=(3, 1)),
            Token(type="NEWLINE", string="\n", start=(3, 1), end=(3, 2)),
            Token(type="NAME", string="print", start=(4, 4), end=(4, 9)),
            Token(type="OP", string="(", start=(4, 9), end=(4, 10)),
            Token(type="NAME", string="foo", start=(4, 10), end=(4, 13)),
            Token(type="OP", string=")", start=(4, 13), end=(4, 14)),
            Token(type="NEWLINE", string="\n", start=(4, 14), end=(4, 15)),
            Token(type="DEDENT", string="", start=(5, 0), end=(5, 0)),
            Token(type="ENDMARKER", string="", start=(5, 0), end=(5, 0)),
        ],
    )

    # Multiple consecutive backslash continuations: the skipped-token balance
    # must carry across every "\\\n" NL until the NEWLINE ends the logical line.
    assert_tokenizes(
        "if True:\n    result = 1+\\\n2+\\\n3\n    print(result)\n",
        [
            Token(type="NAME", string="if", start=(1, 0), end=(1, 2)),
            Token(type="NAME", string="True", start=(1, 3), end=(1, 7)),
            Token(type="OP", string=":", start=(1, 7), end=(1, 8)),
            Token(type="NEWLINE", string="\n", start=(1, 8), end=(1, 9)),
            Token(type="INDENT", string="    ", start=(2, 0), end=(2, 4)),
            Token(type="NAME", string="result", start=(2, 4), end=(2, 10)),
            Token(type="OP", string="=", start=(2, 11), end=(2, 12)),
            Token(type="NUMBER", string="1", start=(2, 13), end=(2, 14)),
            Token(type="OP", string="+", start=(2, 14), end=(2, 15)),
            Token(type="NL", string="\\\n", start=(2, 15), end=(2, 17)),
            Token(type="NUMBER", string="2", start=(3, 0), end=(3, 1)),
            Token(type="OP", string="+", start=(3, 1), end=(3, 2)),
            Token(type="NL", string="\\\n", start=(3, 2), end=(3, 4)),
            Token(type="NUMBER", string="3", start=(4, 0), end=(4, 1)),
            Token(type="NEWLINE", string="\n", start=(4, 1), end=(4, 2)),
            Token(type="NAME", string="print", start=(5, 4), end=(5, 9)),
            Token(type="OP", string="(", start=(5, 9), end=(5, 10)),
            Token(type="NAME", string="result", start=(5, 10), end=(5, 16)),
            Token(type="OP", string=")", start=(5, 16), end=(5, 17)),
            Token(type="NEWLINE", string="\n", start=(5, 17), end=(5, 18)),
            Token(type="DEDENT", string="", start=(6, 0), end=(6, 0)),
            Token(type="ENDMARKER", string="", start=(6, 0), end=(6, 0)),
        ],
    )


# Run "echo some code | python tests/test_tokenize.py" to generate test cases.
if __name__ == "__main__":
code = sys.stdin.read()
Expand Down
Loading