Skip to content

Commit acdbb18

Browse files
committed
Fix using escape chars
The lexer had too many different patterns, which caused various issues in the interpretation of escape chars. With this patch we are more compatible with Sphinx; the results were validated against Sphinx 7. The lexer now has a separate BACKSLASH token, which allows parsers and rules to handle backslashes as they need. By skipping the `\` in most rules, we never have conflicts, and rules do not interpret the char after the `\`.
1 parent 18c8776 commit acdbb18

18 files changed

Lines changed: 90 additions & 64 deletions

File tree

packages/guides-restructured-text/src/RestructuredText/Parser/InlineLexer.php

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ final class InlineLexer extends AbstractLexer
5353
public const STRONG_DELIMITER = 22;
5454
public const NBSP = 23;
5555
public const VARIABLE_DELIMITER = 24;
56-
public const ESCAPED_SIGN = 25;
56+
public const BACKSLASH = 25;
5757

5858
/**
5959
* Map between string position and position in token list.
@@ -68,8 +68,7 @@ final class InlineLexer extends AbstractLexer
6868
protected function getCatchablePatterns(): array
6969
{
7070
return [
71-
'\\\\``', // must be a separate case, as the next pattern would split in "\`" + "`", causing it to become a intepreted text
72-
'\\\\[\s\S]', // Escaping hell... needs escaped slash in regex, but also in php.
71+
'\\\\', // Escaping hell... needs escaped slash in regex, but also in php.
7372
'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}',
7473
'(?<=^|\s)[a-z0-9-]+_{2}', //Inline href.
7574
'(?<=^|\s)[a-z0-9-]+_{1}(?=[\s\.+]|$)', //Inline href.
@@ -133,7 +132,7 @@ protected function getType(string &$value)
133132
'[' => self::ANNOTATION_START,
134133
']' => self::ANNOTATION_END,
135134
'~' => self::NBSP,
136-
'\\``' => self::ESCAPED_SIGN,
135+
'\\' => self::BACKSLASH,
137136
default => null,
138137
};
139138

@@ -150,10 +149,6 @@ protected function getType(string &$value)
150149
return self::NAMED_REFERENCE;
151150
}
152151

153-
if (strlen($value) === 2 && $value[0] === '\\') {
154-
return self::ESCAPED_SIGN;
155-
}
156-
157152
if (strlen($value) === 1 && ctype_space($value)) {
158153
return self::WHITESPACE;
159154
}

packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/DefaultTextRoleRule.php

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,12 @@ public function apply(BlockContext $blockContext, InlineLexer $lexer): InlineNod
4949
->getDefaultTextRole()
5050
->processNode($blockContext->getDocumentParserContext(), '', $text, $text);
5151

52+
case InlineLexer::BACKSLASH:
53+
$lexer->moveNext();
54+
55+
$text .= $lexer->token->value;
56+
57+
break;
5258
default:
5359
$text .= $token->value;
5460
}

packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/EmphasisRule.php

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@ public function apply(BlockContext $blockContext, InlineLexer $lexer): InlineNod
4848

4949
return new EmphasisInlineNode([new PlainTextInlineNode($text)]);
5050

51+
case InlineLexer::BACKSLASH:
52+
$lexer->moveNext();
53+
$text .= $lexer->token->value;
54+
55+
break;
5156
default:
5257
$text .= $token->value;
5358
}

packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/EscapeRule.php

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
use phpDocumentor\Guides\RestructuredText\Parser\InlineLexer;
1919

2020
use function preg_match;
21-
use function substr;
2221

2322
/**
2423
* Rule to escape characters with a backslash
@@ -27,13 +26,14 @@ final class EscapeRule extends ReferenceRule
2726
{
2827
public function applies(InlineLexer $lexer): bool
2928
{
30-
return $lexer->token?->type === InlineLexer::ESCAPED_SIGN;
29+
return $lexer->token?->type === InlineLexer::BACKSLASH;
3130
}
3231

3332
public function apply(BlockContext $blockContext, InlineLexer $lexer): PlainTextInlineNode|null
3433
{
34+
$lexer->moveNext();
35+
3536
$char = $lexer->token?->value ?? '';
36-
$char = substr($char, 1);
3737
$lexer->moveNext();
3838

3939
if (preg_match('/^\s$/', $char)) {

packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/TextRoleRule.php

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@
1717
use phpDocumentor\Guides\RestructuredText\Parser\BlockContext;
1818
use phpDocumentor\Guides\RestructuredText\Parser\InlineLexer;
1919

20-
use function substr;
21-
2220
/**
2321
* Rule to parse for text roles such as ``:ref:`something` `
2422
*/
@@ -77,9 +75,11 @@ public function apply(BlockContext $blockContext, InlineLexer $lexer): InlineNod
7775
$rawPart .= $token->value;
7876

7977
break;
80-
case InlineLexer::ESCAPED_SIGN:
81-
$part .= substr($token->value, 1);
78+
case InlineLexer::BACKSLASH:
8279
$rawPart .= $token->value;
80+
$lexer->moveNext();
81+
$part .= $lexer->token->value;
82+
$rawPart .= $lexer->token->value;
8383

8484
break;
8585
default:

packages/guides-restructured-text/src/RestructuredText/TextRoles/LiteralTextRole.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,6 @@ public function processNode(
4343
string $content,
4444
string $rawContent,
4545
): GenericTextRoleInlineNode {
46-
return new GenericTextRoleInlineNode('literal', $rawContent, $this->getClass());
46+
return new GenericTextRoleInlineNode('literal', $content, $this->getClass());
4747
}
4848
}

packages/guides-restructured-text/tests/unit/Parser/InlineLexerTest.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ public static function inlineLexerProvider(): array
8080
],
8181
'Escaped double backtick' => [
8282
83-
[InlineLexer::ESCAPED_SIGN],
83+
[InlineLexer::BACKSLASH],
8484
],
8585
];
8686
}

packages/guides-restructured-text/tests/unit/Parser/Productions/InlineRules/TextRoleRuleTest.php

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,27 @@ public static function roleFormatProvider(): Generator
5151
'con`tent',
5252
'con\`tent',
5353
];
54+
55+
yield 'role with escaped backslash' => [
56+
':role:`a\\\\b`',
57+
'role',
58+
'a\\b',
59+
'a\\\\b',
60+
];
61+
62+
yield 'role with escaped backtick at end' => [
63+
':role:`text\``',
64+
'role',
65+
'text`',
66+
'text\`',
67+
];
68+
69+
yield 'role with only escaped backtick' => [
70+
':role:`\``',
71+
'role',
72+
'`',
73+
'\`',
74+
];
5475
}
5576

5677
#[DataProvider('roleFormatProvider')]
Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
<p><code>Backslashes are not escaped \\ \` \&lt; in literals</code></p>
2-
<p><code>Backslashes are not escaped \\ \` \&lt; in default text roles</code></p>
3-
<p><code>Backslashes are not escaped \\ \` \&lt; in code</code></p>
2+
<p><code>Backslashes are not escaped \ ` &lt; in default text roles</code></p>
3+
<p><code>Backslashes are not escaped \ ` &lt; in code</code></p>
4+
<p><code>Backslashes are not escaped \ ` &lt; in code</code></p>

tests/Functional/tests/code-textrole-no-escape/code-textrole-no-escape.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,5 @@
33
`Backslashes are not escaped \\ \` \< in default text roles`
44

55
:code:`Backslashes are not escaped \\ \` \< in code`
6+
7+
:literal:`Backslashes are not escaped \\ \` \< in code`

0 commit comments

Comments
 (0)