Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,22 @@ foreach ($parser->parseFileStream($stream) as $query) {

Available parsers: `MySqlMultiQueryParser`, `PostgreSqlMultiQueryParser`, `SqlServerMultiQueryParser`, `SqliteMultiQueryParser`.

**Preserve leading comments:**

By default, comments preceding a query are stripped. Pass `preserveLeadingComments: true` to any parser's constructor to keep them as a prefix of the yielded query instead. This is useful when the comments carry meaningful annotations:

```php
$parser = new MySqlMultiQueryParser(preserveLeadingComments: true);

$sql = "-- create the users table\nCREATE TABLE users (id INT);";

foreach ($parser->parseString($sql) as $query) {
echo $query; // "-- create the users table\nCREATE TABLE users (id INT)"
}
```

All comment styles supported by the given dialect (`--` and `/* */` in every dialect, plus `#` in MySQL) that directly precede a query are preserved with their original formatting, including any whitespace between the comments and the query; only the whitespace before the first comment is stripped. A comment that sits between two queries is treated as preceding the following one. Comments not followed by any query (e.g. a trailing comment at the end of input) are dropped.

### License

MIT. See full [license](license.md).
21 changes: 21 additions & 0 deletions src/BaseMultiQueryParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@

abstract class BaseMultiQueryParser implements IMultiQueryParser
{
/**
 * @param bool $preserveLeadingComments when true, buildQuery() prepends the match's
 *     `leadingComments` group to the returned query; when false (the default),
 *     buildQuery() returns the `query` group alone
 */
public function __construct(
private bool $preserveLeadingComments = false,
) {
}


/**
* @param positive-int $chunkSize
* @return Iterator<string>
Expand Down Expand Up @@ -55,6 +61,21 @@ public function parseString(string $s): Iterator
abstract public function parseStringStream(Iterator $stream): Iterator;


/**
 * Assembles the string to yield for a single pattern match.
 *
 * With leading-comment preservation enabled, the `leadingComments` group (treated as
 * empty when the match has none) is placed in front of the `query` group. Otherwise
 * the `query` group is returned on its own.
 *
 * @param array<array-key, string> $match
 */
protected function buildQuery(array $match): string
{
    if ($this->preserveLeadingComments) {
        $prefix = $match['leadingComments'] ?? '';

        return $prefix . $match['query'];
    }

    return $match['query'];
}


/**
* @param resource $fileStream
* @param positive-int $chunkSize
Expand Down
17 changes: 10 additions & 7 deletions src/MySqlMultiQueryParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ public function parseStringStream(Iterator $stream): Iterator
$patternIterator->setPattern($this->getQueryPattern($match['delimiter']));

} elseif (isset($match['query']) && $match['query'] !== '') {
yield $match['query'];
yield $this->buildQuery($match);
}
}
}
Expand All @@ -30,12 +30,15 @@ private function getQueryPattern(string $delimiter): string

return /** @lang PhpRegExp */ "
~
(?:
\\s
| /\\* (*PRUNE) (?: [^*]++ | \\*(?!/) )*+ \\*/
| --[^\\n]*+(?:\\n|\\z)
| \\#[^\\n]*+(?:\\n|\\z)
)*+
\\s*+
(?<leadingComments>
(?:
\\s
| /\\* (*PRUNE) (?: [^*]++ | \\*(?!/) )*+ \\*/
| --[^\\n]*+(?:\\n|\\z)
| \\#[^\\n]*+(?:\\n|\\z)
)*+
)

(?:
(?i:
Expand Down
2 changes: 1 addition & 1 deletion src/PatternIterator.php
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
* the regex engine commits to the construct — if the closing delimiter is missing (because
* it is in a later chunk), the overall match fails, causing the iterator to load more data.
*
* @implements IteratorAggregate<int, array<mixed>>
* @implements IteratorAggregate<int, array<array-key, string>>
*/
class PatternIterator implements IteratorAggregate
{
Expand Down
15 changes: 9 additions & 6 deletions src/PostgreSqlMultiQueryParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ public function parseStringStream(Iterator $stream): Iterator

foreach ($patternIterator as $match) {
if (isset($match['query']) && $match['query'] !== '') {
yield $match['query'];
yield $this->buildQuery($match);
}
}
}
Expand All @@ -29,11 +29,14 @@ private function getQueryPattern(): string
(?<nestedBc> /\\* (?: [^/*]++ | /(?!\\*) | \\*(?!/) | (?&nestedBc) )*+ \\*/ )
)

(?:
\\s
| /\\* (*PRUNE) (?: [^/*]++ | /(?!\\*) | \\*(?!/) | (?&nestedBc) )*+ \\*/
| -- [^\\n]*+
)*+
\\s*+
(?<leadingComments>
(?:
\\s
| /\\* (*PRUNE) (?: [^/*]++ | /(?!\\*) | \\*(?!/) | (?&nestedBc) )*+ \\*/
| -- [^\\n]*+
)*+
)

(?:
(?:
Expand Down
15 changes: 9 additions & 6 deletions src/SqlServerMultiQueryParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ public function parseStringStream(Iterator $stream): Iterator

foreach ($patternIterator as $match) {
if (isset($match['query']) && $match['query'] !== '') {
yield $match['query'];
yield $this->buildQuery($match);
}
}
}
Expand Down Expand Up @@ -45,11 +45,14 @@ private function getQueryPattern(): string
(?<nestedBc> /\\* (?: [^/*]++ | /(?!\\*) | \\*(?!/) | (?&nestedBc) )*+ \\*/ )
)

(?:
\\s
| /\\* (*PRUNE) (?: [^/*]++ | /(?!\\*) | \\*(?!/) | (?&nestedBc) )*+ \\*/
| -- [^\\n]*+
)*+
\\s*+
(?<leadingComments>
(?:
\\s
| /\\* (*PRUNE) (?: [^/*]++ | /(?!\\*) | \\*(?!/) | (?&nestedBc) )*+ \\*/
| -- [^\\n]*+
)*+
)

(?:
(?:
Expand Down
5 changes: 3 additions & 2 deletions src/SqliteMultiQueryParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ public function parseStringStream(Iterator $stream): Iterator

foreach ($patternIterator as $match) {
if (isset($match['query']) && $match['query'] !== '') {
yield $match['query'];
yield $this->buildQuery($match);
}
}
}
Expand Down Expand Up @@ -55,7 +55,8 @@ private function getQueryPattern(): string
)
)

(?&skip)
\s*+
(?<leadingComments> (?&skip) )

(?:
(?:
Expand Down
46 changes: 44 additions & 2 deletions tests/cases/MySqlMultiQueryParserTest.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ require_once __DIR__ . '/../inc/MultiQueryParserTestCase.php';

class MySqlMultiQueryParserTest extends MultiQueryParserTestCase
{
protected function createParser(): IMultiQueryParser
protected function createParser(bool $preserveLeadingComments = false): IMultiQueryParser
{
return new MySqlMultiQueryParser();
return new MySqlMultiQueryParser($preserveLeadingComments);
}


Expand Down Expand Up @@ -45,6 +45,48 @@ class MySqlMultiQueryParserTest extends MultiQueryParserTestCase
}


/**
 * Checks MySQL's `#` comment style with leading-comment preservation switched on.
 * Line (`--`) and block comments are not repeated here; the shared test in
 * MultiQueryParserTestCase covers them.
 *
 * @dataProvider providePreserveLeadingCommentsHashData
 * @param list<string> $expectedQueries
 */
public function testPreserveLeadingCommentsHash(string $content, array $expectedQueries): void
{
    $actualQueries = iterator_to_array(
        $this->createParser(preserveLeadingComments: true)->parseString($content),
    );

    Assert::same($expectedQueries, $actualQueries);
}


/**
 * Data sets for testPreserveLeadingCommentsHash(). Each entry is a pair:
 * the SQL input, then the list of queries the parser is expected to yield for it.
 *
 * @return list<array{string, list<string>}>
 */
protected function providePreserveLeadingCommentsHashData(): array
{
return [
// A # comment directly before a query is kept as that query's prefix
[
"# hash note\nSELECT 1;",
["# hash note\nSELECT 1"],
],
// --, # and /* */ mixed before one query: all kept, in order, with original line breaks
[
"-- a\n# b\n/* c */\nSELECT 1;",
["-- a\n# b\n/* c */\nSELECT 1"],
],
// A # comment after the first query's delimiter belongs to the second query, not the first
[
"SELECT 1; # between\nSELECT 2;",
["SELECT 1", "# between\nSELECT 2"],
],
// Input consisting solely of a # comment has no query to attach to, so nothing is yielded
["# only a comment", []],
];
}


/**
* @return list<array{string, list<string>}>
*/
Expand Down
4 changes: 2 additions & 2 deletions tests/cases/PostgreSqlMultiQueryParserTest.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ require_once __DIR__ . '/../inc/MultiQueryParserTestCase.php';

class PostgreSqlMultiQueryParserTest extends MultiQueryParserTestCase
{
protected function createParser(): IMultiQueryParser
protected function createParser(bool $preserveLeadingComments = false): IMultiQueryParser
{
return new PostgreSqlMultiQueryParser();
return new PostgreSqlMultiQueryParser($preserveLeadingComments);
}


Expand Down
4 changes: 2 additions & 2 deletions tests/cases/SqlServerMultiQueryParserTest.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ require_once __DIR__ . '/../inc/MultiQueryParserTestCase.php';

class SqlServerMultiQueryParserTest extends MultiQueryParserTestCase
{
protected function createParser(): IMultiQueryParser
protected function createParser(bool $preserveLeadingComments = false): IMultiQueryParser
{
return new SqlServerMultiQueryParser();
return new SqlServerMultiQueryParser($preserveLeadingComments);
}


Expand Down
4 changes: 2 additions & 2 deletions tests/cases/SqliteMultiQueryParserTest.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ require_once __DIR__ . '/../inc/MultiQueryParserTestCase.php';

class SqliteMultiQueryParserTest extends MultiQueryParserTestCase
{
protected function createParser(): IMultiQueryParser
protected function createParser(bool $preserveLeadingComments = false): IMultiQueryParser
{
return new SqliteMultiQueryParser();
return new SqliteMultiQueryParser($preserveLeadingComments);
}


Expand Down
107 changes: 106 additions & 1 deletion tests/inc/MultiQueryParserTestCase.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

abstract class MultiQueryParserTestCase extends TestCase
{
abstract protected function createParser(): IMultiQueryParser;
abstract protected function createParser(bool $preserveLeadingComments = false): IMultiQueryParser;


/**
Expand Down Expand Up @@ -80,6 +80,111 @@ public function testChunkBoundary(array $chunks, array $expectedQueries): void
}


/**
 * Leading-comment preservation for the comment styles every dialect shares
 * (line and block comments). Runs against each concrete parser; styles that exist
 * in only one dialect are tested in that dialect's subclass.
 *
 * @dataProvider provideCommonPreserveLeadingCommentsData
 * @param list<string> $expectedQueries
 */
public function testPreserveLeadingComments(string $content, array $expectedQueries): void
{
    $actualQueries = iterator_to_array(
        $this->createParser(preserveLeadingComments: true)->parseString($content),
    );

    Assert::same($expectedQueries, $actualQueries);
}


/**
 * Streaming must remain chunk-safe with the leading-comment capture in place:
 * feeding the input as two chunks, split at any byte offset, has to yield exactly
 * what parsing the whole string at once yields.
 */
public function testPreserveLeadingCommentsChunkBoundary(): void
{
    $lines = [
        '-- header comment',
        '-- second line',
        'SELECT 1;',
        '',
        'SELECT 2;',
        '/* block ; with semi */',
        'SELECT 3;',
        'SELECT 4; -- trailing',
        '-- leading before 5',
        'SELECT 5;',
    ];
    $content = implode("\n", $lines);
    $parser = $this->createParser(preserveLeadingComments: true);

    // The whole-string parse is the reference every split is compared against.
    $expected = iterator_to_array($parser->parseString($content));

    // Offsets 0 and strlen($content) are included, so an empty first chunk and an
    // empty second chunk are both exercised.
    foreach (range(0, strlen($content)) as $offset) {
        $stream = new \ArrayIterator([
            substr($content, 0, $offset),
            substr($content, $offset),
        ]);

        Assert::same(
            $expected,
            iterator_to_array($parser->parseStringStream($stream)),
            "Failed with chunk boundary at offset $offset",
        );
    }
}


/**
 * Data sets for testPreserveLeadingComments(). Each entry is a pair:
 * the SQL input, then the list of queries the parser is expected to yield for it
 * when leading-comment preservation is enabled.
 *
 * @return list<array{string, list<string>}>
 */
protected function provideCommonPreserveLeadingCommentsData(): array
{
return [
// A single -- comment is kept as a prefix of the query that follows it
[
"-- create the users table\nCREATE TABLE users (id INT);",
["-- create the users table\nCREATE TABLE users (id INT)"],
],
// Several consecutive -- lines are all kept, line breaks included
[
"-- line 1\n-- line 2\nSELECT 1;",
["-- line 1\n-- line 2\nSELECT 1"],
],
// Each query carries only the comment that precedes it, never its neighbour's
[
"-- first\nSELECT 1;\n-- second\nSELECT 2;",
["-- first\nSELECT 1", "-- second\nSELECT 2"],
],
// A comment after the first query's delimiter belongs to the second query, not the first
[
"SELECT 1; -- between\nSELECT 2;",
["SELECT 1", "-- between\nSELECT 2"],
],
// /* */ block comments are kept as well, including the space before the query
[
"/* block */ SELECT 1;",
["/* block */ SELECT 1"],
],
// Line and block comments mixed: both kept, in order, with original line breaks
[
"-- a\n/* b */\nSELECT 1;",
["-- a\n/* b */\nSELECT 1"],
],
// Blank lines before the first comment are dropped; blank lines between the
// comment and the query are part of the preserved prefix
[
"\n\n-- spaced\n\nSELECT 1;",
["-- spaced\n\nSELECT 1"],
],
// Input consisting solely of comments has no query to attach to, so nothing is yielded
["-- only a comment", []],
["-- line 1\n-- line 2\n", []],
["/* only a block */", []],
// A comment after the last query has no following query and is dropped
[
"SELECT 1;\n-- trailing",
["SELECT 1"],
],
// Surrounding whitespace without any comment adds no prefix to the query
[
"\n\nSELECT 1;\n\n",
["SELECT 1"],
],
];
}


public function testFile(): void
{
$parser = $this->createParser();
Expand Down
Loading