-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPatternIterator.php
More file actions
97 lines (79 loc) · 2.73 KB
/
PatternIterator.php
File metadata and controls
97 lines (79 loc) · 2.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
<?php declare(strict_types = 1);
namespace Nextras\MultiQueryParser;
use Iterator;
use IteratorAggregate;
use Nextras\MultiQueryParser\Exception\RuntimeException;
use function preg_last_error_msg;
use function preg_match;
use function strlen;
use function substr;
/**
* Applies a regex pattern to a chunked string stream, yielding matches sequentially.
*
* Safety mechanism: when a match consumes all remaining data in the buffer and the stream
* has more chunks, the match is held back (not yielded) until more data is loaded. This
* prevents yielding incomplete matches at chunk boundaries.
*
* Pattern design constraint: patterns with opening/closing delimiter constructs (such as
* string literals `'...'`, block comments `/*...* /`, or dollar-quoted strings `$$...$$`)
* must include `(*PRUNE)` after the opening delimiter, e.g. `' (*PRUNE) [^']* '`.
* Without this, when a chunk boundary falls inside such a construct, the closing delimiter
* is absent from the buffer, the construct fails to match, and the regex falls back to a
* generic single-character alternative (e.g. `(?!;) .`). This exposes characters inside the
* construct (like semicolons inside a string) as false delimiters, producing an incorrect
* match that terminates in the middle of the buffer — where the safety mechanism cannot
* detect the problem. The `(*PRUNE)` verb ensures that once the opening delimiter matches,
* the regex engine commits to the construct — if the closing delimiter is missing (because
* it is in a later chunk), the overall match fails, causing the iterator to load more data.
*
* @implements IteratorAggregate<int, array<mixed>>
*/
class PatternIterator implements IteratorAggregate
{
	/**
	 * @param Iterator<string> $stream  source of string chunks; read through valid()/current()/next() and never rewound here
	 * @param string $pattern  PCRE pattern handed to preg_match() on every match attempt
	 */
	public function __construct(
		private Iterator $stream,
		private string $pattern,
	) {
	}


	/**
	 * Returns the pattern currently used for matching.
	 */
	public function getPattern(): string
	{
		return $this->pattern;
	}


	/**
	 * Replaces the pattern used for matching.
	 *
	 * getIterator() reads the pattern again before each match attempt, so a pattern
	 * set while iteration is in progress is used from the next attempt on.
	 */
	public function setPattern(string $pattern): void
	{
		$this->pattern = $pattern;
	}


	/**
	 * Lazily yields the preg_match() result array of each successive match.
	 *
	 * Chunks are appended to an internal buffer one at a time. After each chunk is
	 * appended, matches are yielded until one of these happens: the pattern stops
	 * matching, the match is empty, or the match ends exactly at the end of the
	 * buffer while the stream still has chunks (in which case it is retried once
	 * the next chunk has been appended).
	 *
	 * NOTE(review): the read position is advanced by the length of the whole match
	 * only, which presumes the pattern matches exactly at the given offset
	 * (e.g. by being anchored with `\G`) — confirm against the patterns in use.
	 *
	 * @return Iterator<int, array<mixed>>
	 * @throws RuntimeException when preg_match() reports an error, or when the stream
	 *                          is exhausted and part of the buffer is still unmatched
	 */
	public function getIterator(): Iterator
	{
		$buffer = '';
		$consumed = 0;

		while ($this->stream->valid()) {
			// Discard the part that was already yielded and append the next chunk.
			$buffer = substr($buffer, $consumed) . $this->stream->current();
			$consumed = 0;

			// Advance first, so that valid() below tells whether further chunks exist.
			$this->stream->next();

			do {
				$status = preg_match($this->pattern, $buffer, $match, 0, $consumed);
				if ($status === false) {
					throw new RuntimeException(preg_last_error_msg());
				}
				if ($status !== 1) {
					break;
				}

				$length = strlen($match[0]);
				$touchesBufferEnd = $length + $consumed === strlen($buffer);

				// A match reaching the end of the buffer may be cut off by the chunk
				// boundary; hold it back while more data can still arrive.
				// An empty match would never advance the position, so stop on it too.
				if (($touchesBufferEnd && $this->stream->valid()) || $length === 0) {
					break;
				}

				yield $match;
				$consumed += $length;
			} while (true);
		}

		// Everything that was loaded must have been matched by now.
		if ($consumed !== strlen($buffer)) {
			throw new RuntimeException("Failed to parse stream, please report an issue.");
		}
	}
}