Skip to content

Commit 9130cca

Browse files
committed
Added utility to get first full statement from a buffer.
Misc coding style fixes.
1 parent e6a562e commit 9130cca

2 files changed

Lines changed: 117 additions & 38 deletions

File tree

src/Lexer.php

Lines changed: 54 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -63,37 +63,38 @@ class Lexer
6363
{
6464

6565
/**
66-
* A list of methods that are used in lexing the SQL query.
67-
*
68-
* @var array
69-
*/
66+
* A list of methods that are used in lexing the SQL query.
67+
*
68+
* @var array
69+
*/
7070
public static $PARSER_METHODS = array(
7171

72-
// It is best to put the parsers in order of their complexity
73-
// (ascending) and their occurrence rate (descending).
74-
//
75-
// Conflicts:
76-
//
77-
// 1. `parseDelimiter` and `parseUnknown`, `parseKeyword`, `parseNumber`
78-
// They fight over delimiter. The delimiter may be a keyword, a number
79-
// or almost any character which makes the delimiter one of the first
80-
// tokens that must be parsed.
81-
//
82-
// 1. `parseNumber` and `parseOperator`
83-
// They fight over `+` and `-`.
84-
//
85-
// 2. `parseComment` and `parseOperator`
86-
// They fight over `/` (as in ```/*comment*/``` or ```a / b```)
87-
//
88-
// 3. `parseBool` and `parseKeyword`
89-
// They fight over `TRUE` and `FALSE`.
90-
//
91-
// 4. `parseKeyword` and `parseUnknown`
92-
// They fight over words. `parseUnknown` does not know about keywords.
93-
94-
'parseDelimiter', 'parseWhitespace', 'parseNumber', 'parseComment',
95-
'parseOperator', 'parseBool', 'parseString', 'parseSymbol',
96-
'parseKeyword', 'parseUnknown'
72+
// It is best to put the parsers in order of their complexity
73+
// (ascending) and their occurrence rate (descending).
74+
//
75+
// Conflicts:
76+
//
77+
// 1. `parseDelimiter`, `parseUnknown`, `parseKeyword`, `parseNumber`
78+
// They fight over delimiter. The delimiter may be a keyword, a
79+
// number or almost any character which makes the delimiter one of
80+
// the first tokens that must be parsed.
81+
//
82+
// 1. `parseNumber` and `parseOperator`
83+
// They fight over `+` and `-`.
84+
//
85+
// 2. `parseComment` and `parseOperator`
86+
// They fight over `/` (as in ```/*comment*/``` or ```a / b```)
87+
//
88+
// 3. `parseBool` and `parseKeyword`
89+
// They fight over `TRUE` and `FALSE`.
90+
//
91+
// 4. `parseKeyword` and `parseUnknown`
92+
// They fight over words. `parseUnknown` does not know about
93+
// keywords.
94+
95+
'parseDelimiter', 'parseWhitespace', 'parseNumber',
96+
'parseComment', 'parseOperator', 'parseBool', 'parseString',
97+
'parseSymbol', 'parseKeyword', 'parseUnknown'
9798
);
9899

99100
/**
@@ -149,7 +150,7 @@ class Lexer
149150
*
150151
* @var string
151152
*/
152-
public $delimiter = ';';
153+
public $delimiter;
153154

154155
/**
155156
* The length of the delimiter.
@@ -160,7 +161,7 @@ class Lexer
160161
*
161162
* @var int
162163
*/
163-
public $delimiterLen = 1;
164+
public $delimiterLen;
164165

165166
/**
166167
* List of errors that occurred during lexing.
@@ -178,10 +179,12 @@ class Lexer
178179
/**
179180
* Constructor.
180181
*
181-
* @param string|UtfString $str The query to be lexed.
182-
* @param bool $strict Whether strict mode should be enabled or not.
182+
* @param string|UtfString $str The query to be lexed.
183+
* @param bool $strict Whether strict mode should be
184+
* enabled or not.
185+
* @param string $delimiter The delimiter to be used.
183186
*/
184-
public function __construct($str, $strict = false)
187+
public function __construct($str, $strict = false, $delimiter = null)
185188
{
186189
// `strlen` is used instead of `mb_strlen` because the lexer needs to
187190
// parse each byte of the input.
@@ -201,11 +204,24 @@ public function __construct($str, $strict = false)
201204
$this->strict = $strict;
202205

203206
// Setting the delimiter.
204-
$this->delimiter = static::$DEFAULT_DELIMITER;
207+
$this->setDelimiter(
208+
!empty($delimiter) ? $delimiter : static::$DEFAULT_DELIMITER
209+
);
205210

206211
$this->lex();
207212
}
208213

214+
/**
 * Replaces the delimiter used by this lexer and refreshes its cached length.
 *
 * @param string $delimiter The new delimiter.
 *
 * @return void
 */
public function setDelimiter($delimiter)
{
    // The length is measured in bytes (`strlen`) and stored next to the
    // delimiter so both properties always describe the same string.
    $this->delimiterLen = strlen($delimiter);
    $this->delimiter = $delimiter;
}
224+
209225
/**
210226
* Parses the string and extracts lexemes.
211227
*
@@ -521,9 +537,9 @@ public function parseComment()
521537
while ((++$this->last < $this->len) && ($this->str[$this->last] !== "\n")) {
522538
$token .= $this->str[$this->last];
523539
}
524-
if ($this->last < $this->len) {
525-
$token .= $this->str[$this->last];
526-
}
540+
541+
// Adding the line ending.
542+
$token .= "\n";
527543
}
528544
return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_SQL);
529545
}

src/Utils/Query.php

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,10 @@ public static function getClause($statement, $list, $clause, $type = 0, $skipFir
587587
for ($i = $statement->first; $i <= $statement->last; ++$i) {
588588
$token = $list->tokens[$i];
589589

590+
if ($token->type === Token::TYPE_COMMENT) {
591+
continue;
592+
}
593+
590594
if ($token->type === Token::TYPE_OPERATOR) {
591595
if ($token->value === '(') {
592596
++$brackets;
@@ -708,4 +712,63 @@ public static function replaceClauses($statement, $list, array $ops)
708712

709713
return $ret;
710714
}
715+
716+
/**
 * Gets the first full statement in the query.
 *
 * The query is lexed and its tokens are concatenated (comments skipped)
 * until the first non-empty delimiter token is met. Everything after that
 * delimiter is returned as the remaining query.
 *
 * @param string $query     The query to be analyzed.
 * @param string $delimiter The delimiter to be used. When empty, the lexer
 *                          falls back to its default delimiter.
 *
 * @return array Array with three elements, in this order:
 *               - the first full statement, trimmed and including its
 *                 delimiter, or `null` if no full statement was found;
 *               - the remaining part of the query (the whole query if no
 *                 full statement was found);
 *               - the delimiter that ended the statement, or the
 *                 `$delimiter` argument as given when no statement was found.
 */
public static function getFirstStatement($query, $delimiter = null)
{
    $lexer = new Lexer($query, false, $delimiter);
    $list = $lexer->list;

    /**
     * Whether a full statement was found.
     * @var bool
     */
    $fullStatement = false;

    /**
     * The first full statement.
     * @var string
     */
    $statement = '';

    for ($list->idx = 0; $list->idx < $list->count; ++$list->idx) {
        $token = $list->tokens[$list->idx];

        // Comments are not part of the extracted statement.
        if ($token->type === Token::TYPE_COMMENT) {
            continue;
        }

        // The raw text of the token is used so the statement is rebuilt
        // exactly as it was written.
        $statement .= $token->token;

        // Only a delimiter token that carries an actual (non-empty) value
        // ends a full statement.
        if (($token->type === Token::TYPE_DELIMITER) && (!empty($token->value))) {
            $delimiter = $token->value;
            $fullStatement = true;
            break;
        }
    }

    // No statement was found so we return the entire query as being the
    // remaining part.
    if (!$fullStatement) {
        return array(null, $query, $delimiter);
    }

    // At least one query was found so we have to build the rest of the
    // remaining query. The raw text (`token`) must be used here, exactly as
    // for the first statement: the processed `value` of a token may differ
    // from the text it was lexed from, which would corrupt the remaining
    // query.
    $query = '';
    for (++$list->idx; $list->idx < $list->count; ++$list->idx) {
        $query .= $list->tokens[$list->idx]->token;
    }

    return array(trim($statement), $query, $delimiter);
}
711774
}

0 commit comments

Comments
 (0)