@@ -63,37 +63,38 @@ class Lexer
6363 {
6464
6565 /**
66- * A list of methods that are used in lexing the SQL query.
67- *
68- * @var array
69- */
66+ * A list of methods that are used in lexing the SQL query.
67+ *
68+ * @var array
69+ */
7070 public static $ PARSER_METHODS = array (
7171
72- // It is best to put the parsers in order of their complexity
73- // (ascending) and their occurrence rate (descending).
74- //
75- // Conflicts:
76- //
77- // 1. `parseDelimiter` and `parseUnknown`, `parseKeyword`, `parseNumber`
78- // They fight over delimiter. The delimiter may be a keyword, a number
79- // or almost any character which makes the delimiter one of the first
80- // tokens that must be parsed.
81- //
82- // 1. `parseNumber` and `parseOperator`
83- // They fight over `+` and `-`.
84- //
85- // 2. `parseComment` and `parseOperator`
86- // They fight over `/` (as in ```/*comment*/``` or ```a / b```)
87- //
88- // 3. `parseBool` and `parseKeyword`
89- // They fight over `TRUE` and `FALSE`.
90- //
91- // 4. `parseKeyword` and `parseUnknown`
92- // They fight over words. `parseUnknown` does not know about keywords.
93-
94- 'parseDelimiter ' , 'parseWhitespace ' , 'parseNumber ' , 'parseComment ' ,
95- 'parseOperator ' , 'parseBool ' , 'parseString ' , 'parseSymbol ' ,
96- 'parseKeyword ' , 'parseUnknown '
72+ // It is best to put the parsers in order of their complexity
73+ // (ascending) and their occurrence rate (descending).
74+ //
75+ // Conflicts:
76+ //
77+ // 1. `parseDelimiter`, `parseUnknown`, `parseKeyword`, `parseNumber`
78+ // They fight over delimiter. The delimiter may be a keyword, a
79+ // number or almost any character which makes the delimiter one of
80+ // the first tokens that must be parsed.
81+ //
82+ // 2. `parseNumber` and `parseOperator`
83+ // They fight over `+` and `-`.
84+ //
85+ // 3. `parseComment` and `parseOperator`
86+ // They fight over `/` (as in ```/*comment*/``` or ```a / b```).
87+ //
88+ // 4. `parseBool` and `parseKeyword`
89+ // They fight over `TRUE` and `FALSE`.
90+ //
91+ // 5. `parseKeyword` and `parseUnknown`
92+ // They fight over words. `parseUnknown` does not know about
93+ // keywords.
94+
95+ 'parseDelimiter ' , 'parseWhitespace ' , 'parseNumber ' ,
96+ 'parseComment ' , 'parseOperator ' , 'parseBool ' , 'parseString ' ,
97+ 'parseSymbol ' , 'parseKeyword ' , 'parseUnknown '
9798 );
9899
99100 /**
@@ -149,7 +150,7 @@ class Lexer
149150 *
150151 * @var string
151152 */
152- public $ delimiter = ' ; ' ;
153+ public $ delimiter ;
153154
154155 /**
155156 * The length of the delimiter.
@@ -160,7 +161,7 @@ class Lexer
160161 *
161162 * @var int
162163 */
163- public $ delimiterLen = 1 ;
164+ public $ delimiterLen ;
164165
165166 /**
166167 * List of errors that occurred during lexing.
@@ -178,10 +179,12 @@ class Lexer
178179 /**
179180 * Constructor.
180181 *
181- * @param string|UtfString $str The query to be lexed.
182- * @param bool $strict Whether strict mode should be enabled or not.
182+ * @param string|UtfString $str The query to be lexed.
183+ * @param bool $strict Whether strict mode should be
184+ * enabled or not.
185+ * @param string $delimiter The delimiter to be used.
183186 */
184- public function __construct ($ str , $ strict = false )
187+ public function __construct ($ str , $ strict = false , $ delimiter = null )
185188 {
186189 // `strlen` is used instead of `mb_strlen` because the lexer needs to
187190 // parse each byte of the input.
@@ -201,11 +204,24 @@ public function __construct($str, $strict = false)
201204 $ this ->strict = $ strict ;
202205
203206 // Setting the delimiter.
204- $ this ->delimiter = static ::$ DEFAULT_DELIMITER ;
207+ $ this ->setDelimiter (
208+ !empty ($ delimiter ) ? $ delimiter : static ::$ DEFAULT_DELIMITER
209+ );
205210
206211 $ this ->lex ();
207212 }
208213
214+ /**
215+ * Sets the delimiter and stores its `strlen` length in `delimiterLen`.
216+ *
217+ * @param string $delimiter The new delimiter.
218+ */
219+ public function setDelimiter ($ delimiter )
220+ {
221+ $ this ->delimiter = $ delimiter ;
222+ $ this ->delimiterLen = strlen ($ delimiter );
223+ }
224+
209225 /**
210226 * Parses the string and extracts lexemes.
211227 *
@@ -521,9 +537,9 @@ public function parseComment()
521537 while ((++$ this ->last < $ this ->len ) && ($ this ->str [$ this ->last ] !== "\n" )) {
522538 $ token .= $ this ->str [$ this ->last ];
523539 }
524- if ( $ this -> last < $ this -> len ) {
525- $ token .= $ this -> str [ $ this -> last ];
526- }
540+
541+ // Adding the line ending.
542+ $ token .= "\n" ;
527543 }
528544 return new Token ($ token , Token::TYPE_COMMENT , Token::FLAG_COMMENT_SQL );
529545 }
0 commit comments