Skip to content

Commit 25df449

Browse files
committed
Lexer parses bit values.
1 parent 7861701 commit 25df449

4 files changed

Lines changed: 53 additions & 21 deletions

File tree

src/Lexer.php

Lines changed: 45 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -559,26 +559,33 @@ public function parseNumber()
559559
// Below are the states of the machine and the conditions to change
560560
// the state.
561561
//
562-
// 1 ---------------------[ + or - ]---------------------> 1
563-
// 1 --------------------[ 0x or 0X ]--------------------> 2
564-
// 1 ---------------------[ 0 to 9 ]---------------------> 3
565-
// 1 ------------------------[ . ]-----------------------> 4
562+
// 1 --------------------[ + or - ]-------------------> 1
563+
// 1 -------------------[ 0x or 0X ]------------------> 2
564+
// 1 --------------------[ 0 to 9 ]-------------------> 3
565+
// 1 -----------------------[ . ]---------------------> 4
566+
// 1 -----------------------[ b ]---------------------> 7
566567
//
567-
// 2 ---------------------[ 0 to F ]---------------------> 2
568+
// 2 --------------------[ 0 to F ]-------------------> 2
568569
//
569-
// 3 ---------------------[ 0 to 9 ]---------------------> 3
570-
// 3 ------------------------[ . ]-----------------------> 4
571-
// 3 ---------------------[ e or E ]---------------------> 5
570+
// 3 --------------------[ 0 to 9 ]-------------------> 3
571+
// 3 -----------------------[ . ]---------------------> 4
572+
// 3 --------------------[ e or E ]-------------------> 5
572573
//
573-
// 4 ---------------------[ 0 to 9 ]---------------------> 4
574-
// 4 ---------------------[ e or E ]---------------------> 5
574+
// 4 --------------------[ 0 to 9 ]-------------------> 4
575+
// 4 --------------------[ e or E ]-------------------> 5
575576
//
576-
// 5 ----------------[ + or - or 0 to 9 ]----------------> 6
577+
// 5 ---------------[ + or - or 0 to 9 ]--------------> 6
578+
//
579+
// 7 -----------------------[ ' ]---------------------> 8
580+
//
581+
// 8 --------------------[ 0 or 1 ]-------------------> 8
582+
// 8 -----------------------[ ' ]---------------------> 9
577583
//
578584
// State 1 may be reached by negative numbers.
579585
// State 2 is reached only by hex numbers.
580586
// State 4 is reached only by float numbers.
581587
// State 5 is reached only by numbers in approximate form.
588+
// State 7 is reached only by numbers in bit representation.
582589
//
583590
// Valid final states are: 2, 3, 4, 6 and 9. Any parsing that finished in a
584591
// state other than these is invalid.
@@ -590,15 +597,19 @@ public function parseNumber()
590597
if ($state === 1) {
591598
if ($this->str[$this->last] === '-') {
592599
$flags |= Token::FLAG_NUMBER_NEGATIVE;
593-
} elseif (($this->str[$this->last] === '0') && ($this->last + 1 < $this->len)
594-
&& (($this->str[$this->last + 1] === 'x') || ($this->str[$this->last + 1] === 'X'))
600+
} elseif (($this->last + 1 < $this->len)
601+
&& ($this->str[$this->last] === '0')
602+
&& (($this->str[$this->last + 1] === 'x')
603+
|| ($this->str[$this->last + 1] === 'X'))
595604
) {
596605
$token .= $this->str[$this->last++];
597606
$state = 2;
598607
} elseif (($this->str[$this->last] >= '0') && ($this->str[$this->last] <= '9')) {
599608
$state = 3;
600609
} elseif ($this->str[$this->last] === '.') {
601610
$state = 4;
611+
} elseif ($this->str[$this->last] === 'b') {
612+
$state = 7;
602613
} elseif ($this->str[$this->last] !== '+') {
603614
// `+` is a valid character in a number.
604615
break;
@@ -642,10 +653,30 @@ public function parseNumber()
642653
// Just digits are valid characters.
643654
break;
644655
}
656+
} elseif ($state === 7) {
657+
$flags |= Token::FLAG_NUMBER_BINARY;
658+
if ($this->str[$this->last] === '\'') {
659+
$state = 8;
660+
} else {
661+
break;
662+
}
663+
} elseif ($state === 8) {
664+
if ($this->str[$this->last] === '\'') {
665+
$state = 9;
666+
} elseif (($this->str[$this->last] !== '0')
667+
&& ($this->str[$this->last] !== '1')
668+
) {
669+
break;
670+
}
671+
} elseif ($state === 9) {
672+
break;
645673
}
646674
$token .= $this->str[$this->last];
647675
}
648-
if (($state === 2) || ($state === 3) || (($token !== '.') && ($state === 4)) || ($state === 6)) {
676+
if (($state === 2) || ($state === 3)
677+
|| (($token !== '.') && ($state === 4))
678+
|| ($state === 6) || ($state === 9)
679+
) {
649680
--$this->last;
650681
return new Token($token, Token::TYPE_NUMBER, $flags);
651682
}

src/Token.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ class Token
134134
const FLAG_NUMBER_FLOAT = 2;
135135
const FLAG_NUMBER_APPROXIMATE = 4;
136136
const FLAG_NUMBER_NEGATIVE = 8;
137+
const FLAG_NUMBER_BINARY = 16;
137138

138139
// Strings related flags.
139140
const FLAG_STRING_SINGLE_QUOTES = 1;

tests/data/lexer/lexNumber.in

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
SELECT 12, 34, 5.67, 0x89, -10, --11, +12, .15, 0xFFa, 0xfFA, 0XFfA, 1e-10, 1e10, .5e10;
1+
SELECT 12, 34, 5.67, 0x89, -10, --11, +12, .15, 0xFFa, 0xfFA, 0XFfA, 1e-10, 1e10, .5e10, b'10';
22
-- invalid number
3-
SELECT 12ex10;
3+
SELECT 12ex10, b'15';

0 commit comments

Comments
 (0)