Skip to content

Commit d99e99a

Browse files
committed
tokenizer
1 parent 7963a3b commit d99e99a

1 file changed

Lines changed: 21 additions & 4 deletions

File tree

cstruct/c_parser.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424

2525
import re
2626
from collections import OrderedDict
27-
from typing import Union, Optional, Any, Dict, Type, TYPE_CHECKING
27+
from typing import Union, Optional, Any, Dict, List, Type, TYPE_CHECKING
2828
from .base import DEFINES, ENUMS, TYPEDEFS, STRUCTS
2929
from .field import calculate_padding, Kind, FieldType
3030
from .c_expr import c_eval
@@ -36,6 +36,8 @@
3636

3737
__all__ = ['parse_struct', 'parse_struct_def', 'parse_enum_def', 'Tokens']
3838

# Characters that end the token currently being accumulated.
# Punctuation separators are emitted as single-character tokens;
# the whitespace ones (see SPACES) are discarded.
SEPARATORS = [" ", "\t", "\n", ";", "{", "}", ":", ",", "="]
# Whitespace separators — a subset of SEPARATORS that never become tokens.
SPACES = [" ", "\t", "\n"]
3941

4042
class Tokens(object):
4143
def __init__(self, text: str) -> None:
@@ -52,9 +54,24 @@ def __init__(self, text: str) -> None:
5254
raise ParserError(f"Parsing line {line}")
5355
else:
5456
lines.append(line)
55-
text = " ".join(lines)
56-
text = text.replace(";", " ; ").replace("{", " { ").replace("}", " } ").replace(",", " , ").replace("=", " = ")
57-
self.tokens = text.split()
57+
text = "\n".join(lines)
58+
self.tokens = self.tokenize(text)
59+
60+
def tokenize(self, text) -> List[str]:
61+
tokens: List[str] = []
62+
t: List[str] = []
63+
for c in text:
64+
if c in SEPARATORS:
65+
if t:
66+
tokens.append("".join(t))
67+
t.clear()
68+
if c not in SPACES:
69+
tokens.append(c)
70+
else:
71+
t.append(c)
72+
if t:
73+
tokens.append(t.getvalue())
74+
return tokens
5875

5976
def pop(self) -> str:
6077
return self.tokens.pop(0)

0 commit comments

Comments
 (0)