Skip to content

Commit 0c25d8c

Browse files
walac and glemco
authored and committed
rv/rvgen: refactor automata.py to use iterator-based parsing
Refactor the DOT file parsing logic in automata.py to use Python's iterator-based patterns instead of manual cursor indexing. The previous implementation relied on while loops with explicit cursor management, which made the code prone to off-by-one errors and would crash on malformed input files containing empty lines. The new implementation uses enumerate and itertools.islice to iterate over lines, eliminating manual cursor tracking. Functions that search for specific markers now use for loops with early returns and explicit AutomataError exceptions for missing markers, rather than assuming the markers exist. Additional bounds checking ensures that split line arrays have sufficient elements before accessing specific indices, preventing IndexError exceptions on malformed DOT files. The matrix creation and event variable extraction methods now use functional patterns with map combined with itertools.islice, making the intent clearer while maintaining the same behavior. Minor improvements include using extend instead of append in a loop, adding empty file validation, and replacing enumerate with range where the enumerated value was unused.

Signed-off-by: Wander Lairson Costa <wander@redhat.com>
Reviewed-by: Gabriele Monaco <gmonaco@redhat.com>
Link: https://lore.kernel.org/r/20260223162407.147003-12-wander@redhat.com
Signed-off-by: Gabriele Monaco <gmonaco@redhat.com>
1 parent d474fed commit 0c25d8c

1 file changed

Lines changed: 71 additions & 45 deletions

File tree

tools/verification/rvgen/rvgen/automata.py

Lines changed: 71 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
import ntpath
1212
import re
1313
from typing import Iterator
14+
from itertools import islice
1415

1516
class _ConstraintKey:
1617
"""Base class for constraint keys."""
@@ -89,37 +90,54 @@ def __get_model_name(self) -> str:
8990
return model_name
9091

9192
def __open_dot(self) -> list[str]:
92-
cursor = 0
9393
dot_lines = []
9494
try:
9595
with open(self.__dot_path) as dot_file:
96-
dot_lines = dot_file.read().splitlines()
96+
dot_lines = dot_file.readlines()
9797
except OSError as exc:
9898
raise AutomataError(exc.strerror) from exc
9999

100+
if not dot_lines:
101+
raise AutomataError(f"{self.__dot_path} is empty")
102+
100103
# checking the first line:
101-
line = dot_lines[cursor].split()
104+
line = dot_lines[0].split()
102105

103-
if (line[0] != "digraph") or (line[1] != "state_automaton"):
106+
if len(line) < 2 or line[0] != "digraph" or line[1] != "state_automaton":
104107
raise AutomataError(f"Not a valid .dot format: {self.__dot_path}")
105-
else:
106-
cursor += 1
108+
107109
return dot_lines
108110

109111
def __get_cursor_begin_states(self) -> int:
110-
cursor = 0
111-
while self.__dot_lines[cursor].split()[0] != "{node":
112-
cursor += 1
113-
return cursor
112+
for cursor, line in enumerate(self.__dot_lines):
113+
split_line = line.split()
114+
115+
if len(split_line) and split_line[0] == "{node":
116+
return cursor
117+
118+
raise AutomataError("Could not find a beginning state")
114119

115120
def __get_cursor_begin_events(self) -> int:
116-
cursor = 0
117-
while self.__dot_lines[cursor].split()[0] != "{node":
118-
cursor += 1
119-
while self.__dot_lines[cursor].split()[0] == "{node":
120-
cursor += 1
121-
# skip initial state transition
122-
cursor += 1
121+
state = 0
122+
cursor = 0 # make pyright happy
123+
124+
for cursor, line in enumerate(self.__dot_lines):
125+
line = line.split()
126+
if not line:
127+
continue
128+
129+
if state == 0:
130+
if line[0] == "{node":
131+
state = 1
132+
elif line[0] != "{node":
133+
break
134+
else:
135+
raise AutomataError("Could not find beginning event")
136+
137+
cursor += 1 # skip initial state transition
138+
if cursor == len(self.__dot_lines):
139+
raise AutomataError("Dot file ended after event beginning")
140+
123141
return cursor
124142

125143
def __get_state_variables(self) -> tuple[list[str], str, list[str]]:
@@ -131,26 +149,27 @@ def __get_state_variables(self) -> tuple[list[str], str, list[str]]:
131149
cursor = self.__get_cursor_begin_states()
132150

133151
# process nodes
134-
while self.__dot_lines[cursor].split()[0] == "{node":
135-
line = self.__dot_lines[cursor].split()
136-
raw_state = line[-1]
152+
for line in islice(self.__dot_lines, cursor, None):
153+
split_line = line.split()
154+
if not split_line or split_line[0] != "{node":
155+
break
156+
157+
raw_state = split_line[-1]
137158

138159
# "enabled_fired"}; -> enabled_fired
139160
state = raw_state.replace('"', '').replace('};', '').replace(',', '_')
140161
if state.startswith(self.init_marker):
141162
initial_state = state[len(self.init_marker):]
142163
else:
143164
states.append(state)
144-
if "doublecircle" in self.__dot_lines[cursor]:
165+
if "doublecircle" in line:
145166
final_states.append(state)
146167
has_final_states = True
147168

148-
if "ellipse" in self.__dot_lines[cursor]:
169+
if "ellipse" in line:
149170
final_states.append(state)
150171
has_final_states = True
151172

152-
cursor += 1
153-
154173
states = sorted(set(states))
155174
states.remove(initial_state)
156175

@@ -163,18 +182,21 @@ def __get_state_variables(self) -> tuple[list[str], str, list[str]]:
163182
return states, initial_state, final_states
164183

165184
def __get_event_variables(self) -> tuple[list[str], list[str]]:
185+
events: list[str] = []
186+
envs: list[str] = []
166187
# here we are at the begin of transitions, take a note, we will return later.
167188
cursor = self.__get_cursor_begin_events()
168189

169-
events = []
170-
envs = []
171-
while self.__dot_lines[cursor].lstrip()[0] == '"':
190+
for line in map(str.lstrip, islice(self.__dot_lines, cursor, None)):
191+
if not line.startswith('"'):
192+
break
193+
172194
# transitions have the format:
173195
# "all_fired" -> "both_fired" [ label = "disable_irq" ];
174196
# ------------ event is here ------------^^^^^
175-
if self.__dot_lines[cursor].split()[1] == "->":
176-
line = self.__dot_lines[cursor].split()
177-
event = "".join(line[line.index("label") + 2:-1]).replace('"', '')
197+
split_line = line.split()
198+
if len(split_line) > 1 and split_line[1] == "->":
199+
event = "".join(split_line[split_line.index("label") + 2:-1]).replace('"', '')
178200

179201
# when a transition has more than one label, they are like this
180202
# "local_irq_enable\nhw_local_irq_enable_n"
@@ -187,21 +209,20 @@ def __get_event_variables(self) -> tuple[list[str], list[str]]:
187209
ev, *constr = i.split(";")
188210
if constr:
189211
if len(constr) > 2:
190-
raise ValueError("Only 1 constraint and 1 reset are supported")
212+
raise AutomataError("Only 1 constraint and 1 reset are supported")
191213
envs += self.__extract_env_var(constr)
192214
events.append(ev)
193215
else:
194216
# state labels have the format:
195217
# "enable_fired" [label = "enable_fired\ncondition"];
196218
# ----- label is here -----^^^^^
197219
# label and node name must be the same, condition is optional
198-
state = self.__dot_lines[cursor].split("label")[1].split('"')[1]
220+
state = line.split("label")[1].split('"')[1]
199221
_, *constr = state.split("\\n")
200222
if constr:
201223
if len(constr) > 1:
202-
raise ValueError("Only 1 constraint is supported in the state")
224+
raise AutomataError("Only 1 constraint is supported in the state")
203225
envs += self.__extract_env_var([constr[0].replace(" ", "")])
204-
cursor += 1
205226

206227
return sorted(set(events)), sorted(set(envs))
207228

@@ -265,18 +286,24 @@ def __create_matrix(self) -> tuple[list[list[str]], dict[_ConstraintKey, list[st
265286
nr_state += 1
266287

267288
# declare the matrix....
268-
matrix = [[self.invalid_state_str for x in range(nr_event)] for y in range(nr_state)]
289+
matrix = [[self.invalid_state_str for _ in range(nr_event)] for _ in range(nr_state)]
269290
constraints: dict[_ConstraintKey, list[str]] = {}
270291

271292
# and we are back! Let's fill the matrix
272293
cursor = self.__get_cursor_begin_events()
273294

274-
while self.__dot_lines[cursor].lstrip()[0] == '"':
275-
if self.__dot_lines[cursor].split()[1] == "->":
276-
line = self.__dot_lines[cursor].split()
277-
origin_state = line[0].replace('"', '').replace(',', '_')
278-
dest_state = line[2].replace('"', '').replace(',', '_')
279-
possible_events = "".join(line[line.index("label") + 2:-1]).replace('"', '')
295+
for line in map(str.lstrip,
296+
islice(self.__dot_lines, cursor, None)):
297+
298+
if not line or line[0] != '"':
299+
break
300+
301+
split_line = line.split()
302+
303+
if len(split_line) > 2 and split_line[1] == "->":
304+
origin_state = split_line[0].replace('"', '').replace(',', '_')
305+
dest_state = split_line[2].replace('"', '').replace(',', '_')
306+
possible_events = "".join(split_line[split_line.index("label") + 2:-1]).replace('"', '')
280307
for event in possible_events.split("\\n"):
281308
event, *constr = event.split(";")
282309
if constr:
@@ -287,22 +314,21 @@ def __create_matrix(self) -> tuple[list[list[str]], dict[_ConstraintKey, list[st
287314
self.self_loop_reset_events.add(event)
288315
matrix[states_dict[origin_state]][events_dict[event]] = dest_state
289316
else:
290-
state = self.__dot_lines[cursor].split("label")[1].split('"')[1]
317+
state = line.split("label")[1].split('"')[1]
291318
state, *constr = state.replace(" ", "").split("\\n")
292319
if constr:
293320
constraints[_StateConstraintKey(states_dict[state])] = constr
294-
cursor += 1
295321

296322
return matrix, constraints
297323

298324
def __store_init_events(self) -> tuple[list[bool], list[bool]]:
299325
events_start = [False] * len(self.events)
300326
events_start_run = [False] * len(self.events)
301-
for i, _ in enumerate(self.events):
327+
for i in range(len(self.events)):
302328
curr_event_will_init = 0
303329
curr_event_from_init = False
304330
curr_event_used = 0
305-
for j, _ in enumerate(self.states):
331+
for j in range(len(self.states)):
306332
if self.function[j][i] != self.invalid_state_str:
307333
curr_event_used += 1
308334
if self.function[j][i] == self.initial_state:

0 commit comments

Comments
 (0)