Skip to content

Commit 205c5d2

Browse files
authored
Fix #1120: change unsafe regex in symbolic_operator.py (#1140)
CodeQL scan [report #566](https://github.com/quantumlib/OpenFermion/security/code-scanning/566) flagged a regex used on line 165 of `src/openfermion/ops/operators/symbolic_operator.py` as being potential subject to a DoS attach. The warning is this: ```python pattern = r'(.*?)\[(.*?)\]' # regex for a term for match in re.findall(pattern, long_string, flags=re.DOTALL): ``` ``` This regular expression that depends on a user-provided value may run slow on strings with many repetitions of 'a'. This regular expression that depends on a user-provided value may run slow on strings starting with '[' and with many repetitions of '[a'. This regular expression that depends on a user-provided value may run slow on strings with many repetitions of 'a'. This regular expression that depends on a user-provided value may run slow on strings starting with '[' and with many repetitions of '[a'. ``` This changes the regular expression to avoid `.*` yet still be able to match the same patterns as before. Additional tests in `symbolic_operator_test.py` verify that this will parse strings correctly.
1 parent ba5d0b8 commit 205c5d2

2 files changed

Lines changed: 93 additions & 1 deletion

File tree

src/openfermion/ops/operators/symbolic_operator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ def _long_string_init(self, long_string, coefficient):
161161
'1.5 [2^ 3] + 1.4 [3^ 0]'
162162
"""
163163

164-
pattern = r'(.*?)\[(.*?)\]' # regex for a term
164+
pattern = r'([^[\]]*)\[([^\]]*)\]' # regex for a term
165165
for match in re.findall(pattern, long_string, flags=re.DOTALL):
166166
# Determine the coefficient for this term
167167
coef_string = re.sub(r"\s+", "", match[0])

src/openfermion/ops/operators/symbolic_operator_test.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,98 @@ def test_init_long_str(self):
332332
correct = DummyOperator1('3^ 2', complex(-2.3, -1.7))
333333
self.assertEqual(len((fermion_op - correct).terms), 0)
334334

335+
def test_init_long_str_regex(self):
336+
"""Test the regex for parsing long string initializations."""
337+
# Test coefficient variations
338+
op = DummyOperator1('1.5 [0^ 1]')
339+
self.assertTrue(op == DummyOperator1('0^ 1', 1.5))
340+
341+
op = DummyOperator1('-1.5 [0^ 1]')
342+
self.assertTrue(op == DummyOperator1('0^ 1', -1.5))
343+
344+
op = DummyOperator1('2 [0^ 1]')
345+
self.assertTrue(op == DummyOperator1('0^ 1', 2.0))
346+
347+
op = DummyOperator1('-2 [0^ 1]')
348+
self.assertTrue(op == DummyOperator1('0^ 1', -2.0))
349+
350+
op = DummyOperator1('.5 [0^ 1]')
351+
self.assertTrue(op == DummyOperator1('0^ 1', 0.5))
352+
353+
op = DummyOperator1('(1+2j) [0^ 1]')
354+
self.assertTrue(op == DummyOperator1('0^ 1', 1 + 2j))
355+
356+
op = DummyOperator1('( 1 + 2j ) [0^ 1]')
357+
self.assertTrue(op == DummyOperator1('0^ 1', 1 + 2j))
358+
359+
op = DummyOperator1('-(1+2j) [0^ 1]')
360+
self.assertTrue(op == DummyOperator1('0^ 1', -1 - 2j))
361+
362+
op = DummyOperator1('3j [0^ 1]')
363+
self.assertTrue(op == DummyOperator1('0^ 1', 3j))
364+
365+
op = DummyOperator1('-3j [0^ 1]')
366+
self.assertTrue(op == DummyOperator1('0^ 1', -3j))
367+
368+
op = DummyOperator1('+1.5 [0^ 1]')
369+
self.assertTrue(op == DummyOperator1('0^ 1', 1.5))
370+
371+
op = DummyOperator1('- 1.5 [0^ 1]')
372+
self.assertTrue(op == DummyOperator1('0^ 1', -1.5))
373+
374+
op = DummyOperator1('[0^ 1]')
375+
self.assertTrue(op == DummyOperator1('0^ 1', 1.0))
376+
377+
op = DummyOperator1('-[0^ 1]')
378+
self.assertTrue(op == DummyOperator1('0^ 1', -1.0))
379+
380+
op = DummyOperator1('+[0^ 1]')
381+
self.assertTrue(op == DummyOperator1('0^ 1', 1.0))
382+
383+
# Test spacing variations
384+
op = DummyOperator1('1.5[0^ 1]')
385+
self.assertTrue(op == DummyOperator1('0^ 1', 1.5))
386+
387+
op = DummyOperator1('1.5 [0^ 1]')
388+
self.assertTrue(op == DummyOperator1('0^ 1', 1.5))
389+
390+
op = DummyOperator1('1.5 [ 0^ 1 ]')
391+
self.assertTrue(op == DummyOperator1('0^ 1', 1.5))
392+
393+
op = DummyOperator1(' 1.5 [0^ 1] ')
394+
self.assertTrue(op == DummyOperator1('0^ 1', 1.5))
395+
396+
# Test multiple terms
397+
op = DummyOperator1('1.5 [0^ 1] + 2.0 [2^ 3]')
398+
correct = DummyOperator1('0^ 1', 1.5) + DummyOperator1('2^ 3', 2.0)
399+
self.assertTrue(op == correct)
400+
401+
op = DummyOperator1('1.5 [0^ 1] - 2.0 [2^ 3]')
402+
correct = DummyOperator1('0^ 1', 1.5) + DummyOperator1('2^ 3', -2.0)
403+
self.assertTrue(op == correct)
404+
405+
op = DummyOperator1('1.5[0^ 1] - 2.0 [ 2^ 3 ]')
406+
correct = DummyOperator1('0^ 1', 1.5) + DummyOperator1('2^ 3', -2.0)
407+
self.assertTrue(op == correct)
408+
409+
# Test multiline strings
410+
op = DummyOperator1('1.5 [0^ 1]\n+\n2.0 [2^ 3]')
411+
correct = DummyOperator1('0^ 1', 1.5) + DummyOperator1('2^ 3', 2.0)
412+
self.assertTrue(op == correct)
413+
414+
# Test edge cases
415+
op = DummyOperator1('1.5 []')
416+
self.assertTrue(op == DummyOperator1((), 1.5))
417+
418+
op = DummyOperator1('1.5 [] - 0.5 []')
419+
self.assertTrue(op == DummyOperator1((), 1.0))
420+
421+
op = DummyOperator1('+ [0^ 1]')
422+
self.assertTrue(op == DummyOperator1('0^ 1', 1.0))
423+
424+
op = DummyOperator1('- [0^ 1]')
425+
self.assertTrue(op == DummyOperator1('0^ 1', -1.0))
426+
335427
def test_merges_multiple_whitespace(self):
336428
fermion_op = DummyOperator1(' \n ')
337429
self.assertEqual(fermion_op.terms, {(): 1})

0 commit comments

Comments
 (0)