[3.13] gh-140797: Forbid capturing groups in re.Scanner lexicon patte… · serhiy-storchaka/cpython@ee894d2 (original) (raw)

3 files changed

lines changed

Original file line number Diff line number Diff line change
@@ -399,9 +399,12 @@ def __init__(self, lexicon, flags=0):
399 399 s = _parser.State()
400 400 s.flags = flags
401 401 for phrase, action in lexicon:
402 +sub_pattern = _parser.parse(phrase, flags)
403 +if sub_pattern.state.groups != 1:
404 +raise ValueError("Cannot use capturing groups in re.Scanner")
402 405 gid = s.opengroup()
403 406 p.append(_parser.SubPattern(s, [
404 - (SUBPATTERN, (gid, 0, 0, _parser.parse(phrase, flags))),
407 + (SUBPATTERN, (gid, 0, 0, sub_pattern)),
405 408 ]))
406 409 s.closegroup(gid, p[-1])
407 410 p = _parser.SubPattern(s, [(BRANCH, (None, p))])
Original file line number Diff line number Diff line change
@@ -1638,6 +1638,24 @@ def s_int(scanner, token): return int(token)
1638 1638 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
1639 1639 'op+', 'bar'], ''))
1640 1640
1641 +def test_bug_gh140797(self):
1642 +# gh140797: Capturing groups are not allowed in re.Scanner
1643 +
1644 +msg = r"Cannot use capturing groups in re\.Scanner"
1645 +# Capturing group throws an error
1646 +with self.assertRaisesRegex(ValueError, msg):
1647 +Scanner([("(a)b", None)])
1648 +
1649 +# Named Group
1650 +with self.assertRaisesRegex(ValueError, msg):
1651 +Scanner([("(?P<name>a)", None)])
1652 +
1653 +# Non-capturing groups should pass normally
1654 +s = Scanner([("(?:a)b", lambda scanner, token: token)])
1655 +result, rem = s.scan("ab")
1656 +self.assertEqual(result,['ab'])
1657 +self.assertEqual(rem,'')
1658 +
1641 1659 def test_bug_448951(self):
1642 1660 # bug 448951 (similar to 429357, but with single char match)
1643 1661 # (Also test greedy matches.)
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
1 +The undocumented :class:`!re.Scanner` class now forbids regular expressions containing capturing groups in its lexicon patterns. Patterns using capturing groups could
2 +previously lead to crashes with a segmentation fault. Use non-capturing groups ``(?:...)`` instead.