[3.13] gh-140797: Forbid capturing groups in re.Scanner lexicon patte… · serhiy-storchaka/cpython@ee894d2 (original) (raw)
3 files changed
lines changed
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -399,9 +399,12 @@ def __init__(self, lexicon, flags=0): | ||
| 399 | 399 | s = _parser.State() |
| 400 | 400 | s.flags = flags |
| 401 | 401 | for phrase, action in lexicon: |
| 402 | +sub_pattern = _parser.parse(phrase, flags) | |
| 403 | +if sub_pattern.state.groups != 1: | |
| 404 | +raise ValueError("Cannot use capturing groups in re.Scanner") | |
| 402 | 405 | gid = s.opengroup() |
| 403 | 406 | p.append(_parser.SubPattern(s, [ |
| 404 | - (SUBPATTERN, (gid, 0, 0, _parser.parse(phrase, flags))), | |
| 407 | + (SUBPATTERN, (gid, 0, 0, sub_pattern)), | |
| 405 | 408 | ])) |
| 406 | 409 | s.closegroup(gid, p[-1]) |
| 407 | 410 | p = _parser.SubPattern(s, [(BRANCH, (None, p))]) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1638,6 +1638,24 @@ def s_int(scanner, token): return int(token) | ||
| 1638 | 1638 | (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5, |
| 1639 | 1639 | 'op+', 'bar'], '')) |
| 1640 | 1640 | |
| 1641 | +def test_bug_gh140797(self): | |
| 1642 | +# gh140797: Capturing groups are not allowed in re.Scanner | |
| 1643 | + | |
| 1644 | +msg = r"Cannot use capturing groups in re\.Scanner" | |
| 1645 | +# Capturing group throws an error | |
| 1646 | +with self.assertRaisesRegex(ValueError, msg): | |
| 1647 | +Scanner([("(a)b", None)]) | |
| 1648 | + | |
| 1649 | +# Named Group | |
| 1650 | +with self.assertRaisesRegex(ValueError, msg): | |
| 1651 | +Scanner([("(?P<name>a)", None)]) | |
| 1652 | + | |
| 1653 | +# Non-capturing groups should pass normally | |
| 1654 | +s = Scanner([("(?:a)b", lambda scanner, token: token)]) | |
| 1655 | +result, rem = s.scan("ab") | |
| 1656 | +self.assertEqual(result,['ab']) | |
| 1657 | +self.assertEqual(rem,'') | |
| 1658 | + | |
| 1641 | 1659 | def test_bug_448951(self): |
| 1642 | 1660 | # bug 448951 (similar to 429357, but with single char match) |
| 1643 | 1661 | # (Also test greedy matches.) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| 1 | +The undocumented :class:`!re.Scanner` class now forbids regular expressions containing capturing groups in its lexicon patterns. Patterns using capturing groups could | |
| 2 | +previously lead to crashes with segmentation fault. Use non-capturing groups (?:...) instead. |