cpython: 0b85ea4bd1af (original) (raw)
Mercurial > cpython
changeset 92628:0b85ea4bd1af
Issue #22437: Number of capturing groups in regular expression is no longer limited by 100. [#22437]
Serhiy Storchaka storchaka@gmail.com | |
---|---|
date | Mon, 29 Sep 2014 22:49:23 +0300 |
parents | 2b212a8186e0 |
children | f86fde20e9ce |
files | Doc/whatsnew/3.5.rst Lib/sre_compile.py Lib/sre_constants.py Lib/sre_parse.py Lib/test/test_re.py Misc/NEWS Modules/_sre.c Modules/sre.h |
diffstat | 8 files changed, 79 insertions(+), 30 deletions(-)[+] [-] Doc/whatsnew/3.5.rst 6 Lib/sre_compile.py 6 Lib/sre_constants.py 2 Lib/sre_parse.py 10 Lib/test/test_re.py 18 Misc/NEWS 3 Modules/_sre.c 57 Modules/sre.h 7 |
line wrap: on
line diff
--- a/Doc/whatsnew/3.5.rst +++ b/Doc/whatsnew/3.5.rst @@ -217,6 +217,12 @@ os
* :class:`os.stat_result` now has a :attr:`~os.stat_result.st_file_attributes`
  attribute on Windows (contributed by Ben Hoyt in :issue:`21719`). +re +-- + +* Number of capturing groups in regular expression is no longer limited by 100.
--- a/Lib/sre_compile.py +++ b/Lib/sre_compile.py @@ -470,12 +470,6 @@ def compile(p, flags=0): # print code
- # XXX: get rid of this limitation - if p.pattern.groups > 100:
raise AssertionError([](#l2.9)
"sorry, but this version only supports 100 named groups"[](#l2.10)
)[](#l2.11)
- # map in either direction groupindex = p.pattern.groupdict indexgroup = [None] * p.pattern.groups
--- a/Lib/sre_constants.py +++ b/Lib/sre_constants.py @@ -15,7 +15,7 @@ MAGIC = 20031017 -from _sre import MAXREPEAT +from _sre import MAXREPEAT, MAXGROUPS
# SRE standard exception (access as sre.error)
# should this really be here?
--- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -72,6 +72,8 @@ class Pattern: def opengroup(self, name=None): gid = self.groups self.groups = gid + 1
if self.groups > MAXGROUPS:[](#l4.7)
raise error("groups number is too large")[](#l4.8) if name is not None:[](#l4.9) ogid = self.groupdict.get(name, None)[](#l4.10) if ogid is not None:[](#l4.11)
@@ -695,8 +697,14 @@ def _parse(source, state): else: try: condgroup = int(condname)
if condgroup < 0:[](#l4.16)
raise ValueError[](#l4.17) except ValueError:[](#l4.18) raise error("bad character in group name")[](#l4.19)
if not condgroup:[](#l4.20)
raise error("bad group number")[](#l4.21)
if condgroup >= MAXGROUPS:[](#l4.22)
raise error("the group number is too large")[](#l4.23) else:[](#l4.24) # flags[](#l4.25) if not source.next in FLAGS:[](#l4.26)
@@ -822,6 +830,8 @@ def parse_template(source, pattern): index = int(name) if index < 0: raise error("negative group number")
if index >= MAXGROUPS:[](#l4.31)
raise error("the group number is too large")[](#l4.32) except ValueError:[](#l4.33) if not name.isidentifier():[](#l4.34) raise error("bad character in group name")[](#l4.35)
--- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -193,6 +193,7 @@ class ReTests(unittest.TestCase): def test_symbolic_groups(self): re.compile('(?P<a>x)(?P=a)(?(a)y)') re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
re.compile('(?P<a1>x)\1(?(1)y)')[](#l5.7) self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')[](#l5.8) self.assertRaises(re.error, re.compile, '(?Px)')[](#l5.9) self.assertRaises(re.error, re.compile, '(?P=)')[](#l5.10)
@@ -212,6 +213,10 @@ class ReTests(unittest.TestCase): re.compile('(?P<µ>x)(?P=µ)(?(µ)y)') re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)') self.assertRaises(re.error, re.compile, '(?P<©>x)')
# Support > 100 groups.[](#l5.15)
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))[](#l5.16)
pat = '(?:%s)(?(200)z|t)' % pat[](#l5.17)
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))[](#l5.18)
def test_symbolic_refs(self): self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx') @@ -228,6 +233,9 @@ class ReTests(unittest.TestCase): self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx') self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx') self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<©>', 'xx')
# Support > 100 groups.[](#l5.26)
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))[](#l5.27)
self.assertEqual(re.sub(pat, '\g<200>', 'xc8yzxc8y'), 'c8zc8')[](#l5.28)
def test_re_subn(self): self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2)) @@ -404,6 +412,10 @@ class ReTests(unittest.TestCase): self.assertIsNone(p.match('abd')) self.assertIsNone(p.match('ac'))
# Support > 100 groups.[](#l5.36)
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))[](#l5.37)
pat = '(?:%s)(?(200)z)' % pat[](#l5.38)
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))[](#l5.39)
def test_re_groupref(self): self.assertEqual(re.match(r'^(|)?([^()]+)\1$', '|a|').groups(), @@ -1070,8 +1082,10 @@ class ReTests(unittest.TestCase): # a RuntimeError is raised instead of OverflowError. long_overflow = 2**128 self.assertRaises(TypeError, re.finditer, "a", {})
self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])[](#l5.47)
self.assertRaises(TypeError, _sre.compile, {}, 0, [])[](#l5.48)
with self.assertRaises(OverflowError):[](#l5.49)
_sre.compile("abc", 0, [long_overflow], 0, [], [])[](#l5.50)
with self.assertRaises(TypeError):[](#l5.51)
_sre.compile({}, 0, [], 0, [], [])[](#l5.52)
def test_search_dot_unicode(self): self.assertTrue(re.search("123.*-", '123abc-'))
--- a/Misc/NEWS +++ b/Misc/NEWS @@ -145,6 +145,9 @@ Core and Builtins Library ------- +- Issue #22437: Number of capturing groups in regular expression is no longer + limited by 100. +
- Issue #17442: InteractiveInterpreter now displays the full chained traceback in its showtraceback method, to match the built in interactive interpreter.
--- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -357,6 +357,11 @@ state_init(SRE_STATE* state, PatternObje memset(state, 0, sizeof(SRE_STATE));
- state->mark = PyMem_New(void *, pattern->groups * 2);
- if (!state->mark) {
PyErr_NoMemory();[](#l7.9)
goto err;[](#l7.10)
- } state->lastmark = -1; state->lastindex = -1; @@ -409,6 +414,8 @@ state_init(SRE_STATE* state, PatternObje return string; err:
- PyMem_Del(state->mark);
- state->mark = NULL; if (state->buffer.buf) PyBuffer_Release(&state->buffer); return NULL; @@ -421,6 +428,8 @@ state_fini(SRE_STATE* state) PyBuffer_Release(&state->buffer); Py_XDECREF(state->string); data_stack_dealloc(state);
- PyMem_Del(state->mark);
- state->mark = NULL;
} /* calculate offset from start of string */ @@ -560,6 +569,7 @@ pattern_match(PatternObject *self, PyObj PyObject *pattern = NULL; SRE_STATE state; Py_ssize_t status;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|Onn$O:match", _keywords, @@ -579,12 +589,14 @@ pattern_match(PatternObject *self, PyObj status = sre_match(&state, PatternObject_GetCode(self), 0); TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
} static PyObject* @@ -592,6 +604,7 @@ pattern_fullmatch(PatternObject* self, P { SRE_STATE state; Py_ssize_t status;
PyObject *string = NULL, *string2 = NULL; Py_ssize_t start = 0; @@ -616,12 +629,14 @@ pattern_fullmatch(PatternObject* self, P status = sre_match(&state, PatternObject_GetCode(self), 1); TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
} static PyObject* @@ -629,6 +644,7 @@ pattern_search(PatternObject* self, PyOb { SRE_STATE state; Py_ssize_t status;
PyObject *string = NULL, *string2 = NULL; Py_ssize_t start = 0; @@ -652,12 +668,14 @@ pattern_search(PatternObject* self, PyOb TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
} static PyObject* @@ -1417,7 +1435,7 @@ static PyObject * PyObject* groupindex = NULL; PyObject* indexgroup = NULL;
- if (!PyArg_ParseTuple(args, "OiO!nOO", &pattern, &flags, &PyList_Type, &code, &groups, &groupindex, &indexgroup)) return NULL;
@@ -1933,10 +1951,9 @@ static int static int _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) {
- if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
code >= end || end[-1] != SRE_OP_SUCCESS)[](#l7.130) FAIL;[](#l7.131)
- if (groups == 0) /* fix for simplejson */
return _validate_inner(code, end-1, groups); } @@ -2747,6 +2764,12 @@ PyMODINIT_FUNC PyInit__sre(void) Py_DECREF(x); }groups = 100; /* 100 groups should always be safe */[](#l7.133)
- x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
- if (x) {
PyDict_SetItemString(d, "MAXGROUPS", x);[](#l7.143)
Py_DECREF(x);[](#l7.144)
- }
+ x = PyUnicode_FromString(copyright); if (x) { PyDict_SetItemString(d, "copyright", x);
--- a/Modules/sre.h +++ b/Modules/sre.h @@ -18,8 +18,10 @@ #define SRE_CODE Py_UCS4 #if SIZEOF_SIZE_T > 4
# define SRE_MAXREPEAT (~(SRE_CODE)0)
+# define SRE_MAXGROUPS ((~(SRE_CODE)0) / 2) #else
# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
+# define SRE_MAXGROUPS ((SRE_CODE)PY_SSIZE_T_MAX / SIZEOF_SIZE_T / 2) #endif typedef struct { @@ -52,9 +54,6 @@ typedef struct { typedef unsigned int (*SRE_TOLOWER_HOOK)(unsigned int ch); -/* FIXME: shouldn't be a constant, really... */ -#define SRE_MARK_SIZE 200 - typedef struct SRE_REPEAT_T { Py_ssize_t count; SRE_CODE* pattern; /* points to REPEAT operator arguments */ @@ -76,7 +75,7 @@ typedef struct { /* registers */ Py_ssize_t lastindex; Py_ssize_t lastmark;