cpython: e295ad9be16d (original) (raw)
Mercurial > cpython
changeset 94716:e295ad9be16d 3.4
Issues #814253, #9179: Warnings now are raised when group references and conditional group references are used in lookbehind assertions in regular expressions. [#814253]
Serhiy Storchaka storchaka@gmail.com | |
---|---|
date | Sat, 21 Feb 2015 12:08:52 +0200 |
parents | 4dc8b7ed8973 |
children | a291916333ee 1628484c9408 |
files | Doc/library/re.rst Lib/sre_parse.py Lib/test/test_re.py Misc/NEWS |
diffstat | 4 files changed, 61 insertions(+), 3 deletions(-)[+] [-] Doc/library/re.rst 7 Lib/sre_parse.py 20 Lib/test/test_re.py 33 Misc/NEWS 4 |
line wrap: on
line diff
--- a/Doc/library/re.rst
+++ b/Doc/library/re.rst
@@ -281,7 +281,9 @@ The special characters are:
assertion`. ``(?<=abc)def`` will find a match in ``abcdef``, since the
lookbehind will back up 3 characters and check if the contained pattern matches.
The contained pattern must only match strings of some fixed length, meaning that
``abc`` or ``a|b`` are allowed, but ``a*`` and ``a{3,4}`` are not. Group references are not supported even if they match strings of some fixed length.
- Note that
patterns which start with positive lookbehind assertions will not match at the
beginning of the string being searched; you will most likely want to use the
:func:`search` function rather than the :func:`match`
function: @@ -301,7 +303,8 @@ The special characters are: Matches if the current position in the string is not preceded by a match for...
. This is called a :dfn:`negative lookbehind assertion`
. Similar to positive lookbehind assertions, the contained pattern must only match strings of
- some fixed length and shouldn't contain group references.
- Patterns which start with negative lookbehind assertions may match at the beginning of the string being searched.
(?(id/name)yes-pattern|no-pattern)
--- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -69,6 +69,8 @@ class Pattern: self.open = [] self.groups = 1 self.groupdict = {}
self.lookbehind = 0[](#l2.7)
+ def opengroup(self, name=None): gid = self.groups self.groups = gid + 1 @@ -352,6 +354,11 @@ def _escape(source, escape, state): if group < state.groups: if not state.checkgroup(group): raise error("cannot refer to open group")
if state.lookbehind:[](#l2.16)
import warnings[](#l2.17)
warnings.warn('group references in lookbehind '[](#l2.18)
'assertions are not supported',[](#l2.19)
RuntimeWarning)[](#l2.20) return GROUPREF, group[](#l2.21) raise ValueError[](#l2.22) if len(escape) == 2:[](#l2.23)
@@ -630,6 +637,11 @@ def _parse(source, state): if gid is None: msg = "unknown group name: {0!r}".format(name) raise error(msg)
if state.lookbehind:[](#l2.28)
import warnings[](#l2.29)
warnings.warn('group references in lookbehind '[](#l2.30)
'assertions are not supported',[](#l2.31)
RuntimeWarning)[](#l2.32) subpatternappend((GROUPREF, gid))[](#l2.33) continue[](#l2.34) else:[](#l2.35)
@@ -658,7 +670,10 @@ def _parse(source, state): raise error("syntax error") dir = -1 # lookbehind char = sourceget()
state.lookbehind += 1[](#l2.40) p = _parse_sub(source, state)[](#l2.41)
if dir < 0:[](#l2.42)
state.lookbehind -= 1[](#l2.43) if not sourcematch(")"):[](#l2.44) raise error("unbalanced parenthesis")[](#l2.45) if char == "=":[](#l2.46)
@@ -689,6 +704,11 @@ def _parse(source, state): condgroup = int(condname) except ValueError: raise error("bad character in group name")
if state.lookbehind:[](#l2.51)
import warnings[](#l2.52)
warnings.warn('group references in lookbehind '[](#l2.53)
'assertions are not supported',[](#l2.54)
RuntimeWarning)[](#l2.55) else:[](#l2.56) # flags[](#l2.57) if not source.next in FLAGS:[](#l2.58)
--- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -557,7 +557,7 @@ class ReTests(unittest.TestCase): self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0), "a\n\nb")
- def test_lookahead(self): self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a") self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a") self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
@@ -571,6 +571,37 @@ class ReTests(unittest.TestCase): self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a") self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
# Group reference.[](#l3.16)
self.assertTrue(re.match(r'(a)b(?=\1)a', 'aba'))[](#l3.17)
self.assertIsNone(re.match(r'(a)b(?=\1)c', 'abac'))[](#l3.18)
# Named group reference.[](#l3.19)
self.assertTrue(re.match(r'(?P<g>a)b(?=(?P=g))a', 'aba'))[](#l3.20)
self.assertIsNone(re.match(r'(?P<g>a)b(?=(?P=g))c', 'abac'))[](#l3.21)
# Conditional group reference.[](#l3.22)
self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))[](#l3.23)
self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(2)c|x))c', 'abc'))[](#l3.24)
self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))[](#l3.25)
self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(1)b|x))c', 'abc'))[](#l3.26)
self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(1)c|x))c', 'abc'))[](#l3.27)
# Group used before defined.[](#l3.28)
self.assertTrue(re.match(r'(a)b(?=(?(2)x|c))(c)', 'abc'))[](#l3.29)
self.assertIsNone(re.match(r'(a)b(?=(?(2)b|x))(c)', 'abc'))[](#l3.30)
self.assertTrue(re.match(r'(a)b(?=(?(1)c|x))(c)', 'abc'))[](#l3.31)
- def test_lookbehind(self):
self.assertTrue(re.match(r'ab(?<=b)c', 'abc'))[](#l3.34)
self.assertIsNone(re.match(r'ab(?<=c)c', 'abc'))[](#l3.35)
self.assertIsNone(re.match(r'ab(?<!b)c', 'abc'))[](#l3.36)
self.assertTrue(re.match(r'ab(?<!c)c', 'abc'))[](#l3.37)
# Group reference.[](#l3.38)
self.assertWarns(RuntimeWarning, re.compile, r'(a)a(?<=\1)c')[](#l3.39)
# Named group reference.[](#l3.40)
self.assertWarns(RuntimeWarning, re.compile, r'(?P<g>a)a(?<=(?P=g))c')[](#l3.41)
# Conditional group reference.[](#l3.42)
self.assertWarns(RuntimeWarning, re.compile, r'(a)b(?<=(?(1)b|x))c')[](#l3.43)
# Group used before defined.[](#l3.44)
self.assertWarns(RuntimeWarning, re.compile, r'(a)b(?<=(?(2)b|x))(c)')[](#l3.45)
+ def test_ignore_case(self): self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC") self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
--- a/Misc/NEWS +++ b/Misc/NEWS @@ -13,6 +13,10 @@ Core and Builtins Library ------- +- Issues #814253, #9179: Warnings now are raised when group references and