cpython: 5387095b8675 (original) (raw)
Mercurial > cpython
changeset 94715:5387095b8675 2.7
Issues #814253, #9179: Warnings now are raised when group references and conditional group references are used in lookbehind assertions in regular expressions. [#814253]
Serhiy Storchaka storchaka@gmail.com | |
---|---|
date | Sat, 21 Feb 2015 12:08:36 +0200 |
parents | af8089217cc6 |
children | 4d8e37e54a7d |
files | Doc/library/re.rst Lib/sre_parse.py Lib/test/test_re.py Misc/NEWS |
diffstat | 4 files changed, 70 insertions(+), 6 deletions(-)[+] [-] Doc/library/re.rst 7 Lib/sre_parse.py 20 Lib/test/test_re.py 45 Misc/NEWS 4 |
line wrap: on
line diff
--- a/Doc/library/re.rst
+++ b/Doc/library/re.rst
@@ -276,7 +276,9 @@ The special characters are:
assertion`. ``(?<=abc)def`` will find a match in ``abcdef``, since the
lookbehind will back up 3 characters and check if the contained pattern matches.
The contained pattern must only match strings of some fixed length, meaning that
+ ``abc`` or ``a|b`` are allowed, but ``a*`` and ``a{3,4}`` are not.  Group
+ references are not supported even if they match strings of some fixed length.
+ Note that
patterns which start with positive lookbehind assertions will not match at the
beginning of the string being searched; you will most likely want to use the
:func:`search` function rather than the :func:`match` function:
@@ -296,7 +298,8 @@ The special characters are:
Matches if the current position in the string is not preceded by a match for ``...``
. This is called a :dfn:`negative lookbehind assertion`. Similar to
positive lookbehind assertions, the contained pattern must only match strings of
+ some fixed length and shouldn't contain group references.
+ Patterns which start with negative lookbehind assertions may
match at the beginning of the string being searched.
(?(id/name)yes-pattern|no-pattern)
--- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -69,6 +69,8 @@ class Pattern: self.open = [] self.groups = 1 self.groupdict = {}
self.lookbehind = 0[](#l2.7)
+ def opengroup(self, name=None): gid = self.groups self.groups = gid + 1 @@ -299,6 +301,11 @@ def _escape(source, escape, state): if group < state.groups: if not state.checkgroup(group): raise error, "cannot refer to open group"
if state.lookbehind:[](#l2.16)
import warnings[](#l2.17)
warnings.warn('group references in lookbehind '[](#l2.18)
'assertions are not supported',[](#l2.19)
RuntimeWarning)[](#l2.20) return GROUPREF, group[](#l2.21) raise ValueError[](#l2.22) if len(escape) == 2:[](#l2.23)
@@ -578,6 +585,11 @@ def _parse(source, state): if gid is None: msg = "unknown group name: {0!r}".format(name) raise error(msg)
if state.lookbehind:[](#l2.28)
import warnings[](#l2.29)
warnings.warn('group references in lookbehind '[](#l2.30)
'assertions are not supported',[](#l2.31)
RuntimeWarning)[](#l2.32) subpatternappend((GROUPREF, gid))[](#l2.33) continue[](#l2.34) else:[](#l2.35)
@@ -606,7 +618,10 @@ def _parse(source, state): raise error, "syntax error" dir = -1 # lookbehind char = sourceget()
state.lookbehind += 1[](#l2.40) p = _parse_sub(source, state)[](#l2.41)
if dir < 0:[](#l2.42)
state.lookbehind -= 1[](#l2.43) if not sourcematch(")"):[](#l2.44) raise error, "unbalanced parenthesis"[](#l2.45) if char == "=":[](#l2.46)
@@ -637,6 +652,11 @@ def _parse(source, state): condgroup = int(condname) except ValueError: raise error, "bad character in group name"
if state.lookbehind:[](#l2.51)
import warnings[](#l2.52)
warnings.warn('group references in lookbehind '[](#l2.53)
'assertions are not supported',[](#l2.54)
RuntimeWarning)[](#l2.55) else:[](#l2.56) # flags[](#l2.57) if not source.next in FLAGS:[](#l2.58)
--- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -1,7 +1,9 @@
-- coding: utf-8 --
-from test.test_support import verbose, run_unittest, import_module -from test.test_support import precisionbigmemtest, _2G, cpython_only -from test.test_support import captured_stdout, have_unicode, requires_unicode, u +from test.test_support import (
- verbose, run_unittest, import_module,
- precisionbigmemtest, _2G, cpython_only,
- captured_stdout, have_unicode, requires_unicode, u,
- check_warnings)
import locale import re from re import Scanner @@ -449,7 +451,7 @@ class ReTests(unittest.TestCase): self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0), "a\n\nb")
- def test_lookahead(self): self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a") self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a") self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
@@ -463,6 +465,41 @@ class ReTests(unittest.TestCase): self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a") self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
# Group reference.[](#l3.29)
self.assertTrue(re.match(r'(a)b(?=\1)a', 'aba'))[](#l3.30)
self.assertIsNone(re.match(r'(a)b(?=\1)c', 'abac'))[](#l3.31)
# Named group reference.[](#l3.32)
self.assertTrue(re.match(r'(?P<g>a)b(?=(?P=g))a', 'aba'))[](#l3.33)
self.assertIsNone(re.match(r'(?P<g>a)b(?=(?P=g))c', 'abac'))[](#l3.34)
# Conditional group reference.[](#l3.35)
self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))[](#l3.36)
self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(2)c|x))c', 'abc'))[](#l3.37)
self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))[](#l3.38)
self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(1)b|x))c', 'abc'))[](#l3.39)
self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(1)c|x))c', 'abc'))[](#l3.40)
# Group used before defined.[](#l3.41)
self.assertTrue(re.match(r'(a)b(?=(?(2)x|c))(c)', 'abc'))[](#l3.42)
self.assertIsNone(re.match(r'(a)b(?=(?(2)b|x))(c)', 'abc'))[](#l3.43)
self.assertTrue(re.match(r'(a)b(?=(?(1)c|x))(c)', 'abc'))[](#l3.44)
- def test_lookbehind(self):
self.assertTrue(re.match(r'ab(?<=b)c', 'abc'))[](#l3.47)
self.assertIsNone(re.match(r'ab(?<=c)c', 'abc'))[](#l3.48)
self.assertIsNone(re.match(r'ab(?<!b)c', 'abc'))[](#l3.49)
self.assertTrue(re.match(r'ab(?<!c)c', 'abc'))[](#l3.50)
# Group reference.[](#l3.51)
with check_warnings(('', RuntimeWarning)):[](#l3.52)
re.compile(r'(a)a(?<=\1)c')[](#l3.53)
# Named group reference.[](#l3.54)
with check_warnings(('', RuntimeWarning)):[](#l3.55)
re.compile(r'(?P<g>a)a(?<=(?P=g))c')[](#l3.56)
# Conditional group reference.[](#l3.57)
with check_warnings(('', RuntimeWarning)):[](#l3.58)
re.compile(r'(a)b(?<=(?(1)b|x))c')[](#l3.59)
# Group used before defined.[](#l3.60)
with check_warnings(('', RuntimeWarning)):[](#l3.61)
re.compile(r'(a)b(?<=(?(2)b|x))(c)')[](#l3.62)
+ def test_ignore_case(self): self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC") self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
--- a/Misc/NEWS +++ b/Misc/NEWS @@ -18,6 +18,10 @@ Core and Builtins Library ------- +- Issues #814253, #9179: Warnings now are raised when group references and