GH-73435: Implement recursive wildcards in pathlib.PurePath.match()
… · Glyphack/cpython@49f90ba (original) (raw)
`@@ -54,13 +54,30 @@ def _ignore_error(exception):
`
54
54
`getattr(exception, 'winerror', None) in _IGNORED_WINERRORS)
`
55
55
``
56
56
``
``
57
`+
@functools.cache
`
57
58
`def _is_case_sensitive(flavour):
`
58
59
`return flavour.normcase('Aa') == 'Aa'
`
59
60
``
60
61
`#
`
61
62
`# Globbing helpers
`
62
63
`#
`
63
64
``
``
65
+
``
66
`+
fnmatch.translate() returns a regular expression that includes a prefix and
`
``
67
`+
a suffix, which enable matching newlines and ensure the end of the string is
`
``
68
`+
matched, respectively. These features are undesirable for our implementation
`
``
69
`+
of PurePatch.match(), which represents path separators as newlines and joins
`
``
70
`+
pattern segments together. As a workaround, we define a slice object that
`
``
71
`+
can remove the prefix and suffix from any translate() result. See the
`
``
72
`+
_compile_pattern_lines() function for more details.
`
``
73
`+
FNMATCH_PREFIX, FNMATCH_SUFFIX = fnmatch.translate('').split('')
`
``
74
`+
_FNMATCH_SLICE = slice(len(_FNMATCH_PREFIX), -len(_FNMATCH_SUFFIX))
`
``
75
`+
_SWAP_SEP_AND_NEWLINE = {
`
``
76
`+
'/': str.maketrans({'/': '\n', '\n': '/'}),
`
``
77
`+
'\': str.maketrans({'\': '\n', '\n': '\'}),
`
``
78
`+
}
`
``
79
+
``
80
+
64
81
`@functools.lru_cache()
`
65
82
`def _make_selector(pattern_parts, flavour, case_sensitive):
`
66
83
`pat = pattern_parts[0]
`
`@@ -92,6 +109,51 @@ def _compile_pattern(pat, case_sensitive):
`
92
109
`return re.compile(fnmatch.translate(pat), flags).match
`
93
110
``
94
111
``
``
112
`+
@functools.lru_cache()
def _compile_pattern_lines(pattern_lines, case_sensitive):
    """Compile the given pattern lines to an `re.Pattern` object.

    The *pattern_lines* argument is a glob-style pattern (e.g. '**/*.py') with
    its path separators and newlines swapped (e.g. '**\\n*.py'). By using
    newlines to separate path components, and not setting `re.DOTALL`, we
    ensure that the `*` wildcard cannot match path separators.

    If *case_sensitive* is false, the pattern is compiled with
    `re.IGNORECASE`.

    The returned `re.Pattern` object may have its `match()` method called to
    match a complete pattern, or `search()` to match from the right. The
    argument supplied to these methods must also have its path separators and
    newlines swapped.

    Raises ValueError if '**' appears in a component together with other
    characters.
    """
    # Match the start of the path, or just after a path separator
    parts = ['^']
    # Split on '\n' only, keeping the terminator. str.splitlines() would also
    # split on '\r', '\x0b', '\x0c', '\u2028' etc., which are ordinary
    # characters inside a path component here, not separators.
    for part in re.findall(r'[^\n]*\n|[^\n]+', pattern_lines):
        if part == '**\n':
            # '**/' component: we use '[\s\S]' rather than '.' so that path
            # separators (i.e. newlines) are matched. The trailing '^' ensures
            # we terminate after a path separator (i.e. on a new line).
            part = r'[\s\S]*^'
        elif part == '**':
            # '**' component.
            part = r'[\s\S]*'
        elif '**' in part:
            raise ValueError("Invalid pattern: '**' can only be an entire path component")
        else:
            # Any other component: pass to fnmatch.translate(). We slice off
            # the common prefix and suffix added by translate() to ensure that
            # re.DOTALL is not set, and the end of the string not matched,
            # respectively. With DOTALL not set, '*' wildcards will not match
            # path separators, because the '.' characters in the pattern will
            # not match newlines.
            part = fnmatch.translate(part)[_FNMATCH_SLICE]
        parts.append(part)
    # Match the end of the path, always.
    parts.append(r'\Z')
    # MULTILINE lets '^' match after every newline, i.e. at the start of
    # every path component.
    flags = re.MULTILINE
    if not case_sensitive:
        flags |= re.IGNORECASE
    return re.compile(''.join(parts), flags=flags)
`
``
155
+
``
156
+
95
157
`class _Selector:
`
96
158
`"""A selector matches a specific glob pattern part against the children
`
97
159
` of a given path."""
`
`@@ -276,6 +338,10 @@ class PurePath:
`
276
338
`` # to implement comparison methods like __lt__()
.
``
277
339
`'_parts_normcase_cached',
`
278
340
``
``
341
`` +
# The `_lines_cached` slot stores the string path with path separators
``
``
342
`` +
# and newlines swapped. This is used to implement `match()`.
``
``
343
`+
'_lines_cached',
`
``
344
+
279
345
`` # The _hash
slot stores the hash of the case-normalized string
``
280
346
`` # path. It's set when __hash__()
is called for the first time.
``
281
347
`'_hash',
`
`@@ -441,6 +507,16 @@ def _parts_normcase(self):
`
441
507
`self._parts_normcase_cached = self._str_normcase.split(self._flavour.sep)
`
442
508
`return self._parts_normcase_cached
`
443
509
``
``
510
`+
@property
def _lines(self):
    """Path with separators and newlines swapped, for pattern matching.

    The translated string is computed on first access and stored in
    ``_lines_cached``; later accesses return the stored value.
    """
    try:
        return self._lines_cached
    except AttributeError:
        # First access: pick the translation table for this flavour's
        # separator and swap separators with newlines in one pass.
        trans = _SWAP_SEP_AND_NEWLINE[self._flavour.sep]
        self._lines_cached = str(self).translate(trans)
        return self._lines_cached
`
``
519
+
444
520
`def __eq__(self, other):
`
445
521
`if not isinstance(other, PurePath):
`
446
522
`return NotImplemented
`
`@@ -697,23 +773,18 @@ def match(self, path_pattern, *, case_sensitive=None):
`
697
773
`"""
`
698
774
` Return True if this path matches the given pattern.
`
699
775
` """
`
``
776
`+
if not isinstance(path_pattern, PurePath):
`
``
777
`+
path_pattern = self.with_segments(path_pattern)
`
700
778
`if case_sensitive is None:
`
701
779
`case_sensitive = _is_case_sensitive(self._flavour)
`
702
``
`-
pat = self.with_segments(path_pattern)
`
703
``
`-
if not pat.parts:
`
``
780
`+
pattern = _compile_pattern_lines(path_pattern._lines, case_sensitive)
`
``
781
`+
if path_pattern.drive or path_pattern.root:
`
``
782
`+
return pattern.match(self._lines) is not None
`
``
783
`+
elif path_pattern._tail:
`
``
784
`+
return pattern.search(self._lines) is not None
`
``
785
`+
else:
`
704
786
`raise ValueError("empty pattern")
`
705
``
`-
pat_parts = pat.parts
`
706
``
`-
parts = self.parts
`
707
``
`-
if pat.drive or pat.root:
`
708
``
`-
if len(pat_parts) != len(parts):
`
709
``
`-
return False
`
710
``
`-
elif len(pat_parts) > len(parts):
`
711
``
`-
return False
`
712
``
`-
for part, pat in zip(reversed(parts), reversed(pat_parts)):
`
713
``
`-
match = _compile_pattern(pat, case_sensitive)
`
714
``
`-
if not match(part):
`
715
``
`-
return False
`
716
``
`-
return True
`
``
787
+
717
788
``
718
789
`# Subclassing os.PathLike makes isinstance() checks slower,
`
719
790
`# which in turn makes Path construction slower. Register instead!
`