GH-73435: Implement recursive wildcards in pathlib.PurePath.match()… · Glyphack/cpython@49f90ba (original) (raw)

`@@ -54,13 +54,30 @@ def _ignore_error(exception):

`

54

54

`getattr(exception, 'winerror', None) in _IGNORED_WINERRORS)

`

55

55

``

56

56

``

``

57

`+

@functools.cache

`

57

58

`def _is_case_sensitive(flavour):

`

58

59

`return flavour.normcase('Aa') == 'Aa'

`

59

60

``

60

61

`#

`

61

62

`# Globbing helpers

`

62

63

`#

`

63

64

``

``

65

+

``

66

`+

fnmatch.translate() returns a regular expression that includes a prefix and

`

``

67

`+

a suffix, which enable matching newlines and ensure the end of the string is

`

``

68

`+

matched, respectively. These features are undesirable for our implementation

`

``

69

`+

of PurePath.match(), which represents path separators as newlines and joins

`

``

70

`+

pattern segments together. As a workaround, we define a slice object that

`

``

71

`+

can remove the prefix and suffix from any translate() result. See the

`

``

72

`+

_compile_pattern_lines() function for more details.

`

``

73

`+

_FNMATCH_PREFIX, _FNMATCH_SUFFIX = fnmatch.translate('_').split('_')

`

``

74

`+

_FNMATCH_SLICE = slice(len(_FNMATCH_PREFIX), -len(_FNMATCH_SUFFIX))

`

``

75

`+

_SWAP_SEP_AND_NEWLINE = {

`

``

76

`+

'/': str.maketrans({'/': '\n', '\n': '/'}),

`

``

77

`+

'\\': str.maketrans({'\\': '\n', '\n': '\\'}),

`

``

78

`+

}

`

``

79

+

``

80

+

64

81

`@functools.lru_cache()

`

65

82

`def _make_selector(pattern_parts, flavour, case_sensitive):

`

66

83

`pat = pattern_parts[0]

`

`@@ -92,6 +109,51 @@ def _compile_pattern(pat, case_sensitive):

`

92

109

`return re.compile(fnmatch.translate(pat), flags).match

`

93

110

``

94

111

``

``

112

`+

@functools.lru_cache()

`

``

113

`+

def _compile_pattern_lines(pattern_lines, case_sensitive):

`

``

114

`` +

"""Compile the given pattern lines to an re.Pattern object.

``

``

115

+

``

116

`+

The pattern_lines argument is a glob-style pattern (e.g. '**/*.py') with

`

``

117

`` +

its path separators and newlines swapped (e.g. '**\n*.py'). By using

``

``

118

`` +

newlines to separate path components, and not setting re.DOTALL, we

``

``

119

`` +

ensure that the * wildcard cannot match path separators.

``

``

120

+

``

121

`` +

The returned re.Pattern object may have its match() method called to

``

``

122

`` +

match a complete pattern, or search() to match from the right. The

``

``

123

`+

argument supplied to these methods must also have its path separators and

`

``

124

`+

newlines swapped.

`

``

125

`+

"""

`

``

126

+

``

127

`+

Match the start of the path, or just after a path separator

`

``

128

`+

parts = ['^']

`

``

129

`+

for part in pattern_lines.splitlines(keepends=True):

`

``

130

`+

if part == '**\n':

`

``

131

`+

'**/' component: we use '[\s\S]' rather than '.' so that path

`

``

132

`+

separators (i.e. newlines) are matched. The trailing '^' ensures

`

``

133

`+

we terminate after a path separator (i.e. on a new line).

`

``

134

`+

part = r'[\s\S]*^'

`

``

135

`+

elif part == '**':

`

``

136

`+

'**' component.

`

``

137

`+

part = r'[\s\S]*'

`

``

138

`+

elif '**' in part:

`

``

139

`+

raise ValueError("Invalid pattern: '**' can only be an entire path component")

`

``

140

`+

else:

`

``

141

`+

Any other component: pass to fnmatch.translate(). We slice off

`

``

142

`+

the common prefix and suffix added by translate() to ensure that

`

``

143

`+

re.DOTALL is not set, and the end of the string not matched,

`

``

144

`+

respectively. With DOTALL not set, '*' wildcards will not match

`

``

145

`+

path separators, because the '.' characters in the pattern will

`

``

146

`+

not match newlines.

`

``

147

`+

part = fnmatch.translate(part)[_FNMATCH_SLICE]

`

``

148

`+

parts.append(part)

`

``

149

`+

Match the end of the path, always.

`

``

150

`+

parts.append(r'\Z')

`

``

151

`+

flags = re.MULTILINE

`

``

152

`+

if not case_sensitive:

`

``

153

`+

flags |= re.IGNORECASE

`

``

154

`+

return re.compile(''.join(parts), flags=flags)

`

``

155

+

``

156

+

95

157

`class _Selector:

`

96

158

`"""A selector matches a specific glob pattern part against the children

`

97

159

` of a given path."""

`

`@@ -276,6 +338,10 @@ class PurePath:

`

276

338

`` # to implement comparison methods like __lt__().

``

277

339

`'_parts_normcase_cached',

`

278

340

``

``

341

`` +

The _lines_cached slot stores the string path with path separators

``

``

342

`` +

and newlines swapped. This is used to implement match().

``

``

343

`+

'_lines_cached',

`

``

344

+

279

345

`` # The _hash slot stores the hash of the case-normalized string

``

280

346

`` # path. It's set when __hash__() is called for the first time.

``

281

347

`'_hash',

`

`@@ -441,6 +507,16 @@ def _parts_normcase(self):

`

441

507

`self._parts_normcase_cached = self._str_normcase.split(self._flavour.sep)

`

442

508

`return self._parts_normcase_cached

`

443

509

``

``

510

`+

@property

`

``

511

`+

def _lines(self):

`

``

512

`+

Path with separators and newlines swapped, for pattern matching.

`

``

513

`+

try:

`

``

514

`+

return self._lines_cached

`

``

515

`+

except AttributeError:

`

``

516

`+

trans = _SWAP_SEP_AND_NEWLINE[self._flavour.sep]

`

``

517

`+

self._lines_cached = str(self).translate(trans)

`

``

518

`+

return self._lines_cached

`

``

519

+

444

520

`def __eq__(self, other):

`

445

521

`if not isinstance(other, PurePath):

`

446

522

`return NotImplemented

`

`@@ -697,23 +773,18 @@ def match(self, path_pattern, *, case_sensitive=None):

`

697

773

`"""

`

698

774

` Return True if this path matches the given pattern.

`

699

775

` """

`

``

776

`+

if not isinstance(path_pattern, PurePath):

`

``

777

`+

path_pattern = self.with_segments(path_pattern)

`

700

778

`if case_sensitive is None:

`

701

779

`case_sensitive = _is_case_sensitive(self._flavour)

`

702

``

`-

pat = self.with_segments(path_pattern)

`

703

``

`-

if not pat.parts:

`

``

780

`+

pattern = _compile_pattern_lines(path_pattern._lines, case_sensitive)

`

``

781

`+

if path_pattern.drive or path_pattern.root:

`

``

782

`+

return pattern.match(self._lines) is not None

`

``

783

`+

elif path_pattern._tail:

`

``

784

`+

return pattern.search(self._lines) is not None

`

``

785

`+

else:

`

704

786

`raise ValueError("empty pattern")

`

705

``

`-

pat_parts = pat.parts

`

706

``

`-

parts = self.parts

`

707

``

`-

if pat.drive or pat.root:

`

708

``

`-

if len(pat_parts) != len(parts):

`

709

``

`-

return False

`

710

``

`-

elif len(pat_parts) > len(parts):

`

711

``

`-

return False

`

712

``

`-

for part, pat in zip(reversed(parts), reversed(pat_parts)):

`

713

``

`-

match = _compile_pattern(pat, case_sensitive)

`

714

``

`-

if not match(part):

`

715

``

`-

return False

`

716

``

`-

return True

`

``

787

+

717

788

``

718

789

`# Subclassing os.PathLike makes isinstance() checks slower,

`

719

790

`# which in turn makes Path construction slower. Register instead!

`