cpython: cb7ee9d9cddd (original) (raw)
--- a/Doc/library/glob.rst
+++ b/Doc/library/glob.rst
@@ -14,7 +14,7 @@ The :mod:glob
module finds all the pat
according to the rules used by the Unix shell, although results are returned in
arbitrary order.  No tilde expansion is done, but ``*``, ``?``, and character
ranges expressed with ``[]`` will be correctly matched.  This is done by using
-the :func:`os.listdir` and :func:`fnmatch.fnmatch` functions in concert, and
+the :func:`os.scandir` and :func:`fnmatch.fnmatch` functions in concert, and
not by actually invoking a subshell.  Note that unlike :func:`fnmatch.fnmatch`,
:mod:`glob` treats filenames beginning with a dot (``.``) as special cases.
(For tilde and shell variable expansion, use :func:`os.path.expanduser` and
--- a/Doc/whatsnew/3.6.rst
+++ b/Doc/whatsnew/3.6.rst
@@ -767,6 +767,10 @@ Optimizations
Argument Clinic this overhead is significantly decreased.
(Contributed by Serhiy Storchaka in :issue:`27574`).
+* Optimized :func:`~glob.glob` and :func:`~glob.iglob` functions in the
+  :mod:`glob` module; they are now about 3--6 times faster.
+  (Contributed by Serhiy Storchaka in :issue:`25596`).
+
Build and C API Changes
=======================
--- a/Lib/glob.py +++ b/Lib/glob.py @@ -30,15 +30,16 @@ def iglob(pathname, *, recursive=False): If recursive is true, the pattern '**' will match any files and zero or more directories and subdirectories. """
- it = _iglob(pathname, recursive, False) if recursive and _isrecursive(pathname): s = next(it) # skip empty string assert not s return it -def _iglob(pathname, recursive): +def _iglob(pathname, recursive, dironly): dirname, basename = os.path.split(pathname) if not has_magic(pathname):
assert not dironly[](#l3.18) if basename:[](#l3.19) if os.path.lexists(pathname):[](#l3.20) yield pathname[](#l3.21)
@@ -49,47 +50,39 @@ def _iglob(pathname, recursive): return if not dirname: if recursive and _isrecursive(basename):
yield from glob2(dirname, basename)[](#l3.26)
yield from _glob2(dirname, basename, dironly)[](#l3.27) else:[](#l3.28)
yield from glob1(dirname, basename)[](#l3.29)
yield from _glob1(dirname, basename, dironly)[](#l3.30) return[](#l3.31)
# `os.path.split()` returns the argument itself as a dirname if it is a
# drive or UNC path.  Prevent an infinite recursion if a drive or UNC path
# contains magic characters (i.e. r'\\?\C:').
if dirname != pathname and has_magic(dirname):
dirs = _iglob(dirname, recursive)[](#l3.36)
else: dirs = [dirname] if has_magic(basename): if recursive and _isrecursive(basename):dirs = _iglob(dirname, recursive, True)[](#l3.37)
glob_in_dir = glob2[](#l3.42)
glob_in_dir = _glob2[](#l3.43) else:[](#l3.44)
glob_in_dir = glob1[](#l3.45)
glob_in_dir = glob0[](#l3.48)
for name in glob_in_dir(dirname, basename):[](#l3.51)
for name in glob_in_dir(dirname, basename, dironly):[](#l3.52) yield os.path.join(dirname, name)[](#l3.53)
# These 2 helper functions non-recursively glob inside a literal directory.
-# They return a list of basenames. `glob1` accepts a pattern while `glob0`
+# They return a list of basenames.  _glob1 accepts a pattern while _glob0
# takes a literal basename (so it only has to check for its existence).
- if not dirname:
if isinstance(pattern, bytes):[](#l3.62)
dirname = bytes(os.curdir, 'ASCII')[](#l3.63)
else:[](#l3.64)
dirname = os.curdir[](#l3.65)
- try:
names = os.listdir(dirname)[](#l3.67)
- except OSError:
return [][](#l3.69)
+def _glob1(dirname, pattern, dironly):
names = [x for x in names if not _ishidden(x)][](#l3.73)
return fnmatch.filter(names, pattern) -def glob0(dirname, basename): +def _glob0(dirname, basename, dironly): if not basename: #names = (x for x in names if not _ishidden(x))[](#l3.74)
# `os.path.split()` returns an empty basename for paths ending with a
# directory separator.  'q*x/' should match only directories.
@@ -100,30 +93,49 @@ def glob0(dirname, basename): return [basename] return [] +# Following functions are not public but can be used by third-party code. + +def glob0(dirname, pattern):
+ +def glob1(dirname, pattern):
# This helper function recursively yields relative pathnames inside a literal
# directory.
-def glob2(dirname, pattern): +def _glob2(dirname, pattern, dironly): assert _isrecursive(pattern) yield pattern[:0]
-# Recursively yields relative pathnames inside a literal directory. -def _rlistdir(dirname): +# If dironly is false, yields all file names inside a directory. +# If dironly is true, yields only directory names. +def _iterdir(dirname, dironly): if not dirname: if isinstance(dirname, bytes): dirname = bytes(os.curdir, 'ASCII') else: dirname = os.curdir try:
with os.scandir(dirname) as it:[](#l3.117)
for entry in it:[](#l3.118)
try:[](#l3.119)
if not dironly or entry.is_dir():[](#l3.120)
yield entry.name[](#l3.121)
except OSError:[](#l3.122)
pass[](#l3.123)
- except OSError: return
+ +# Recursively yields relative pathnames inside a literal directory. +def _rlistdir(dirname, dironly):
- names = list(_iterdir(dirname, dironly)) for x in names: if not _ishidden(x): yield x path = os.path.join(dirname, x) if dirname else x
for y in _rlistdir(path):[](#l3.134)
for y in _rlistdir(path, dironly):[](#l3.135) yield os.path.join(x, y)[](#l3.136)
--- a/Misc/NEWS +++ b/Misc/NEWS @@ -89,6 +89,9 @@ Core and Builtins Library ------- +- Issue #25596: Optimized glob() and iglob() functions in the