Skip to content

Commit

Permalink
pythonGH-125413: pathlib ABCs: use scandir() to speed up glob()
Browse files Browse the repository at this point in the history
Use the new `PathBase.scandir()` method in `PathBase.glob()`, which greatly
reduces the number of `PathBase.stat()` calls needed when globbing.

There are no user-facing changes, because the pathlib ABCs are still
private and `Path.glob()` doesn't use the implementation in its superclass.
  • Loading branch information
barneygale committed Nov 1, 2024
1 parent 260843d commit c7ec3c9
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 25 deletions.
13 changes: 4 additions & 9 deletions Lib/glob.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,12 +364,6 @@ def concat_path(path, text):
"""
raise NotImplementedError

@staticmethod
def parse_entry(entry):
"""Returns the path of an entry yielded from scandir().
"""
raise NotImplementedError

# High-level methods

def compile(self, pat):
Expand Down Expand Up @@ -438,6 +432,7 @@ def select_wildcard(path, exists=False):
except OSError:
pass
else:
prefix = self.add_slash(path)
for entry in entries:
if match is None or match(entry.name):
if dir_only:
Expand All @@ -446,7 +441,7 @@ def select_wildcard(path, exists=False):
continue
except OSError:
continue
entry_path = self.parse_entry(entry)
entry_path = self.concat_path(prefix, entry.name)
if dir_only:
yield from select_next(entry_path, exists=True)
else:
Expand Down Expand Up @@ -495,6 +490,7 @@ def select_recursive_step(stack, match_pos):
except OSError:
pass
else:
prefix = self.add_slash(path)
for entry in entries:
is_dir = False
try:
Expand All @@ -504,7 +500,7 @@ def select_recursive_step(stack, match_pos):
pass

if is_dir or not dir_only:
entry_path = self.parse_entry(entry)
entry_path = self.concat_path(prefix, entry.name)
if match is None or match(str(entry_path), match_pos):
if dir_only:
yield from select_next(entry_path, exists=True)
Expand Down Expand Up @@ -533,7 +529,6 @@ class _StringGlobber(_GlobberBase):
"""
lexists = staticmethod(os.path.lexists)
scandir = staticmethod(os.scandir)
parse_entry = operator.attrgetter('path')
concat_path = operator.add

if os.name == 'nt':
Expand Down
14 changes: 1 addition & 13 deletions Lib/pathlib/_abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,25 +94,13 @@ class PathGlobber(_GlobberBase):

lexists = operator.methodcaller('exists', follow_symlinks=False)
add_slash = operator.methodcaller('joinpath', '')

@staticmethod
def scandir(path):
"""Emulates os.scandir(), which returns an object that can be used as
a context manager. This method is called by walk() and glob().
"""
import contextlib
return contextlib.nullcontext(path.iterdir())
scandir = operator.methodcaller('scandir')

@staticmethod
def concat_path(path, text):
"""Appends text to the given path."""
return path.with_segments(path._raw_path + text)

@staticmethod
def parse_entry(entry):
"""Returns the path of an entry yielded from scandir()."""
return entry


class PurePathBase:
"""Base class for pure path objects.
Expand Down
8 changes: 5 additions & 3 deletions Lib/test/test_pathlib/test_pathlib_abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1633,8 +1633,10 @@ def setUp(self):
p.joinpath('linkA').symlink_to('fileA')
p.joinpath('brokenLink').symlink_to('non-existing')
p.joinpath('linkB').symlink_to('dirB', target_is_directory=True)
p.joinpath('dirA', 'linkC').symlink_to(parser.join('..', 'dirB'))
p.joinpath('dirB', 'linkD').symlink_to(parser.join('..', 'dirB'))
p.joinpath('dirA', 'linkC').symlink_to(
parser.join('..', 'dirB'), target_is_directory=True)
p.joinpath('dirB', 'linkD').symlink_to(
parser.join('..', 'dirB'), target_is_directory=True)
p.joinpath('brokenLinkLoop').symlink_to('brokenLinkLoop')

def tearDown(self):
Expand Down Expand Up @@ -2479,7 +2481,7 @@ def test_glob_permissions(self):
if i % 2:
link.symlink_to(P(self.base, "dirE", "nonexistent"))
else:
link.symlink_to(P(self.base, "dirC"))
link.symlink_to(P(self.base, "dirC"), target_is_directory=True)

self.assertEqual(len(set(base.glob("*"))), 100)
self.assertEqual(len(set(base.glob("*/"))), 50)
Expand Down

0 comments on commit c7ec3c9

Please sign in to comment.