Skip to content

Commit

Permalink
GH-125413: Add pathlib.Path.scandir() method
Browse files Browse the repository at this point in the history
Add `pathlib.Path.scandir()` as a trivial wrapper of `os.scandir()`.

In the private `pathlib._abc.PathBase` class, we can rework the
`iterdir()`, `glob()`, `walk()` and `copy()` methods to call `scandir()`
and make use of cached directory entry information, and thereby improve
performance. Because the `Path.copy()` method is provided by `PathBase`,
this also speeds up traversal when copying local files and directories.
  • Loading branch information
barneygale committed Oct 28, 2024
1 parent 19e93e2 commit d7f13d4
Show file tree
Hide file tree
Showing 7 changed files with 154 additions and 63 deletions.
29 changes: 29 additions & 0 deletions Doc/library/pathlib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1289,6 +1289,35 @@ Reading directories
raised.


.. method:: Path.scandir()

When the path points to a directory, return an iterator of
:class:`os.DirEntry` objects corresponding to entries in the directory. The
returned iterator supports the :term:`context manager` protocol. It is
implemented using :func:`os.scandir` and gives the same guarantees.

Using :meth:`~Path.scandir` instead of :meth:`~Path.iterdir` can
significantly increase the performance of code that also needs file type or
file attribute information, because :class:`os.DirEntry` objects expose
this information if the operating system provides it when scanning a
directory.

The following example displays the names of subdirectories. The
``entry.is_dir()`` check will generally not make an additional system call::

>>> p = Path('docs')
>>> with p.scandir() as entries:
...     for entry in entries:
...         if entry.is_dir():
...             entry.name
...
'_templates'
'_build'
'_static'

.. versionadded:: 3.14


.. method:: Path.glob(pattern, *, case_sensitive=None, recurse_symlinks=False)

Glob the given relative *pattern* in the directory represented by this path,
Expand Down
6 changes: 6 additions & 0 deletions Doc/whatsnew/3.14.rst
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,12 @@ pathlib

(Contributed by Barney Gale in :gh:`73991`.)

* Add :meth:`pathlib.Path.scandir` to scan a directory and return an iterator
of :class:`os.DirEntry` objects. This is exactly equivalent to calling
:func:`os.scandir` on a path object.

(Contributed by Barney Gale in :gh:`125413`.)


pdb
---
Expand Down
13 changes: 4 additions & 9 deletions Lib/glob.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,12 +364,6 @@ def concat_path(path, text):
"""
raise NotImplementedError

@staticmethod
def parse_entry(entry):
"""Returns the path of an entry yielded from scandir().
"""
raise NotImplementedError

# High-level methods

def compile(self, pat):
Expand Down Expand Up @@ -438,6 +432,7 @@ def select_wildcard(path, exists=False):
except OSError:
pass
else:
prefix = self.add_slash(path)
for entry in entries:
if match is None or match(entry.name):
if dir_only:
Expand All @@ -446,7 +441,7 @@ def select_wildcard(path, exists=False):
continue
except OSError:
continue
entry_path = self.parse_entry(entry)
entry_path = self.concat_path(prefix, entry.name)
if dir_only:
yield from select_next(entry_path, exists=True)
else:
Expand Down Expand Up @@ -495,6 +490,7 @@ def select_recursive_step(stack, match_pos):
except OSError:
pass
else:
prefix = self.add_slash(path)
for entry in entries:
is_dir = False
try:
Expand All @@ -504,7 +500,7 @@ def select_recursive_step(stack, match_pos):
pass

if is_dir or not dir_only:
entry_path = self.parse_entry(entry)
entry_path = self.concat_path(prefix, entry.name)
if match is None or match(str(entry_path), match_pos):
if dir_only:
yield from select_next(entry_path, exists=True)
Expand Down Expand Up @@ -533,7 +529,6 @@ class _StringGlobber(_GlobberBase):
"""
lexists = staticmethod(os.path.lexists)
scandir = staticmethod(os.scandir)
parse_entry = operator.attrgetter('path')
concat_path = operator.add

if os.name == 'nt':
Expand Down
64 changes: 32 additions & 32 deletions Lib/pathlib/_abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,25 +94,13 @@ class PathGlobber(_GlobberBase):

lexists = operator.methodcaller('exists', follow_symlinks=False)
add_slash = operator.methodcaller('joinpath', '')

@staticmethod
def scandir(path):
"""Emulates os.scandir(), which returns an object that can be used as
a context manager. This method is called by walk() and glob().
"""
import contextlib
return contextlib.nullcontext(path.iterdir())
scandir = operator.methodcaller('scandir')

@staticmethod
def concat_path(path, text):
"""Appends text to the given path.

Returns path.with_segments(path._raw_path + text): the text is added
onto the path's raw form with ``+`` rather than through joinpath().
"""
return path.with_segments(path._raw_path + text)

@staticmethod
def parse_entry(entry):
"""Returns the path of an entry yielded from scandir()."""
return entry


class PurePathBase:
"""Base class for pure path objects.
Expand Down Expand Up @@ -639,13 +627,23 @@ def write_text(self, data, encoding=None, errors=None, newline=None):
with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f:
return f.write(data)

def scandir(self):
"""Return an iterator of os.DirEntry objects for the directory contents.

The children are yielded in arbitrary order, and the
special entries '.' and '..' are not included.

This implementation always raises UnsupportedOperation. Other methods
of this class use the result as a context manager
(``with self.scandir() as entries:``).
"""
raise UnsupportedOperation(self._unsupported_msg('scandir()'))

def iterdir(self):
"""Yield path objects of the directory contents.
The children are yielded in arbitrary order, and the
special entries '.' and '..' are not included.
"""
raise UnsupportedOperation(self._unsupported_msg('iterdir()'))
with self.scandir() as entries:
names = [entry.name for entry in entries]
return map(self.joinpath, names)

def _glob_selector(self, parts, case_sensitive, recurse_symlinks):
if case_sensitive is None:
Expand Down Expand Up @@ -695,16 +693,17 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False):
if not top_down:
paths.append((path, dirnames, filenames))
try:
for child in path.iterdir():
try:
if child.is_dir(follow_symlinks=follow_symlinks):
if not top_down:
paths.append(child)
dirnames.append(child.name)
else:
filenames.append(child.name)
except OSError:
filenames.append(child.name)
with path.scandir() as entries:
for entry in entries:
try:
if entry.is_dir(follow_symlinks=follow_symlinks):
if not top_down:
paths.append(path.joinpath(entry.name))
dirnames.append(entry.name)
else:
filenames.append(entry.name)
except OSError:
filenames.append(entry.name)
except OSError as error:
if on_error is not None:
on_error(error)
Expand Down Expand Up @@ -872,18 +871,19 @@ def copy(self, target, *, follow_symlinks=True, dirs_exist_ok=False,
if not isinstance(target, PathBase):
target = self.with_segments(target)
self._ensure_distinct_path(target)
stack = [(self, target)]
stack = [(self, self, target)]
while stack:
src, dst = stack.pop()
if not follow_symlinks and src.is_symlink():
src_entry, src, dst = stack.pop()
if not follow_symlinks and src_entry.is_symlink():
dst._symlink_to_target_of(src)
if preserve_metadata:
src._copy_metadata(dst, follow_symlinks=False)
elif src.is_dir():
children = src.iterdir()
dst.mkdir(exist_ok=dirs_exist_ok)
stack.extend((child, dst.joinpath(child.name))
for child in children)
elif src_entry.is_dir():
with src.scandir() as entries:
dst.mkdir(exist_ok=dirs_exist_ok)
stack.extend(
(entry, src.joinpath(entry.name), dst.joinpath(entry.name))
for entry in entries)
if preserve_metadata:
src._copy_metadata(dst)
else:
Expand Down
8 changes: 8 additions & 0 deletions Lib/pathlib/_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,14 @@ def _filter_trailing_slash(self, paths):
path_str = path_str[:-1]
yield path_str

def scandir(self):
"""Return an iterator of os.DirEntry objects for the directory contents.

The result of os.scandir(self) is returned unchanged, so it can be
used as a context manager. The children are yielded in arbitrary
order, and the special entries '.' and '..' are not included.
"""
return os.scandir(self)

def iterdir(self):
"""Yield path objects of the directory contents.
Expand Down
Loading

0 comments on commit d7f13d4

Please sign in to comment.