Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-125413: pathlib: use scandir() to speed up copy() #126263

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
206 changes: 115 additions & 91 deletions Lib/pathlib/_abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,110 @@ def isabs(self, path):
raise UnsupportedOperation(self._unsupported_msg('isabs()'))


class CopierBase:
"""Base class for path copiers, which transfer files and directories from
one path object to another.

A reference to this class is available as PathBase._copier. When
PathBase.copy() is called, it uses the copier type of the *target* path to
perform the copy; this allows writing of data and metadata to occur
together (or in a particular order) where supported or required by the
path type.
"""
__slots__ = ('follow_symlinks', 'dirs_exist_ok', 'preserve_metadata')

def __init__(self, follow_symlinks=True, dirs_exist_ok=False,
preserve_metadata=False):
self.follow_symlinks = follow_symlinks
self.dirs_exist_ok = dirs_exist_ok
self.preserve_metadata = preserve_metadata

@classmethod
def ensure_different_files(cls, source, target):
"""Raise OSError(EINVAL) if both paths refer to the same file."""
try:
if not target.samefile(source):
return
except (OSError, ValueError):
return
err = OSError(EINVAL, "Source and target are the same file")
err.filename = str(source)
err.filename2 = str(target)
raise err

@classmethod
def ensure_distinct_paths(cls, source, target):
"""Raise OSError(EINVAL) if the target is within the source path."""
# Note: there is no straightforward, foolproof algorithm to determine
# if one directory is within another (a particularly perverse example
# would be a single network share mounted in one location via NFS, and
# in another location via CIFS), so we simply checks whether the
# other path is lexically equal to, or within, this path.
if source == target:
err = OSError(EINVAL, "Source and target are the same path")
elif source in target.parents:
err = OSError(EINVAL, "Source path is a parent of target path")
else:
return
err.filename = str(source)
err.filename2 = str(target)
raise err

def copy(self, source, target):
"""Copy the given file or directory tree to the given target."""
self.ensure_distinct_paths(source, target)
if self.preserve_metadata:
metadata_keys = source._readable_metadata & target._writable_metadata
else:
metadata_keys = frozenset()
if not self.follow_symlinks and source.is_symlink():
self.copy_symlink(source, target, metadata_keys)
elif source.is_dir():
self.copy_dir(source, target, metadata_keys)
else:
self.copy_file(source, target, metadata_keys)

def copy_dir(self, source, target, metadata_keys, dir_entry=None):
"""Copy the given directory to the given target."""
metadata = source._read_metadata(metadata_keys, dir_entry=dir_entry)
with source.scandir() as entries:
target.mkdir(exist_ok=self.dirs_exist_ok)
for entry in entries:
src = source.joinpath(entry.name)
dst = target.joinpath(entry.name)
if not self.follow_symlinks and entry.is_symlink():
self.copy_symlink(src, dst, metadata_keys, entry)
elif entry.is_dir():
self.copy_dir(src, dst, metadata_keys, entry)
else:
self.copy_file(src, dst, metadata_keys, entry)
target._write_metadata(metadata)

def copy_file(self, source, target, metadata_keys, dir_entry=None):
"""Copy the given file to the given target."""
self.ensure_different_files(source, target)
metadata = source._read_metadata(metadata_keys, dir_entry=dir_entry)
with source.open('rb') as source_f:
try:
with target.open('wb') as target_f:
copyfileobj(source_f, target_f)
except IsADirectoryError as e:
if not target.exists():
# Raise a less confusing exception.
raise FileNotFoundError(
f'Directory does not exist: {target}') from e
else:
raise
target._write_metadata(metadata)

def copy_symlink(self, source, target, metadata_keys, dir_entry=None):
"""Copy the given symlink to the given target."""
metadata = source._read_metadata(
metadata_keys, follow_symlinks=False, dir_entry=dir_entry)
target.symlink_to(source.readlink())
target._write_metadata(metadata, follow_symlinks=False)


class PathGlobber(_GlobberBase):
"""
Class providing shell-style globbing for path objects.
Expand Down Expand Up @@ -425,6 +529,9 @@ class PathBase(PurePathBase):

# Maximum number of symlinks to follow in resolve()
_max_symlinks = 40
_copier = CopierBase
_readable_metadata = frozenset()
_writable_metadata = frozenset()

@classmethod
def _unsupported_msg(cls, attribute):
Expand Down Expand Up @@ -565,39 +672,6 @@ def samefile(self, other_path):
return (st.st_ino == other_st.st_ino and
st.st_dev == other_st.st_dev)

def _ensure_different_file(self, other_path):
"""
Raise OSError(EINVAL) if both paths refer to the same file.
"""
try:
if not self.samefile(other_path):
return
except (OSError, ValueError):
return
err = OSError(EINVAL, "Source and target are the same file")
err.filename = str(self)
err.filename2 = str(other_path)
raise err

def _ensure_distinct_path(self, other_path):
"""
Raise OSError(EINVAL) if the other path is within this path.
"""
# Note: there is no straightforward, foolproof algorithm to determine
# if one directory is within another (a particularly perverse example
# would be a single network share mounted in one location via NFS, and
# in another location via CIFS), so we simply checks whether the
# other path is lexically equal to, or within, this path.
if self == other_path:
err = OSError(EINVAL, "Source and target are the same path")
elif self in other_path.parents:
err = OSError(EINVAL, "Source path is a parent of target path")
else:
return
err.filename = str(self)
err.filename2 = str(other_path)
raise err

def open(self, mode='r', buffering=-1, encoding=None,
errors=None, newline=None):
"""
Expand Down Expand Up @@ -805,13 +879,6 @@ def symlink_to(self, target, target_is_directory=False):
"""
raise UnsupportedOperation(self._unsupported_msg('symlink_to()'))

def _symlink_to_target_of(self, link):
"""
Make this path a symlink with the same target as the given link. This
is used by copy().
"""
self.symlink_to(link.readlink())

def hardlink_to(self, target):
"""
Make this path a hard link pointing to the same file as *target*.
Expand All @@ -832,74 +899,31 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False):
"""
raise UnsupportedOperation(self._unsupported_msg('mkdir()'))

# Metadata keys supported by this path type.
_readable_metadata = _writable_metadata = frozenset()

def _read_metadata(self, keys=None, *, follow_symlinks=True):
def _read_metadata(self, metadata_keys, *, follow_symlinks=True, dir_entry=None):
"""
Returns path metadata as a dict with string keys.
"""
if not metadata_keys:
return {}
raise UnsupportedOperation(self._unsupported_msg('_read_metadata()'))

def _write_metadata(self, metadata, *, follow_symlinks=True):
"""
Sets path metadata from the given dict with string keys.
"""
if not metadata:
return
raise UnsupportedOperation(self._unsupported_msg('_write_metadata()'))

def _copy_metadata(self, target, *, follow_symlinks=True):
"""
Copies metadata (permissions, timestamps, etc) from this path to target.
"""
# Metadata types supported by both source and target.
keys = self._readable_metadata & target._writable_metadata
if keys:
metadata = self._read_metadata(keys, follow_symlinks=follow_symlinks)
target._write_metadata(metadata, follow_symlinks=follow_symlinks)

def _copy_file(self, target):
"""
Copy the contents of this file to the given target.
"""
self._ensure_different_file(target)
with self.open('rb') as source_f:
try:
with target.open('wb') as target_f:
copyfileobj(source_f, target_f)
except IsADirectoryError as e:
if not target.exists():
# Raise a less confusing exception.
raise FileNotFoundError(
f'Directory does not exist: {target}') from e
else:
raise

def copy(self, target, *, follow_symlinks=True, dirs_exist_ok=False,
preserve_metadata=False):
"""
Recursively copy this file or directory tree to the given destination.
"""
if not isinstance(target, PathBase):
target = self.with_segments(target)
self._ensure_distinct_path(target)
stack = [(self, target)]
while stack:
src, dst = stack.pop()
if not follow_symlinks and src.is_symlink():
dst._symlink_to_target_of(src)
if preserve_metadata:
src._copy_metadata(dst, follow_symlinks=False)
elif src.is_dir():
children = src.iterdir()
dst.mkdir(exist_ok=dirs_exist_ok)
stack.extend((child, dst.joinpath(child.name))
for child in children)
if preserve_metadata:
src._copy_metadata(dst)
else:
src._copy_file(dst)
if preserve_metadata:
src._copy_metadata(dst)
copier = target._copier(follow_symlinks, dirs_exist_ok, preserve_metadata)
copier.copy(self, target)
return target

def copy_into(self, target_dir, *, follow_symlinks=True,
Expand Down Expand Up @@ -946,7 +970,7 @@ def move(self, target):
"""
Recursively move this file or directory tree to the given destination.
"""
self._ensure_different_file(target)
target._copier.ensure_different_files(self, target)
try:
return self.replace(target)
except UnsupportedOperation:
Expand Down
60 changes: 33 additions & 27 deletions Lib/pathlib/_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

from pathlib._os import (copyfile, file_metadata_keys, read_file_metadata,
write_file_metadata)
from pathlib._abc import UnsupportedOperation, PurePathBase, PathBase
from pathlib._abc import UnsupportedOperation, PurePathBase, PathBase, CopierBase


__all__ = [
Expand Down Expand Up @@ -57,6 +57,33 @@ def __repr__(self):
return "<{}.parents>".format(type(self._path).__name__)


class _Copier(CopierBase):
"""Copier class that uses fast OS copy routine where possible, and ensures
symlinks' target_is_directory argument is properly set on Windows.
"""
__slots__ = ()

if copyfile:
def copy_file(self, source, target, metadata_keys, dir_entry=None):
"""Copy the given file to the given target."""
try:
source = os.fspath(source)
except TypeError:
if not isinstance(source, PathBase):
raise
CopierBase.copy_file(self, source, target, metadata_keys, dir_entry)
else:
copyfile(source, os.fspath(target))

if os.name == 'nt':
def copy_symlink(self, source, target, metadata_keys, dir_entry=None):
"""Copy the given symlink to the given target."""
metadata = source._read_metadata(
metadata_keys, follow_symlinks=False, dir_entry=dir_entry)
target.symlink_to(source.readlink(), (dir_entry or source).is_dir())
target._write_metadata(metadata, follow_symlinks=False)


class PurePath(PurePathBase):
"""Base class for manipulating paths without I/O.

Expand Down Expand Up @@ -512,6 +539,11 @@ class Path(PathBase, PurePath):
but cannot instantiate a WindowsPath on a POSIX system or vice versa.
"""
__slots__ = ()
_copier = _Copier
_readable_metadata = file_metadata_keys
_writable_metadata = file_metadata_keys
_read_metadata = read_file_metadata
_write_metadata = write_file_metadata
as_uri = PurePath.as_uri

@classmethod
Expand Down Expand Up @@ -789,24 +821,6 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False):
if not exist_ok or not self.is_dir():
raise

_readable_metadata = _writable_metadata = file_metadata_keys
_read_metadata = read_file_metadata
_write_metadata = write_file_metadata

if copyfile:
def _copy_file(self, target):
"""
Copy the contents of this file to the given target.
"""
try:
target = os.fspath(target)
except TypeError:
if not isinstance(target, PathBase):
raise
PathBase._copy_file(self, target)
else:
copyfile(os.fspath(self), target)

def chmod(self, mode, *, follow_symlinks=True):
"""
Change the permissions of the path, like os.chmod().
Expand Down Expand Up @@ -869,14 +883,6 @@ def symlink_to(self, target, target_is_directory=False):
"""
os.symlink(target, self, target_is_directory)

if os.name == 'nt':
def _symlink_to_target_of(self, link):
"""
Make this path a symlink with the same target as the given link.
This is used by copy().
"""
self.symlink_to(link.readlink(), link.is_dir())

if hasattr(os, "link"):
def hardlink_to(self, target):
"""
Expand Down
Loading
Loading