Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[3.9] gh-123270: Replaced SanitizedNames with a more surgical fix. (GH-123354) #123432

Merged
merged 2 commits into from
Sep 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions Lib/test/test_zipfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -3054,6 +3054,83 @@ def test_implied_dirs_performance(self):
data = ['/'.join(string.ascii_lowercase + str(n)) for n in range(10000)]
zipfile.CompleteDirs._implied_dirs(data)

def test_malformed_paths(self):
    """
    Path should handle malformed paths gracefully.

    Paths with leading slashes are not visible.

    Paths with dots are treated like regular files.
    """
    data = io.BytesIO()
    zf = zipfile.ZipFile(data, "w")
    zf.writestr("/one-slash.txt", b"content")
    zf.writestr("//two-slash.txt", b"content")
    zf.writestr("../parent.txt", b"content")
    # With an empty archive filename, the string form of each entry
    # checked below is expected to be just its path inside the archive.
    zf.filename = ''
    root = zipfile.Path(zf)
    # Use unittest assertions rather than bare ``assert`` so the checks
    # still run under ``python -O`` and report both values on failure.
    self.assertEqual(list(map(str, root.iterdir())), ['../'])
    self.assertEqual(
        root.joinpath('..').joinpath('parent.txt').read_bytes(),
        b'content',
    )

def test_unsupported_names(self):
    """
    Path segments with special characters are readable.

    On some platforms or file systems, characters like
    ``:`` and ``?`` are not allowed, but they are valid
    in the zip file.
    """
    data = io.BytesIO()
    zf = zipfile.ZipFile(data, "w")
    zf.writestr("path?", b"content")
    zf.writestr("V: NMS.flac", b"fLaC...")
    zf.filename = ''
    root = zipfile.Path(zf)
    contents = root.iterdir()
    # Entries are expected in the order they were written. Use unittest
    # assertions rather than bare ``assert`` so the checks still run
    # under ``python -O`` and report both values on failure.
    self.assertEqual(next(contents).name, 'path?')
    self.assertEqual(next(contents).name, 'V: NMS.flac')
    self.assertEqual(
        root.joinpath('V: NMS.flac').read_bytes(),
        b"fLaC...",
    )

def test_backslash_not_separator(self):
    """
    In a zip file, backslashes are not separators.
    """
    data = io.BytesIO()
    zf = zipfile.ZipFile(data, "w")
    # DirtyZipInfo keeps the name exactly as given, so the backslash
    # reaches the archive unmodified.
    zf.writestr(DirtyZipInfo.for_name("foo\\bar", zf), b"content")
    zf.filename = ''
    root = zipfile.Path(zf)
    # Exactly one entry is expected; unpacking fails otherwise.
    (first,) = root.iterdir()
    # Use unittest assertions rather than bare ``assert`` so the checks
    # still run under ``python -O`` and report values on failure.
    self.assertFalse(first.is_dir())
    self.assertEqual(first.name, 'foo\\bar')


class DirtyZipInfo(zipfile.ZipInfo):
    """
    Bypass name sanitization.

    The requested filename is re-assigned after the base initializer has
    run, so the entry carries exactly the name the caller supplied.
    """

    def __init__(self, filename, *args, **kwargs):
        super().__init__(filename, *args, **kwargs)
        # Overwrite whatever the base class stored with the raw name.
        self.filename = filename

    @classmethod
    def for_name(cls, name, archive):
        """
        Construct the same way that ZipFile.writestr does.

        *name* is the entry name, stored verbatim. *archive* is the
        ZipFile whose ``compression`` and ``compresslevel`` settings are
        copied onto the new entry. Returns the new instance.

        TODO: extract this functionality and re-use
        """
        self = cls(filename=name, date_time=time.localtime(time.time())[:6])
        self.compress_type = archive.compression
        # NOTE(review): ZipFile.writestr on older branches stores the level
        # in the private ``_compresslevel`` attribute, while newer releases
        # use the public ``compress_level`` name. Set both so the level is
        # honoured either way; confirm against the target Lib/zipfile.py.
        self.compress_level = archive.compresslevel
        self._compresslevel = archive.compresslevel
        if self.filename.endswith('/'):  # pragma: no cover
            self.external_attr = 0o40775 << 16  # drwxrwxr-x
            self.external_attr |= 0x10  # MS-DOS directory flag
        else:
            self.external_attr = 0o600 << 16  # ?rw-------
        return self


# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
unittest.main()
9 changes: 7 additions & 2 deletions Lib/zipfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2146,7 +2146,7 @@ def _parents(path):
def _ancestry(path):
"""
Given a path with elements separated by
posixpath.sep, generate all elements of that path
posixpath.sep, generate all elements of that path.

>>> list(_ancestry('b/d'))
['b/d', 'b']
Expand All @@ -2158,9 +2158,14 @@ def _ancestry(path):
['b']
>>> list(_ancestry(''))
[]

Multiple separators are treated like a single.

>>> list(_ancestry('//b//d///f//'))
['//b//d///f', '//b//d', '//b']
"""
path = path.rstrip(posixpath.sep)
while path and path != posixpath.sep:
while path.rstrip(posixpath.sep):
yield path
path, tail = posixpath.split(path)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Applied a more surgical fix for malformed payloads in :class:`zipfile.Path`
causing infinite loops (gh-122905) without breaking contents using
legitimate characters.
Loading