From e0264a61119d551658d9445af38323ba94fc16db Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Thu, 22 Aug 2024 19:24:33 -0400 Subject: [PATCH] [3.10] gh-122905: Sanitize names in zipfile.Path. (GH-122906) (#123160) [3.10] [3.11] gh-122905: Sanitize names in zipfile.Path. (GH-122906) (GH-122925) * gh-122905: Sanitize names in zipfile.Path. (GH-122906) Ported from zipp 3.19.1; ref jaraco/zipp#119. (cherry picked from commit 9cd03263100ddb1657826cc4a71470786cab3932) * [3.11] gh-122905: Sanitize names in zipfile.Path. (GH-122906) Ported from zipp 3.19.1; ref jaraco/zipp#119. (cherry picked from commit 9cd03263100ddb1657826cc4a71470786cab3932) (cherry picked from commit 795f2597a4be988e2bb19b69ff9958e981cb894e) --- Lib/test/test_zipfile.py | 17 ++++++ Lib/zipfile.py | 61 ++++++++++++++++++- ...-08-11-14-08-04.gh-issue-122905.7tDsxA.rst | 1 + 3 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-08-11-14-08-04.gh-issue-122905.7tDsxA.rst diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index 32c01704d9d1d6..a60dc11688d20b 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -3280,6 +3280,23 @@ def test_extract_orig_with_implied_dirs(self, alpharep): zipfile.Path(zf) zf.extractall(source_path.parent) + def test_malformed_paths(self): + """ + Path should handle malformed paths. 
+ """ + data = io.BytesIO() + zf = zipfile.ZipFile(data, "w") + zf.writestr("/one-slash.txt", b"content") + zf.writestr("//two-slash.txt", b"content") + zf.writestr("../parent.txt", b"content") + zf.filename = '' + root = zipfile.Path(zf) + assert list(map(str, root.iterdir())) == [ + 'one-slash.txt', + 'two-slash.txt', + 'parent.txt', + ] + class StripExtraTests(unittest.TestCase): # Note: all of the "z" characters are technically invalid, but up diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 7d18bc2479fcda..cbac8d9160e72b 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -9,6 +9,7 @@ import itertools import os import posixpath +import re import shutil import stat import struct @@ -2182,7 +2183,65 @@ def _difference(minuend, subtrahend): return itertools.filterfalse(set(subtrahend).__contains__, minuend) -class CompleteDirs(ZipFile): +class SanitizedNames: + """ + ZipFile mix-in to ensure names are sanitized. + """ + + def namelist(self): + return list(map(self._sanitize, super().namelist())) + + @staticmethod + def _sanitize(name): + r""" + Ensure a relative path with posix separators and no dot names. + Modeled after + https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813 + but provides consistent cross-platform behavior. + >>> san = SanitizedNames._sanitize + >>> san('/foo/bar') + 'foo/bar' + >>> san('//foo.txt') + 'foo.txt' + >>> san('foo/.././bar.txt') + 'foo/bar.txt' + >>> san('foo../.bar.txt') + 'foo../.bar.txt' + >>> san('\\foo\\bar.txt') + 'foo/bar.txt' + >>> san('D:\\foo.txt') + 'D/foo.txt' + >>> san('\\\\server\\share\\file.txt') + 'server/share/file.txt' + >>> san('\\\\?\\GLOBALROOT\\Volume3') + '?/GLOBALROOT/Volume3' + >>> san('\\\\.\\PhysicalDrive1\\root') + 'PhysicalDrive1/root' + Retain any trailing slash. + >>> san('abc/') + 'abc/' + Raises a ValueError if the result is empty. + >>> san('../..') + Traceback (most recent call last): + ... 
+ ValueError: Empty filename + """ + + def allowed(part): + return part and part not in {'..', '.'} + + # Remove the drive letter. + # Don't use ntpath.splitdrive, because that also strips UNC paths + bare = re.sub('^([A-Z]):', r'\1', name, flags=re.IGNORECASE) + clean = bare.replace('\\', '/') + parts = clean.split('/') + joined = '/'.join(filter(allowed, parts)) + if not joined: + raise ValueError("Empty filename") + return joined + '/' * name.endswith('/') + + +class CompleteDirs(SanitizedNames, ZipFile): """ A ZipFile subclass that ensures that implied directories are always included in the namelist. diff --git a/Misc/NEWS.d/next/Library/2024-08-11-14-08-04.gh-issue-122905.7tDsxA.rst b/Misc/NEWS.d/next/Library/2024-08-11-14-08-04.gh-issue-122905.7tDsxA.rst new file mode 100644 index 00000000000000..1be44c906c4f30 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-08-11-14-08-04.gh-issue-122905.7tDsxA.rst @@ -0,0 +1 @@ +:class:`zipfile.Path` objects now sanitize names from the zipfile.