diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index 0f6c0f2107ce6b6..01259303ec410f7 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -2245,6 +2245,64 @@ def test_decompress_without_3rd_party_library(self): with zipfile.ZipFile(zip_file) as zf: self.assertRaises(RuntimeError, zf.extract, 'a.txt') + def test_full_overlap(self): + data = ( + b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' + b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed' + b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P' + b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2' + b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK' + b'\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' + b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00bPK\x05' + b'\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00\x00/\x00\x00' + b'\x00\x00\x00' + ) + with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf: + self.assertEqual(zipf.namelist(), ['a', 'b']) + zi = zipf.getinfo('a') + self.assertEqual(zi.header_offset, 0) + self.assertEqual(zi.compress_size, 16) + self.assertEqual(zi.file_size, 1033) + zi = zipf.getinfo('b') + self.assertEqual(zi.header_offset, 0) + self.assertEqual(zi.compress_size, 16) + self.assertEqual(zi.file_size, 1033) + self.assertEqual(len(zipf.read('a')), 1033) + with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'): + zipf.read('b') + + def test_quoted_overlap(self): + data = ( + b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05Y\xfc' + b'8\x044\x00\x00\x00(\x04\x00\x00\x01\x00\x00\x00a\x00' + b'\x1f\x00\xe0\xffPK\x03\x04\x14\x00\x00\x00\x08\x00\xa0l' + b'H\x05\xe2\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00' + b'\x00\x00b\xed\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\' + b'd\x0b`PK\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0' + b'lH\x05Y\xfc8\x044\x00\x00\x00(\x04\x00\x00\x01' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00aPK\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0l' + b'H\x05\xe2\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00$\x00\x00\x00' + b'bPK\x05\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00' + b'\x00S\x00\x00\x00\x00\x00' + ) + with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf: + self.assertEqual(zipf.namelist(), ['a', 'b']) + zi = zipf.getinfo('a') + self.assertEqual(zi.header_offset, 0) + self.assertEqual(zi.compress_size, 52) + self.assertEqual(zi.file_size, 1064) + zi = zipf.getinfo('b') + self.assertEqual(zi.header_offset, 36) + self.assertEqual(zi.compress_size, 16) + self.assertEqual(zi.file_size, 1033) + with self.assertRaisesRegex(zipfile.BadZipFile, 'Overlapped entries'): + zipf.read('a') + self.assertEqual(len(zipf.read('b')), 1033) + def tearDown(self): unlink(TESTFN) unlink(TESTFN2) @@ -2543,6 +2601,10 @@ def test_random_open(self): for f in get_files(self): self.zip_random_open_test(f, self.compression) + def test_random_open(self): + for f in get_files(self): + self.zip_random_open_test(f, self.compression) + class StoredTestsWithRandomBinaryFiles(AbstractTestsWithRandomBinaryFiles, unittest.TestCase): diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 2c963de18e4f953..605f8cc1734b513 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -395,6 +395,7 @@ class ZipInfo (object): 'compress_size', 'file_size', '_raw_time', + '_end_offset', ) def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): @@ -429,6 +430,7 @@ def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): self.external_attr = 0 # External file attributes self.compress_size = 0 # Size of the compressed file self.file_size = 0 # Size of the uncompressed file + self._end_offset = None # Start of the next local header or central directory # Other attributes are set by class ZipFile: # header_offset Byte offset to the file header # CRC CRC-32 of the uncompressed file @@ -1487,6 +1489,12 @@ def _RealGetContents(self): if self.debug > 2: print("total", total) + end_offset = self.start_dir + for zinfo in sorted(self.filelist, + key=lambda zinfo: zinfo.header_offset, + reverse=True): + zinfo._end_offset = end_offset + end_offset = zinfo.header_offset def namelist(self): """Return a list of file names in the archive.""" @@ -1643,6 +1651,10 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False): 'File name in directory %r and header %r differ.' % (zinfo.orig_filename, fname)) + if (zinfo._end_offset is not None and + zef_file.tell() + zinfo.compress_size > zinfo._end_offset): + raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r}") + # check for encrypted flag & handle password is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED if is_encrypted: diff --git a/Misc/NEWS.d/next/Library/2023-09-28-13-15-51.gh-issue-109858.43e2dg.rst b/Misc/NEWS.d/next/Library/2023-09-28-13-15-51.gh-issue-109858.43e2dg.rst new file mode 100644 index 000000000000000..be279caffc46eeb --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-09-28-13-15-51.gh-issue-109858.43e2dg.rst @@ -0,0 +1,3 @@ +Protect :mod:`zipfile` from "quoted-overlap" zipbomb. It now raises +BadZipFile when try to read an entry that overlaps with other entry or +central directory.