Skip to content

Commit e4cd001

Browse files
itamaro authored and facebook-github-bot committed
Support zip64 in zipimport
Summary:
upstream PR: python/cpython#94146
upstream issue: python/cpython#89739
Reviewed By: zsol
Differential Revision: D54468361
fbshipit-source-id: 36d09b9480fb24a13157e7996f072451f7df9a79
1 parent 16841b7 commit e4cd001

File tree

3 files changed

+141
-38
lines changed

3 files changed

+141
-38
lines changed

Doc/library/zipimport.rst

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,9 @@ Any files may be present in the ZIP archive, but importers are only invoked for
3030
corresponding :file:`.pyc` file, meaning that if a ZIP archive
3131
doesn't contain :file:`.pyc` files, importing may be rather slow.
3232

33+
.. versionchanged:: 3.13
34+
ZIP64 is supported
35+
3336
.. versionchanged:: 3.8
3437
Previously, ZIP archives with an archive comment were not supported.
3538

Lib/test/test_zipimport.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -128,6 +128,10 @@ def makeZip(self, files, zipName=TEMP_ZIP, **kw):
128128
f.write(stuff)
129129
f.write(data)
130130

131+
def getZip64Files(self):
132+
# This is the simplest way to make zipfile generate the zip64 EOCD block
133+
return {f"f{n}.py": (NOW, test_src) for n in range(65537)}
134+
131135
def doTest(self, expected_ext, files, *modules, **kw):
132136
self.makeZip(files, **kw)
133137

@@ -761,6 +765,14 @@ def testLargestPossibleComment(self):
761765
files = {TESTMOD + ".py": (NOW, test_src)}
762766
self.doTest(".py", files, TESTMOD, comment=b"c" * ((1 << 16) - 1))
763767

768+
def testZip64(self):
769+
files = self.getZip64Files()
770+
self.doTest(".py", files, "f6")
771+
772+
def testZip64CruftAndComment(self):
773+
files = self.getZip64Files()
774+
self.doTest(".py", files, "f65536", comment=b"c" * ((1 << 16) - 1))
775+
764776

765777
@support.requires_zlib()
766778
class CompressedZipImportTestCase(UncompressedZipImportTestCase):

Lib/zipimport.py

Lines changed: 126 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -40,8 +40,14 @@ class ZipImportError(ImportError):
4040
_module_type = type(sys)
4141

4242
END_CENTRAL_DIR_SIZE = 22
43-
STRING_END_ARCHIVE = b'PK\x05\x06'
43+
END_CENTRAL_DIR_SIZE_64 = 56
44+
END_CENTRAL_DIR_LOCATOR_SIZE_64 = 20
45+
STRING_END_ARCHIVE = b'PK\x05\x06' # standard EOCD signature
46+
STRING_END_LOCATOR_64 = b'PK\x06\x07' # Zip64 EOCD Locator signature
47+
STRING_END_ZIP_64 = b'PK\x06\x06' # Zip64 EOCD signature
4448
MAX_COMMENT_LEN = (1 << 16) - 1
49+
MAX_UINT32 = 0xffffffff
50+
ZIP64_EXTRA_TAG = 0x1
4551

4652
class zipimporter(_bootstrap_external._LoaderBasics):
4753
"""zipimporter(archivepath) -> zipimporter object
@@ -352,49 +358,72 @@ def _read_directory(archive):
352358
# to not cause problems when someone runs 'python3 /dev/fd/9 9<some_script'
353359
start_offset = fp.tell()
354360
try:
361+
# Check if there's a comment.
355362
try:
356-
fp.seek(-END_CENTRAL_DIR_SIZE, 2)
357-
header_position = fp.tell()
358-
buffer = fp.read(END_CENTRAL_DIR_SIZE)
363+
fp.seek(0, 2)
364+
file_size = fp.tell()
359365
except OSError:
360-
raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
361-
if len(buffer) != END_CENTRAL_DIR_SIZE:
362-
raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
363-
if buffer[:4] != STRING_END_ARCHIVE:
364-
# Bad: End of Central Dir signature
365-
# Check if there's a comment.
366-
try:
367-
fp.seek(0, 2)
368-
file_size = fp.tell()
369-
except OSError:
370-
raise ZipImportError(f"can't read Zip file: {archive!r}",
371-
path=archive)
372-
max_comment_start = max(file_size - MAX_COMMENT_LEN -
373-
END_CENTRAL_DIR_SIZE, 0)
374-
try:
375-
fp.seek(max_comment_start)
376-
data = fp.read()
377-
except OSError:
378-
raise ZipImportError(f"can't read Zip file: {archive!r}",
379-
path=archive)
380-
pos = data.rfind(STRING_END_ARCHIVE)
381-
if pos < 0:
382-
raise ZipImportError(f'not a Zip file: {archive!r}',
383-
path=archive)
366+
raise ZipImportError(f"can't read Zip file: {archive!r}",
367+
path=archive)
368+
max_comment_plus_dirs_size = (
369+
MAX_COMMENT_LEN + END_CENTRAL_DIR_SIZE +
370+
END_CENTRAL_DIR_SIZE_64 + END_CENTRAL_DIR_LOCATOR_SIZE_64)
371+
max_comment_start = max(file_size - max_comment_plus_dirs_size, 0)
372+
try:
373+
fp.seek(max_comment_start)
374+
data = fp.read(max_comment_plus_dirs_size)
375+
except OSError:
376+
raise ZipImportError(f"can't read Zip file: {archive!r}",
377+
path=archive)
378+
pos = data.rfind(STRING_END_ARCHIVE)
379+
pos64 = data.rfind(STRING_END_ZIP_64)
380+
381+
if (pos64 >= 0 and pos64+END_CENTRAL_DIR_SIZE_64+END_CENTRAL_DIR_LOCATOR_SIZE_64==pos):
382+
# Zip64 at "correct" offset from standard EOCD
383+
buffer = data[pos64:pos64 + END_CENTRAL_DIR_SIZE_64]
384+
if len(buffer) != END_CENTRAL_DIR_SIZE_64:
385+
raise ZipImportError(
386+
f"corrupt Zip64 file: Expected {END_CENTRAL_DIR_SIZE_64} byte "
387+
f"zip64 central directory, but read {len(buffer)} bytes.",
388+
path=archive)
389+
header_position = file_size - len(data) + pos64
390+
391+
central_directory_size = int.from_bytes(buffer[40:48], 'little')
392+
central_directory_position = int.from_bytes(buffer[48:56], 'little')
393+
num_entries = int.from_bytes(buffer[24:32], 'little')
394+
elif pos >= 0:
384395
buffer = data[pos:pos+END_CENTRAL_DIR_SIZE]
385396
if len(buffer) != END_CENTRAL_DIR_SIZE:
386397
raise ZipImportError(f"corrupt Zip file: {archive!r}",
387398
path=archive)
399+
388400
header_position = file_size - len(data) + pos
389401

390-
header_size = _unpack_uint32(buffer[12:16])
391-
header_offset = _unpack_uint32(buffer[16:20])
392-
if header_position < header_size:
402+
# Buffer now contains a valid EOCD, and header_position gives the
403+
# starting position of it.
404+
central_directory_size = _unpack_uint32(buffer[12:16])
405+
central_directory_position = _unpack_uint32(buffer[16:20])
406+
num_entries = _unpack_uint16(buffer[8:10])
407+
408+
# N.b. if someday you want to prefer the standard (non-zip64) EOCD,
409+
# you need to adjust position by 76 for arc to be 0.
410+
else:
411+
raise ZipImportError(f'not a Zip file: {archive!r}',
412+
path=archive)
413+
414+
# Buffer now contains a valid EOCD, and header_position gives the
415+
# starting position of it.
416+
# XXX: These are cursory checks but are not as exact or strict as they
417+
# could be. Checking the arc-adjusted value is probably good too.
418+
if header_position < central_directory_size:
393419
raise ZipImportError(f'bad central directory size: {archive!r}', path=archive)
394-
if header_position < header_offset:
420+
if header_position < central_directory_position:
395421
raise ZipImportError(f'bad central directory offset: {archive!r}', path=archive)
396-
header_position -= header_size
397-
arc_offset = header_position - header_offset
422+
header_position -= central_directory_size
423+
# On just-a-zipfile these values are the same and arc_offset is zero; if
424+
# the file has some bytes prepended, `arc_offset` is the number of such
425+
# bytes. This is used for pex as well as self-extracting .exe.
426+
arc_offset = header_position - central_directory_position
398427
if arc_offset < 0:
399428
raise ZipImportError(f'bad central directory size or offset: {archive!r}', path=archive)
400429

@@ -411,6 +440,11 @@ def _read_directory(archive):
411440
raise EOFError('EOF read where not expected')
412441
# Start of file header
413442
if buffer[:4] != b'PK\x01\x02':
443+
if count != num_entries:
444+
raise ZipImportError(
445+
f"mismatched num_entries: {count} should be {num_entries} in {archive!r}",
446+
path=archive,
447+
)
414448
break # Bad: Central Dir File Header
415449
if len(buffer) != 46:
416450
raise EOFError('EOF read where not expected')
@@ -426,9 +460,6 @@ def _read_directory(archive):
426460
comment_size = _unpack_uint16(buffer[32:34])
427461
file_offset = _unpack_uint32(buffer[42:46])
428462
header_size = name_size + extra_size + comment_size
429-
if file_offset > header_offset:
430-
raise ZipImportError(f'bad local header offset: {archive!r}', path=archive)
431-
file_offset += arc_offset
432463

433464
try:
434465
name = fp.read(name_size)
@@ -440,7 +471,10 @@ def _read_directory(archive):
440471
# slower than reading the data because fseek flushes stdio's
441472
# internal buffers. See issue #8745.
442473
try:
443-
if len(fp.read(header_size - name_size)) != header_size - name_size:
474+
extra_data_len = header_size - name_size
475+
extra_data = memoryview(fp.read(extra_data_len))
476+
477+
if len(extra_data) != extra_data_len:
444478
raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
445479
except OSError:
446480
raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
@@ -457,6 +491,60 @@ def _read_directory(archive):
457491

458492
name = name.replace('/', path_sep)
459493
path = _bootstrap_external._path_join(archive, name)
494+
495+
# Ordering matches unpacking below.
496+
if (
497+
file_size == MAX_UINT32 or
498+
data_size == MAX_UINT32 or
499+
file_offset == MAX_UINT32
500+
):
501+
# need to decode extra_data looking for a zip64 extra (which might not
502+
# be present)
503+
while extra_data:
504+
if len(extra_data) < 4:
505+
raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
506+
tag = _unpack_uint16(extra_data[:2])
507+
size = _unpack_uint16(extra_data[2:4])
508+
if len(extra_data) < 4 + size:
509+
raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
510+
if tag == ZIP64_EXTRA_TAG:
511+
if (len(extra_data) - 4) % 8 != 0:
512+
raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
513+
num_extra_values = (len(extra_data) - 4) // 8
514+
if num_extra_values > 3:
515+
raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
516+
values = struct.unpack_from(f"<{min(num_extra_values, 3)}Q",
517+
extra_data, offset=4)
518+
519+
# N.b. Here be dragons: the ordering of these is different than
520+
# the header fields, and it's really easy to get it wrong since
521+
# naturally-occurring zips that use all 3 are >4GB
522+
if file_size == MAX_UINT32:
523+
file_size = values.pop(0)
524+
if data_size == MAX_UINT32:
525+
data_size = values.pop(0)
526+
if file_offset == MAX_UINT32:
527+
file_offset = values.pop(0)
528+
529+
break
530+
531+
# For a typical zip, this bytes-slicing only happens 2-3 times, on
532+
# small data like timestamps and filesizes.
533+
extra_data = extra_data[4+size:]
534+
else:
535+
_bootstrap._verbose_message(
536+
"zipimport: suspected zip64 but no zip64 extra for {!r}",
537+
path,
538+
)
539+
# XXX These two statements seem swapped because `central_directory_position`
540+
# is a position within the actual file, but `file_offset` (when compared) is
541+
# as encoded in the entry, not adjusted for this file.
542+
# N.b. this must be after we've potentially read the zip64 extra which can
543+
# change `file_offset`.
544+
if file_offset > central_directory_position:
545+
raise ZipImportError(f'bad local header offset: {archive!r}', path=archive)
546+
file_offset += arc_offset
547+
460548
t = (path, compress, data_size, file_size, file_offset, time, date, crc)
461549
files[name] = t
462550
count += 1

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy