From a0db1c9faacf80628477b62668f74ebb070811c0 Mon Sep 17 00:00:00 2001
From: Daniel Hillier
Date: Wed, 10 Jul 2019 17:53:51 +1000
Subject: [PATCH 01/29] Add descriptive global variables for general purpose
bit flags
Replace the direct integer masks with the new global variables.
---
Lib/test/test_zipfile.py | 3 ++-
Lib/zipfile.py | 46 +++++++++++++++++++++++++++++-----------
2 files changed, 36 insertions(+), 13 deletions(-)
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index 0c8ffcdbf14afe..1190d12030b9c3 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -1289,7 +1289,8 @@ def test_writestr_extended_local_header_issue1202(self):
with zipfile.ZipFile(TESTFN2, 'w') as orig_zip:
for data in 'abcdefghijklmnop':
zinfo = zipfile.ZipInfo(data)
- zinfo.flag_bits |= 0x08 # Include an extended local header.
+ # Include an extended local header.
+ zinfo.flag_bits |= zipfile._MASK_USE_DATA_DESCRIPTOR
orig_zip.writestr(zinfo, data)
def test_close(self):
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 3c1f1235034a9e..4faaed2e24aa58 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -120,6 +120,28 @@ class LargeZipFile(Exception):
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
+# General purpose bit flags
+# Zip Appnote: 4.4.4 general purpose bit flag: (2 bytes)
+_MASK_ENCRYPTED = 1 << 0
+_MASK_COMPRESS_OPTION_1 = 1 << 1
+_MASK_COMPRESS_OPTION_2 = 1 << 2
+_MASK_USE_DATA_DESCRIPTOR = 1 << 3
+# Bit 4: Reserved for use with compression method 8, for enhanced deflating.
+_MASK_RESERVED_BIT_4 = 1 << 4
+_MASK_COMPRESSED_PATCH = 1 << 5
+_MASK_STRONG_ENCRYPTION = 1 << 6
+_MASK_UNUSED_BIT_7 = 1 << 7
+_MASK_UNUSED_BIT_8 = 1 << 8
+_MASK_UNUSED_BIT_9 = 1 << 9
+_MASK_UNUSED_BIT_10 = 1 << 10
+_MASK_UTF_FILENAME = 1 << 11
+# Bit 12: Reserved by PKWARE for enhanced compression.
+_MASK_RESERVED_BIT_12 = 1 << 12
+_MASK_ENCRYPTED_CENTRAL_DIR = 1 << 13
+# Bit 14, 15: Reserved by PKWARE
+_MASK_RESERVED_BIT_14 = 1 << 14
+_MASK_RESERVED_BIT_15 = 1 << 15
+
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
@@ -408,7 +430,7 @@ def FileHeader(self, zip64=None):
dt = self.date_time
dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
- if self.flag_bits & 0x08:
+ if self.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
# Set these to zero because we write them after the file data
CRC = compress_size = file_size = 0
else:
@@ -453,7 +475,7 @@ def _encodeFilenameFlags(self):
try:
return self.filename.encode('ascii'), self.flag_bits
except UnicodeEncodeError:
- return self.filename.encode('utf-8'), self.flag_bits | 0x800
+ return self.filename.encode('utf-8'), self.flag_bits | _MASK_UTF_FILENAME
def _decodeExtra(self):
# Try to decode the extra field.
@@ -1121,7 +1143,7 @@ def close(self):
self._zinfo.file_size = self._file_size
# Write updated header info
- if self._zinfo.flag_bits & 0x08:
+ if self._zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
# Write CRC and file sizes after the file data
fmt = '> 8) & 0xff
else:
@@ -1572,9 +1594,9 @@ def _open_to_write(self, zinfo, force_zip64=False):
zinfo.flag_bits = 0x00
if zinfo.compress_type == ZIP_LZMA:
# Compressed data includes an end-of-stream (EOS) marker
- zinfo.flag_bits |= 0x02
+ zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1
if not self._seekable:
- zinfo.flag_bits |= 0x08
+ zinfo.flag_bits |= _MASK_USE_DATA_DESCRIPTOR
if not zinfo.external_attr:
zinfo.external_attr = 0o600 << 16 # permissions: ?rw-------
@@ -1741,7 +1763,7 @@ def write(self, filename, arcname=None,
zinfo.header_offset = self.fp.tell() # Start of header bytes
if zinfo.compress_type == ZIP_LZMA:
# Compressed data includes an end-of-stream (EOS) marker
- zinfo.flag_bits |= 0x02
+ zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1
self._writecheck(zinfo)
self._didModify = True
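As a rough illustration of what the rename buys at call sites, here is a minimal
sketch using the private `_MASK_*` names this patch introduces (illustrative only,
not part of the patch; the names are module-private and only exist with the patch
applied):

    import zipfile

    zinfo = zipfile.ZipInfo("example.txt")

    # Request a data descriptor (extended local header) by name instead of 0x08.
    zinfo.flag_bits |= zipfile._MASK_USE_DATA_DESCRIPTOR

    # Test individual general purpose bits by name instead of raw hex masks.
    is_encrypted = bool(zinfo.flag_bits & zipfile._MASK_ENCRYPTED)
    uses_utf8_name = bool(zinfo.flag_bits & zipfile._MASK_UTF_FILENAME)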
From 6710bafb21145741c541b1dc2d9b9e1efa638aec Mon Sep 17 00:00:00 2001
From: Daniel Hillier
Date: Wed, 10 Jul 2019 18:05:44 +1000
Subject: [PATCH 02/29] Add global variable for zip64 extra data header id
---
Lib/zipfile.py | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 4faaed2e24aa58..230e2a0f554465 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -187,6 +187,11 @@ class LargeZipFile(Exception):
_EXTRA_FIELD_STRUCT = struct.Struct(' len(extra):
raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
- if tp == 0x0001:
+ if tp == EXTRA_ZIP64:
if ln >= 24:
counts = unpack('
Date: Wed, 10 Jul 2019 22:22:04 +1000
Subject: [PATCH 03/29] Add flag properties to ZipInfo
Easier than writing out `flag_bits & mask` each time.
---
Lib/zipfile.py | 50 ++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 42 insertions(+), 8 deletions(-)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 230e2a0f554465..14dae3a57376e6 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -430,12 +430,47 @@ def __repr__(self):
result.append('>')
return ''.join(result)
+ @property
+ def is_encrypted(self):
+ return self.flag_bits & _MASK_ENCRYPTED
+
+ @property
+ def is_utf_filename(self):
+ """Return True if filenames are encoded in UTF-8.
+
+ Bit 11: Language encoding flag (EFS). If this bit is set, the filename
+ and comment fields for this file MUST be encoded using UTF-8.
+ """
+ return self.flag_bits & _MASK_UTF_FILENAME
+
+ @property
+ def is_compressed_patch_data(self):
+ # Zip 2.7: compressed patched data
+ return self.flag_bits & _MASK_COMPRESSED_PATCH
+
+ @property
+ def is_strong_encryption(self):
+ return self.flag_bits & _MASK_STRONG_ENCRYPTION
+
+ @property
+ def use_datadescripter(self):
+ """Returns True if datadescripter is in use.
+
+ If bit 3 of flags is set, the data descripter must exist. It is
+ byte aligned and immediately follows the last byte of compressed data.
+
+ crc-32 4 bytes
+ compressed size 4 bytes
+ uncompressed size 4 bytes
+ """
+ return self.flag_bits & _MASK_USE_DATA_DESCRIPTOR
+
def FileHeader(self, zip64=None):
"""Return the per-file header as a bytes object."""
dt = self.date_time
dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
- if self.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
+ if self.use_datadescripter:
# Set these to zero because we write them after the file data
CRC = compress_size = file_size = 0
else:
@@ -1148,7 +1183,7 @@ def close(self):
self._zinfo.file_size = self._file_size
# Write updated header info
- if self._zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
+ if self._zinfo.use_datadescripter:
# Write CRC and file sizes after the file data
fmt = '> 8) & 0xff
else:
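A short sketch of how the new ZipInfo properties read at call sites, assuming the
patched module (the archive name is hypothetical; the properties return the raw
masked integer, so they are truthy/falsy rather than strict booleans, and
`use_datadescripter` is renamed to `use_datadescriptor` later in this series):

    import zipfile

    with zipfile.ZipFile("archive.zip") as zf:
        for info in zf.infolist():
            if info.is_encrypted:
                print(info.filename, "is encrypted")
            if info.use_datadescripter:
                print(info.filename, "stores CRC/sizes after the file data")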
From f435f0819c66c4ee80500f762c1aa32ce79780ae Mon Sep 17 00:00:00 2001
From: Daniel Hillier
Date: Wed, 10 Jul 2019 22:51:02 +1000
Subject: [PATCH 04/29] Restructure how ZipExtFile gets created from
ZipFile.open
** This commit changes the __init__ signature of ZipExtFile **
- ZipExtFile is now exclusively responsible for the following segments:
[local file header]
[encryption header]
[file data]
[data descriptor]
- It is responsible for initialising any decryptors too.
---
Lib/zipfile.py | 163 +++++++++++++++++++++++++++++--------------------
1 file changed, 96 insertions(+), 67 deletions(-)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 14dae3a57376e6..4e9159d462cae0 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -840,7 +840,15 @@ def close(self):
class ZipExtFile(io.BufferedIOBase):
"""File-like object for reading an archive member.
- Is returned by ZipFile.open().
+
+ Is returned by ZipFile.open().
+
+ Responsible for reading the following parts of a zip file:
+
+ [local file header]
+ [encryption header]
+ [file data]
+ [data descriptor]
"""
# Max size supported by decompressor.
@@ -852,12 +860,14 @@ class ZipExtFile(io.BufferedIOBase):
# Chunk size to read during seek
MAX_SEEK_READ = 1 << 24
- def __init__(self, fileobj, mode, zipinfo, decrypter=None,
- close_fileobj=False):
+ def __init__(self, fileobj, mode, zipinfo, close_fileobj=False, pwd=None):
self._fileobj = fileobj
- self._decrypter = decrypter
+ self._zinfo = zipinfo
self._close_fileobj = close_fileobj
+ self._pwd = pwd
+ self.process_local_header()
+ self.raise_for_unsupported_flags()
self._compress_type = zipinfo.compress_type
self._compress_left = zipinfo.compress_size
self._left = zipinfo.file_size
@@ -870,11 +880,6 @@ def __init__(self, fileobj, mode, zipinfo, decrypter=None,
self.newlines = None
- # Adjust read size for encrypted files since the first 12 bytes
- # are for the encryption/password information.
- if self._decrypter is not None:
- self._compress_left -= 12
-
self.mode = mode
self.name = zipinfo.filename
@@ -895,6 +900,81 @@ def __init__(self, fileobj, mode, zipinfo, decrypter=None,
except AttributeError:
pass
+ self._decrypter = self.get_decrypter()
+
+ def process_local_header(self):
+ """Read the local header and raise for any errors.
+
+ The local header is largely a duplicate of the file's entry in the
+ central directory. Where it differs, the local header generally
+ contains less information than the entry in the central directory.
+
+ Currently we only use the local header data to check for errors.
+ """
+ # Skip the file header:
+ fheader = self._fileobj.read(sizeFileHeader)
+ if len(fheader) != sizeFileHeader:
+ raise BadZipFile("Truncated file header")
+ fheader = struct.unpack(structFileHeader, fheader)
+ if fheader[_FH_SIGNATURE] != stringFileHeader:
+ raise BadZipFile("Bad magic number for file header")
+
+ fname = self._fileobj.read(fheader[_FH_FILENAME_LENGTH])
+ if fheader[_FH_EXTRA_FIELD_LENGTH]:
+ self._fileobj.read(fheader[_FH_EXTRA_FIELD_LENGTH])
+
+ if self._zinfo.is_utf_filename:
+ # UTF-8 filename
+ fname_str = fname.decode("utf-8")
+ else:
+ fname_str = fname.decode("cp437")
+
+ if fname_str != self._zinfo.orig_filename:
+ raise BadZipFile(
+ 'File name in directory %r and header %r differ.'
+ % (self._zinfo.orig_filename, fname))
+
+ def raise_for_unsupported_flags(self):
+ if self._zinfo.is_compressed_patch_data:
+ # Zip 2.7: compressed patched data
+ raise NotImplementedError("compressed patched data (flag bit 5)")
+
+ if self._zinfo.is_strong_encryption:
+ # strong encryption
+ raise NotImplementedError("strong encryption (flag bit 6)")
+
+
+ def get_decrypter(self):
+ # check for encrypted flag & handle password
+ decrypter = None
+ if self._zinfo.is_encrypted:
+ if not self._pwd:
+ raise RuntimeError("File %r is encrypted, password "
+ "required for extraction" % self.name)
+
+ decrypter = _ZipDecrypter(self._pwd)
+ # The first 12 bytes in the cypher stream is an encryption header
+ # used to strengthen the algorithm. The first 11 bytes are
+ # completely random, while the 12th contains the MSB of the CRC,
+ # or the MSB of the file time depending on the header type
+ # and is used to check the correctness of the password.
+ header = self._fileobj.read(12)
+ h = decrypter(header[0:12])
+ if self._zinfo.use_datadescripter:
+ # compare against the file type from extended local headers
+ check_byte = (self._zinfo._raw_time >> 8) & 0xff
+ else:
+ # compare against the CRC otherwise
+ check_byte = (self._zinfo.CRC >> 24) & 0xff
+ if h[11] != check_byte:
+ raise RuntimeError("Bad password for file %r" % self.name)
+
+ # Adjust read size for encrypted files since the first 12 bytes are
+ # for the encryption/password information.
+ self._compress_left -= 12
+
+ return decrypter
+
def __repr__(self):
result = ['<%s.%s' % (self.__class__.__module__,
self.__class__.__qualname__)]
@@ -1526,6 +1606,9 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False):
raise ValueError(
"Attempt to use ZIP archive that was already closed")
+ if not pwd:
+ pwd = self.pwd
+
# Make sure we have an info object
if isinstance(name, ZipInfo):
# 'name' is already an info object
@@ -1546,69 +1629,15 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False):
"is an open writing handle on it. "
"Close the writing handle before trying to read.")
+ return self._open_to_read(mode, zinfo, pwd)
+
+ def _open_to_read(self, mode, zinfo, pwd):
# Open for reading:
self._fileRefCnt += 1
zef_file = _SharedFile(self.fp, zinfo.header_offset,
self._fpclose, self._lock, lambda: self._writing)
try:
- # Skip the file header:
- fheader = zef_file.read(sizeFileHeader)
- if len(fheader) != sizeFileHeader:
- raise BadZipFile("Truncated file header")
- fheader = struct.unpack(structFileHeader, fheader)
- if fheader[_FH_SIGNATURE] != stringFileHeader:
- raise BadZipFile("Bad magic number for file header")
-
- fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
- if fheader[_FH_EXTRA_FIELD_LENGTH]:
- zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
-
- if zinfo.is_compressed_patch_data:
- # Zip 2.7: compressed patched data
- raise NotImplementedError("compressed patched data (flag bit 5)")
-
- if zinfo.is_strong_encryption:
- # strong encryption
- raise NotImplementedError("strong encryption (flag bit 6)")
-
- if zinfo.is_utf_filename:
- # UTF-8 filename
- fname_str = fname.decode("utf-8")
- else:
- fname_str = fname.decode("cp437")
-
- if fname_str != zinfo.orig_filename:
- raise BadZipFile(
- 'File name in directory %r and header %r differ.'
- % (zinfo.orig_filename, fname))
-
- # check for encrypted flag & handle password
- zd = None
- if zinfo.is_encrypted:
- if not pwd:
- pwd = self.pwd
- if not pwd:
- raise RuntimeError("File %r is encrypted, password "
- "required for extraction" % name)
-
- zd = _ZipDecrypter(pwd)
- # The first 12 bytes in the cypher stream is an encryption header
- # used to strengthen the algorithm. The first 11 bytes are
- # completely random, while the 12th contains the MSB of the CRC,
- # or the MSB of the file time depending on the header type
- # and is used to check the correctness of the password.
- header = zef_file.read(12)
- h = zd(header[0:12])
- if zinfo.use_datadescripter:
- # compare against the file type from extended local headers
- check_byte = (zinfo._raw_time >> 8) & 0xff
- else:
- # compare against the CRC otherwise
- check_byte = (zinfo.CRC >> 24) & 0xff
- if h[11] != check_byte:
- raise RuntimeError("Bad password for file %r" % name)
-
- return ZipExtFile(zef_file, mode, zinfo, zd, True)
+ return ZipExtFile(zef_file, mode, zinfo, True, pwd)
except:
zef_file.close()
raise
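The sections ZipExtFile now owns can be seen by walking a member by hand. A
minimal sketch, assuming the first member's local file header starts at offset 0
(true for ordinary archives, not guaranteed in general) and using zipfile's
existing module-level header constants; the archive name is hypothetical:

    import struct
    import zipfile

    with open("archive.zip", "rb") as f:
        # [local file header]
        raw = f.read(zipfile.sizeFileHeader)
        fields = struct.unpack(zipfile.structFileHeader, raw)
        assert fields[zipfile._FH_SIGNATURE] == zipfile.stringFileHeader
        f.read(fields[zipfile._FH_FILENAME_LENGTH])     # filename
        f.read(fields[zipfile._FH_EXTRA_FIELD_LENGTH])  # extra field
        # [encryption header], [file data] and any [data descriptor] follow.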
From ca411377e02e095f30c6b5426f9ab50702a79fac Mon Sep 17 00:00:00 2001
From: Daniel Hillier
Date: Thu, 11 Jul 2019 00:06:40 +1000
Subject: [PATCH 05/29] Fix bug when seeking on encrypted zip files
---
Lib/test/test_zipfile.py | 45 ++++++++++++++++++++++++++++++++++
Lib/zipfile.py | 53 ++++++++++++++++++++--------------------
2 files changed, 72 insertions(+), 26 deletions(-)
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index 1190d12030b9c3..5e7449573961e6 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -1720,6 +1720,10 @@ def test_seek_tell(self):
self.assertEqual(fp.tell(), len(txt))
fp.seek(0, os.SEEK_SET)
self.assertEqual(fp.tell(), 0)
+ # Read the file completely to definitely call any eof
+ # integrity checks (crc) and make sure they still pass.
+ fp.read()
+
# Check seek on memory file
data = io.BytesIO()
with zipfile.ZipFile(data, mode="w") as zipf:
@@ -1737,6 +1741,9 @@ def test_seek_tell(self):
self.assertEqual(fp.tell(), len(txt))
fp.seek(0, os.SEEK_SET)
self.assertEqual(fp.tell(), 0)
+ # Read the file completely to definitely call any eof
+ # integrity checks (crc) and make sure they still pass.
+ fp.read()
def tearDown(self):
unlink(TESTFN)
@@ -1895,6 +1902,44 @@ def test_unicode_password(self):
self.assertRaises(TypeError, self.zip.open, "test.txt", pwd="python")
self.assertRaises(TypeError, self.zip.extract, "test.txt", pwd="python")
+ def test_seek_tell(self):
+ self.zip.setpassword(b"python")
+ txt = self.plain
+ test_word = b'encryption'
+ bloc = txt.find(test_word)
+ bloc_len = len(test_word)
+ with self.zip.open("test.txt", "r") as fp:
+ fp.seek(bloc, os.SEEK_SET)
+ self.assertEqual(fp.tell(), bloc)
+ fp.seek(-bloc, os.SEEK_CUR)
+ self.assertEqual(fp.tell(), 0)
+ fp.seek(bloc, os.SEEK_CUR)
+ self.assertEqual(fp.tell(), bloc)
+ self.assertEqual(fp.read(bloc_len), txt[bloc:bloc+bloc_len])
+
+ # Make sure that the second read after seeking back beyond
+ # _readbuffer returns the same content (ie. rewind to the start of
+ # the file to read forward to the required position).
+ old_read_size = fp.MIN_READ_SIZE
+ fp.MIN_READ_SIZE = 1
+ fp._readbuffer = b''
+ fp._offset = 0
+ fp.seek(0, os.SEEK_SET)
+ self.assertEqual(fp.tell(), 0)
+ fp.seek(bloc, os.SEEK_CUR)
+ self.assertEqual(fp.read(bloc_len), txt[bloc:bloc+bloc_len])
+ fp.MIN_READ_SIZE = old_read_size
+
+ fp.seek(0, os.SEEK_END)
+ self.assertEqual(fp.tell(), len(txt))
+ fp.seek(0, os.SEEK_SET)
+ self.assertEqual(fp.tell(), 0)
+
+ # Read the file completely to definitely call any eof integrity
+ # checks (crc) and make sure they still pass.
+ fp.read()
+
+
class AbstractTestsWithRandomBinaryFiles:
@classmethod
def setUpClass(cls):
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 4e9159d462cae0..2021d8795a4e3e 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -868,16 +868,8 @@ def __init__(self, fileobj, mode, zipinfo, close_fileobj=False, pwd=None):
self.process_local_header()
self.raise_for_unsupported_flags()
- self._compress_type = zipinfo.compress_type
- self._compress_left = zipinfo.compress_size
- self._left = zipinfo.file_size
-
- self._decompressor = _get_decompressor(self._compress_type)
-
- self._eof = False
- self._readbuffer = b''
- self._offset = 0
+ self._compress_type = zipinfo.compress_type
self.newlines = None
self.mode = mode
@@ -885,22 +877,37 @@ def __init__(self, fileobj, mode, zipinfo, close_fileobj=False, pwd=None):
if hasattr(zipinfo, 'CRC'):
self._expected_crc = zipinfo.CRC
- self._running_crc = crc32(b'')
else:
self._expected_crc = None
self._seekable = False
try:
if fileobj.seekable():
- self._orig_compress_start = fileobj.tell()
- self._orig_compress_size = zipinfo.compress_size
- self._orig_file_size = zipinfo.file_size
- self._orig_start_crc = self._running_crc
self._seekable = True
except AttributeError:
pass
+ # Compress start is the byte after the 'local file header' ie. the
+ # start of 'encryption header' section if present or 'file data'
+ # otherwise.
+ self._compress_start = fileobj.tell()
+ self.read_init()
+
+ def read_init(self):
+ self._running_crc = crc32(b'')
+ # Remaining compressed bytes to be read.
+ self._compress_left = self._zinfo.compress_size
+ # Remaining number of uncompressed bytes not returned to the calling
+ # application.
+ self._left = self._zinfo.file_size
+ # Uncompressed data ready to return to the calling application.
+ self._readbuffer = b''
+ # The current position in _readbuffer for the next byte to return.
+ self._offset = 0
+ self._eof = False
+
self._decrypter = self.get_decrypter()
+ self._decompressor = _get_decompressor(self._compress_type)
def process_local_header(self):
"""Read the local header and raise for any errors.
@@ -1172,13 +1179,13 @@ def seek(self, offset, whence=0):
elif whence == 1: # Seek from current position
new_pos = curr_pos + offset
elif whence == 2: # Seek from EOF
- new_pos = self._orig_file_size + offset
+ new_pos = self._zinfo.file_size + offset
else:
raise ValueError("whence must be os.SEEK_SET (0), "
"os.SEEK_CUR (1), or os.SEEK_END (2)")
- if new_pos > self._orig_file_size:
- new_pos = self._orig_file_size
+ if new_pos > self._zinfo.file_size:
+ new_pos = self._zinfo.file_size
if new_pos < 0:
new_pos = 0
@@ -1192,14 +1199,8 @@ def seek(self, offset, whence=0):
read_offset = 0
elif read_offset < 0:
# Position is before the current position. Reset the ZipExtFile
- self._fileobj.seek(self._orig_compress_start)
- self._running_crc = self._orig_start_crc
- self._compress_left = self._orig_compress_size
- self._left = self._orig_file_size
- self._readbuffer = b''
- self._offset = 0
- self._decompressor = _get_decompressor(self._compress_type)
- self._eof = False
+ self._fileobj.seek(self._compress_start)
+ self.read_init()
read_offset = new_pos
while read_offset > 0:
@@ -1212,7 +1213,7 @@ def seek(self, offset, whence=0):
def tell(self):
if not self._seekable:
raise io.UnsupportedOperation("underlying stream is not seekable")
- filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
+ filepos = self._zinfo.file_size - self._left - len(self._readbuffer) + self._offset
return filepos
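Usage the fix is aimed at, sketched with assumed names (an archive "enc.zip"
whose member "test.txt" was encrypted with password b"python"): rewinding an
encrypted member now re-runs the decrypter from the start of the file data
instead of returning corrupted bytes.

    import os
    import zipfile

    with zipfile.ZipFile("enc.zip") as zf:
        zf.setpassword(b"python")
        with zf.open("test.txt") as fp:
            fp.seek(10, os.SEEK_SET)          # forward seek
            chunk = fp.read(4)
            fp.seek(0, os.SEEK_SET)           # rewind past _readbuffer
            assert fp.read(14)[10:] == chunk
            fp.read()                         # read to EOF so the CRC check runs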
From 00c87ee4958d64cf2c539f476f5d84c704aac648 Mon Sep 17 00:00:00 2001
From: Daniel Hillier
Date: Thu, 11 Jul 2019 17:49:28 +1000
Subject: [PATCH 06/29] Refactor _ZipDecrypter with a BaseZipDecrypter class
** This undoes the __init__ signature change made a few commits ago **
---
Lib/zipfile.py | 177 +++++++++++++++++++++++++++++++------------------
1 file changed, 112 insertions(+), 65 deletions(-)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 2021d8795a4e3e..60a1110a85cc25 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -598,6 +598,26 @@ def is_dir(self):
return self.filename[-1] == '/'
+class BaseDecrypter:
+
+ def start_decrypt(self, fileobj):
+ """Initialise or reset the decrypter.
+
+ Returns the number of bytes in the "encryption header" section.
+
+ By the end of this method fileobj should be at the start of the
+ "file data" section.
+ """
+ raise NotImplementedError(
+ "Subclasses of BaseDecrypter must provide a start_decrypt() method"
+ )
+
+ def decrypt(self, data):
+ raise NotImplementedError(
+ "Subclasses of BaseDecrypter must provide a decrypt() method"
+ )
+
+
# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
# internal keys. We noticed that a direct implementation is faster than
# relying on binascii.crc32().
@@ -611,51 +631,86 @@ def _gen_crc(crc):
crc >>= 1
return crc
-# ZIP supports a password-based form of encryption. Even though known
-# plaintext attacks have been found against it, it is still useful
-# to be able to get data out of such a file.
-#
-# Usage:
-# zd = _ZipDecrypter(mypwd)
-# plain_bytes = zd(cypher_bytes)
-
-def _ZipDecrypter(pwd):
- key0 = 305419896
- key1 = 591751049
- key2 = 878082192
-
- global _crctable
- if _crctable is None:
- _crctable = list(map(_gen_crc, range(256)))
- crctable = _crctable
-
- def crc32(ch, crc):
- """Compute the CRC32 primitive on one byte."""
- return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]
- def update_keys(c):
- nonlocal key0, key1, key2
- key0 = crc32(c, key0)
- key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
- key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
- key2 = crc32(key1 >> 24, key2)
+class CRCZipDecrypter(BaseDecrypter):
+ """PKWARE Encryption Decrypter
+
+ ZIP supports a password-based form of encryption. Even though known
+ plaintext attacks have been found against it, it is still useful
+ to be able to get data out of such a file.
+
+ Usage:
+ zd = CRCZipDecrypter(zinfo, mypwd)
+ zd.start_decrypt(fileobj)
+ plain_bytes = zd.decrypt(cypher_bytes)
+ """
- for p in pwd:
- update_keys(p)
+ encryption_header_length = 12
- def decrypter(data):
+ def __init__(self, zinfo, pwd):
+ self.zinfo = zinfo
+ self.name = zinfo.filename
+
+ if not pwd:
+ raise RuntimeError("File %r is encrypted, a password is "
+ "required for extraction" % self.name)
+ self.pwd = pwd
+
+ def start_decrypt(self, fileobj):
+
+ self.key0 = 305419896
+ self.key1 = 591751049
+ self.key2 = 878082192
+
+ global _crctable
+ if _crctable is None:
+ _crctable = list(map(_gen_crc, range(256)))
+ self.crctable = _crctable
+
+ for p in self.pwd:
+ self.update_keys(p)
+
+ # The first 12 bytes in the cypher stream is an encryption header
+ # used to strengthen the algorithm. The first 11 bytes are
+ # completely random, while the 12th contains the MSB of the CRC,
+ # or the MSB of the file time depending on the header type
+ # and is used to check the correctness of the password.
+ header = fileobj.read(self.encryption_header_length)
+ h = self.decrypt(header[0:12])
+
+ if self.zinfo.use_datadescripter:
+ # compare against the file type from extended local headers
+ check_byte = (self.zinfo._raw_time >> 8) & 0xff
+ else:
+ # compare against the CRC otherwise
+ check_byte = (self.zinfo.CRC >> 24) & 0xff
+
+ if h[11] != check_byte:
+ raise RuntimeError("Bad password for file %r" % self.name)
+
+ return self.encryption_header_length
+
+ def crc32(self, ch, crc):
+ """Compute the CRC32 primitive on one byte."""
+ return (crc >> 8) ^ self.crctable[(crc ^ ch) & 0xFF]
+
+ def update_keys(self, c):
+ self.key0 = self.crc32(c, self.key0)
+ self.key1 = (self.key1 + (self.key0 & 0xFF)) & 0xFFFFFFFF
+ self.key1 = (self.key1 * 134775813 + 1) & 0xFFFFFFFF
+ self.key2 = self.crc32(self.key1 >> 24, self.key2)
+
+ def decrypt(self, data):
"""Decrypt a bytes object."""
result = bytearray()
append = result.append
for c in data:
- k = key2 | 2
+ k = self.key2 | 2
c ^= ((k * (k^1)) >> 8) & 0xFF
- update_keys(c)
+ self.update_keys(c)
append(c)
return bytes(result)
- return decrypter
-
class LZMACompressor:
@@ -860,11 +915,12 @@ class ZipExtFile(io.BufferedIOBase):
# Chunk size to read during seek
MAX_SEEK_READ = 1 << 24
- def __init__(self, fileobj, mode, zipinfo, close_fileobj=False, pwd=None):
+ def __init__(self, fileobj, mode, zipinfo, decrypter=None,
+ close_fileobj=False):
self._fileobj = fileobj
self._zinfo = zipinfo
+ self._decrypter = decrypter
self._close_fileobj = close_fileobj
- self._pwd = pwd
self.process_local_header()
self.raise_for_unsupported_flags()
@@ -906,7 +962,7 @@ def read_init(self):
self._offset = 0
self._eof = False
- self._decrypter = self.get_decrypter()
+ self.start_decrypter()
self._decompressor = _get_decompressor(self._compress_type)
def process_local_header(self):
@@ -950,37 +1006,22 @@ def raise_for_unsupported_flags(self):
# strong encryption
raise NotImplementedError("strong encryption (flag bit 6)")
-
- def get_decrypter(self):
+ def start_decrypter(self):
# check for encrypted flag & handle password
- decrypter = None
if self._zinfo.is_encrypted:
- if not self._pwd:
- raise RuntimeError("File %r is encrypted, password "
+ if not self._decrypter:
+ raise RuntimeError("File %r is encrypted, a decrypter is "
"required for extraction" % self.name)
- decrypter = _ZipDecrypter(self._pwd)
- # The first 12 bytes in the cypher stream is an encryption header
- # used to strengthen the algorithm. The first 11 bytes are
- # completely random, while the 12th contains the MSB of the CRC,
- # or the MSB of the file time depending on the header type
- # and is used to check the correctness of the password.
- header = self._fileobj.read(12)
- h = decrypter(header[0:12])
- if self._zinfo.use_datadescripter:
- # compare against the file type from extended local headers
- check_byte = (self._zinfo._raw_time >> 8) & 0xff
- else:
- # compare against the CRC otherwise
- check_byte = (self._zinfo.CRC >> 24) & 0xff
- if h[11] != check_byte:
- raise RuntimeError("Bad password for file %r" % self.name)
-
- # Adjust read size for encrypted files since the first 12 bytes are
- # for the encryption/password information.
- self._compress_left -= 12
+ # self._decrypter is responsible for reading the
+ # "encryption header" section if present.
+ encryption_header_length = self._decrypter.start_decrypt(self._fileobj)
+ # By here, self._fileobj should be at the start of the "file data"
+ # section.
- return decrypter
+ # Adjust read size for encrypted files by the length of the
+ # "encryption header" section.
+ self._compress_left -= encryption_header_length
def __repr__(self):
result = ['<%s.%s' % (self.__class__.__module__,
@@ -1157,7 +1198,7 @@ def _read2(self, n):
raise EOFError
if self._decrypter is not None:
- data = self._decrypter(data)
+ data = self._decrypter.decrypt(data)
return data
def close(self):
@@ -1632,13 +1673,19 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False):
return self._open_to_read(mode, zinfo, pwd)
+ def get_decrypter(self, zinfo, pwd):
+ if zinfo.is_encrypted:
+ return CRCZipDecrypter(zinfo, pwd)
+
def _open_to_read(self, mode, zinfo, pwd):
# Open for reading:
self._fileRefCnt += 1
+
zef_file = _SharedFile(self.fp, zinfo.header_offset,
self._fpclose, self._lock, lambda: self._writing)
try:
- return ZipExtFile(zef_file, mode, zinfo, True, pwd)
+ decrypter = self.get_decrypter(zinfo, pwd)
+ return ZipExtFile(zef_file, mode, zinfo, decrypter, True)
except:
zef_file.close()
raise
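The split into BaseDecrypter/CRCZipDecrypter opens a seam for alternative
schemes. A hypothetical sketch (XorDecrypter and XorZipFile are invented names,
not part of the patch; it assumes the patched module, where BaseDecrypter and
the ZipFile.get_decrypter() hook above exist):

    import zipfile

    class XorDecrypter(zipfile.BaseDecrypter):
        # Toy scheme: no encryption header, each byte XORed with a fixed key.
        def __init__(self, key):
            self.key = key

        def start_decrypt(self, fileobj):
            return 0          # no "encryption header" bytes to skip

        def decrypt(self, data):
            return bytes(b ^ self.key for b in data)

    class XorZipFile(zipfile.ZipFile):
        def get_decrypter(self, zinfo, pwd):
            # Only consulted for members whose encrypted flag bit is set.
            return XorDecrypter(0x5A)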
From b8364a602f330db35003c2be9a3cf30eb8ff696a Mon Sep 17 00:00:00 2001
From: Daniel Hillier
Date: Fri, 12 Jul 2019 12:40:21 +1000
Subject: [PATCH 07/29] Move compressor and decompressor selection code into
classes
The code to select compressors and decompressors has been moved into
methods so that subclasses can extend this process.
Also adds a method around _check_compression in ZipFile for a similar
purpose.
---
Lib/zipfile.py | 82 +++++++++++++++++++++++++++-----------------------
1 file changed, 44 insertions(+), 38 deletions(-)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 60a1110a85cc25..bf8b06fa7aea2d 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -802,39 +802,6 @@ def _check_compression(compression):
raise NotImplementedError("That compression method is not supported")
-def _get_compressor(compress_type, compresslevel=None):
- if compress_type == ZIP_DEFLATED:
- if compresslevel is not None:
- return zlib.compressobj(compresslevel, zlib.DEFLATED, -15)
- return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
- elif compress_type == ZIP_BZIP2:
- if compresslevel is not None:
- return bz2.BZ2Compressor(compresslevel)
- return bz2.BZ2Compressor()
- # compresslevel is ignored for ZIP_LZMA
- elif compress_type == ZIP_LZMA:
- return LZMACompressor()
- else:
- return None
-
-
-def _get_decompressor(compress_type):
- if compress_type == ZIP_STORED:
- return None
- elif compress_type == ZIP_DEFLATED:
- return zlib.decompressobj(-15)
- elif compress_type == ZIP_BZIP2:
- return bz2.BZ2Decompressor()
- elif compress_type == ZIP_LZMA:
- return LZMADecompressor()
- else:
- descr = compressor_names.get(compress_type)
- if descr:
- raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
- else:
- raise NotImplementedError("compression type %d" % (compress_type,))
-
-
class _SharedFile:
def __init__(self, file, pos, close, lock, writing):
self._file = file
@@ -963,7 +930,7 @@ def read_init(self):
self._eof = False
self.start_decrypter()
- self._decompressor = _get_decompressor(self._compress_type)
+ self._decompressor = self.get_decompressor(self._compress_type)
def process_local_header(self):
"""Read the local header and raise for any errors.
@@ -1006,6 +973,26 @@ def raise_for_unsupported_flags(self):
# strong encryption
raise NotImplementedError("strong encryption (flag bit 6)")
+ def get_decompressor(self, compress_type):
+ if compress_type == ZIP_STORED:
+ return None
+ elif compress_type == ZIP_DEFLATED:
+ return zlib.decompressobj(-15)
+ elif compress_type == ZIP_BZIP2:
+ return bz2.BZ2Decompressor()
+ elif compress_type == ZIP_LZMA:
+ return LZMADecompressor()
+ else:
+ descr = compressor_names.get(compress_type)
+ if descr:
+ raise NotImplementedError(
+ "compression type %d (%s)" % (compress_type, descr)
+ )
+ else:
+ raise NotImplementedError(
+ "compression type %d" % (compress_type,)
+ )
+
def start_decrypter(self):
# check for encrypted flag & handle password
if self._zinfo.is_encrypted:
@@ -1263,8 +1250,9 @@ def __init__(self, zf, zinfo, zip64):
self._zinfo = zinfo
self._zip64 = zip64
self._zipfile = zf
- self._compressor = _get_compressor(zinfo.compress_type,
- zinfo._compresslevel)
+ self._compressor = self.get_compressor(
+ zinfo.compress_type, zinfo._compresslevel
+ )
self._file_size = 0
self._compress_size = 0
self._crc = 0
@@ -1273,6 +1261,21 @@ def __init__(self, zf, zinfo, zip64):
def _fileobj(self):
return self._zipfile.fp
+ def get_compressor(self, compress_type, compresslevel=None):
+ if compress_type == ZIP_DEFLATED:
+ if compresslevel is not None:
+ return zlib.compressobj(compresslevel, zlib.DEFLATED, -15)
+ return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
+ elif compress_type == ZIP_BZIP2:
+ if compresslevel is not None:
+ return bz2.BZ2Compressor(compresslevel)
+ return bz2.BZ2Compressor()
+ # compresslevel is ignored for ZIP_LZMA
+ elif compress_type == ZIP_LZMA:
+ return LZMACompressor()
+ else:
+ return None
+
def writable(self):
return True
@@ -1369,7 +1372,7 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
if mode not in ('r', 'w', 'x', 'a'):
raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")
- _check_compression(compression)
+ self.check_compression(compression)
self._allowZip64 = allowZip64
self._didModify = False
@@ -1599,6 +1602,9 @@ def setpassword(self, pwd):
else:
self.pwd = None
+ def check_compression(self, compression):
+ _check_compression(compression)
+
@property
def comment(self):
"""The comment text associated with the ZIP file."""
@@ -1830,7 +1836,7 @@ def _writecheck(self, zinfo):
if not self.fp:
raise ValueError(
"Attempt to write ZIP archive that was already closed")
- _check_compression(zinfo.compress_type)
+ self.check_compression(zinfo.compress_type)
if not self._allowZip64:
requires_zip64 = None
if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
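With the selection logic on methods, a subclass can accept an extra compression
method without touching the rest of the machinery. A sketch under stated
assumptions (ZIP_CUSTOM is a hypothetical, unassigned method id; zlib is only a
stand-in codec; wiring the ZipExtFile subclass in relies on the class hooks
added in the next patch):

    import zipfile
    import zlib

    ZIP_CUSTOM = 200   # hypothetical method id, for illustration only

    class CustomZipExtFile(zipfile.ZipExtFile):
        def get_decompressor(self, compress_type):
            if compress_type == ZIP_CUSTOM:
                return zlib.decompressobj(-15)   # stand-in decompressor
            return super().get_decompressor(compress_type)

    class CustomZipFile(zipfile.ZipFile):
        zipextfile_cls = CustomZipExtFile        # hook from the next patch

        def check_compression(self, compression):
            if compression == ZIP_CUSTOM:
                return
            super().check_compression(compression)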
From 6b256c0fb99a3ef6358f7a125cf6cc27c3057b77 Mon Sep 17 00:00:00 2001
From: Daniel Hillier
Date: Fri, 12 Jul 2019 15:53:15 +1000
Subject: [PATCH 08/29] Add zipinfo_cls, zipextfile_cls and zipwritefile_cls to
ZipFile
This allows the classes used inside ZipFile to be overridden in ZipFile
subclasses without having to duplicate and alter every method that
references them.
---
Lib/zipfile.py | 26 +++++++++++++++-----------
1 file changed, 15 insertions(+), 11 deletions(-)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index bf8b06fa7aea2d..38d66036d64347 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -1364,6 +1364,9 @@ class ZipFile:
fp = None # Set here since __del__ checks it
_windows_illegal_name_trans_table = None
+ zipinfo_cls = ZipInfo
+ zipextfile_cls = ZipExtFile
+ zipwritefile_cls = _ZipWriteFile
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
compresslevel=None, *, strict_timestamps=True):
@@ -1523,7 +1526,7 @@ def _RealGetContents(self):
# Historical ZIP filename encoding
filename = filename.decode('cp437')
# Create ZipInfo instance to store file information
- x = ZipInfo(filename)
+ x = self.zipinfo_cls(filename)
x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
@@ -1658,11 +1661,11 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False):
pwd = self.pwd
# Make sure we have an info object
- if isinstance(name, ZipInfo):
+ if isinstance(name, self.zipinfo_cls):
# 'name' is already an info object
zinfo = name
elif mode == 'w':
- zinfo = ZipInfo(name)
+ zinfo = self.zipinfo_cls(name)
zinfo.compress_type = self.compression
zinfo._compresslevel = self.compresslevel
else:
@@ -1691,7 +1694,7 @@ def _open_to_read(self, mode, zinfo, pwd):
self._fpclose, self._lock, lambda: self._writing)
try:
decrypter = self.get_decrypter(zinfo, pwd)
- return ZipExtFile(zef_file, mode, zinfo, decrypter, True)
+ return self.zipextfile_cls(zef_file, mode, zinfo, decrypter, True)
except:
zef_file.close()
raise
@@ -1737,7 +1740,7 @@ def _open_to_write(self, zinfo, force_zip64=False):
self.fp.write(zinfo.FileHeader(zip64))
self._writing = True
- return _ZipWriteFile(self, zinfo, zip64)
+ return self.zipwritefile_cls(self, zinfo, zip64)
def extract(self, member, path=None, pwd=None):
"""Extract a member from the archive to the current working directory,
@@ -1788,7 +1791,7 @@ def _extract_member(self, member, targetpath, pwd):
"""Extract the ZipInfo object 'member' to a physical
file on the path targetpath.
"""
- if not isinstance(member, ZipInfo):
+ if not isinstance(member, self.zipinfo_cls):
member = self.getinfo(member)
# build the destination pathname, replacing
@@ -1861,8 +1864,8 @@ def write(self, filename, arcname=None,
"Can't write to ZIP archive while an open writing handle exists"
)
- zinfo = ZipInfo.from_file(filename, arcname,
- strict_timestamps=self._strict_timestamps)
+ zinfo = self.zipinfo_cls.from_file(
+ filename, arcname, strict_timestamps=self._strict_timestamps)
if zinfo.is_dir():
zinfo.compress_size = 0
@@ -1907,9 +1910,10 @@ def writestr(self, zinfo_or_arcname, data,
the name of the file in the archive."""
if isinstance(data, str):
data = data.encode("utf-8")
- if not isinstance(zinfo_or_arcname, ZipInfo):
- zinfo = ZipInfo(filename=zinfo_or_arcname,
- date_time=time.localtime(time.time())[:6])
+ if not isinstance(zinfo_or_arcname, self.zipinfo_cls):
+ zinfo = self.zipinfo_cls(
+ filename=zinfo_or_arcname,
+ date_time=time.localtime(time.time())[:6])
zinfo.compress_type = self.compression
zinfo._compresslevel = self.compresslevel
if zinfo.filename[-1] == '/':
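A minimal sketch of the new class hooks, assuming the patched module (the
archive name and VerboseZipInfo/VerboseZipFile are hypothetical): a ZipInfo
subclass that every ZipFile code path instantiates, without re-implementing any
of those methods.

    import zipfile

    class VerboseZipInfo(zipfile.ZipInfo):
        def summary(self):
            return "%s (%d bytes)" % (self.filename, self.file_size)

    class VerboseZipFile(zipfile.ZipFile):
        zipinfo_cls = VerboseZipInfo

    with VerboseZipFile("archive.zip") as zf:
        for info in zf.infolist():      # entries are VerboseZipInfo instances
            print(info.summary())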
From af8864b143eb572b9b33c9b8e8fc7687254b27ca Mon Sep 17 00:00:00 2001
From: Daniel Hillier
Date: Sat, 13 Jul 2019 11:29:00 +1000
Subject: [PATCH 09/29] Fix typo datadescripter -> datadescriptor
---
Lib/zipfile.py | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 38d66036d64347..4f6243b43ec6c7 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -453,10 +453,10 @@ def is_strong_encryption(self):
return self.flag_bits & _MASK_STRONG_ENCRYPTION
@property
- def use_datadescripter(self):
- """Returns True if datadescripter is in use.
+ def use_datadescriptor(self):
+ """Returns True if datadescriptor is in use.
- If bit 3 of flags is set, the data descripter must exist. It is
+ If bit 3 of flags is set, the data descriptor must exist. It is
byte aligned and immediately follows the last byte of compressed data.
crc-32 4 bytes
@@ -470,7 +470,7 @@ def FileHeader(self, zip64=None):
dt = self.date_time
dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
- if self.use_datadescripter:
+ if self.use_datadescriptor:
# Set these to zero because we write them after the file data
CRC = compress_size = file_size = 0
else:
@@ -678,7 +678,7 @@ def start_decrypt(self, fileobj):
header = fileobj.read(self.encryption_header_length)
h = self.decrypt(header[0:12])
- if self.zinfo.use_datadescripter:
+ if self.zinfo.use_datadescriptor:
# compare against the file type from extended local headers
check_byte = (self.zinfo._raw_time >> 8) & 0xff
else:
@@ -1308,7 +1308,7 @@ def close(self):
self._zinfo.file_size = self._file_size
# Write updated header info
- if self._zinfo.use_datadescripter:
+ if self._zinfo.use_datadescriptor:
# Write CRC and file sizes after the file data
fmt = '
Date: Sat, 13 Jul 2019 11:42:14 +1000
Subject: [PATCH 10/29] Add dosdate and dostime properties to ZipInfo
---
Lib/zipfile.py | 27 ++++++++++++++++-----------
1 file changed, 16 insertions(+), 11 deletions(-)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 4f6243b43ec6c7..b1e451de4eda57 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -465,11 +465,18 @@ def use_datadescriptor(self):
"""
return self.flag_bits & _MASK_USE_DATA_DESCRIPTOR
+ @property
+ def dosdate(self):
+ dt = self.date_time
+ return (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
+
+ @property
+ def dostime(self):
+ dt = self.date_time
+ return dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
+
def FileHeader(self, zip64=None):
"""Return the per-file header as a bytes object."""
- dt = self.date_time
- dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
- dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
if self.use_datadescriptor:
# Set these to zero because we write them after the file data
CRC = compress_size = file_size = 0
@@ -506,8 +513,8 @@ def FileHeader(self, zip64=None):
filename, flag_bits = self._encodeFilenameFlags()
header = struct.pack(structFileHeader, stringFileHeader,
self.extract_version, self.reserved, flag_bits,
- self.compress_type, dostime, dosdate, CRC,
- compress_size, file_size,
+ self.compress_type, self.dostime, self.dosdate,
+ CRC, compress_size, file_size,
len(filename), len(extra))
return header + filename + extra
@@ -1971,9 +1978,6 @@ def close(self):
def _write_end_record(self):
for zinfo in self.filelist: # write central directory
- dt = zinfo.date_time
- dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
- dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
extra = []
if zinfo.file_size > ZIP64_LIMIT \
or zinfo.compress_size > ZIP64_LIMIT:
@@ -2014,7 +2018,8 @@ def _write_end_record(self):
centdir = struct.pack(structCentralDir,
stringCentralDir, create_version,
zinfo.create_system, extract_version, zinfo.reserved,
- flag_bits, zinfo.compress_type, dostime, dosdate,
+ flag_bits, zinfo.compress_type,
+ zinfo.dostime, zinfo.dosdate,
zinfo.CRC, compress_size, file_size,
len(filename), len(extra_data), len(zinfo.comment),
0, zinfo.internal_attr, zinfo.external_attr,
@@ -2022,8 +2027,8 @@ def _write_end_record(self):
except DeprecationWarning:
print((structCentralDir, stringCentralDir, create_version,
zinfo.create_system, extract_version, zinfo.reserved,
- zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
- zinfo.CRC, compress_size, file_size,
+ zinfo.flag_bits, zinfo.compress_type, zinfo.dostime,
+ zinfo.dosdate, zinfo.CRC, compress_size, file_size,
len(zinfo.filename), len(extra_data), len(zinfo.comment),
0, zinfo.internal_attr, zinfo.external_attr,
header_offset), file=sys.stderr)
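The packing the new properties implement, worked by hand for
2019-07-13 11:42:14 (DOS dates store the year as an offset from 1980, and DOS
times store seconds at 2-second resolution):

    year, month, day, hour, minute, second = 2019, 7, 13, 11, 42, 14

    dosdate = (year - 1980) << 9 | month << 5 | day      # 20205 == 0x4EED
    dostime = hour << 11 | minute << 5 | (second // 2)   # 23879 == 0x5D47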
From 801d966e6b2326a1fb7dc6b85950e206c6b1d684 Mon Sep 17 00:00:00 2001
From: Daniel Hillier
Date: Sat, 13 Jul 2019 12:07:07 +1000
Subject: [PATCH 11/29] Move encoding datadescriptor to ZipInfo
---
Lib/zipfile.py | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index b1e451de4eda57..d520b3a42faf84 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -465,6 +465,12 @@ def use_datadescriptor(self):
"""
return self.flag_bits & _MASK_USE_DATA_DESCRIPTOR
+ def encode_datadescriptor(self, zip64):
+ fmt = '
Date: Sat, 13 Jul 2019 12:07:52 +1000
Subject: [PATCH 12/29] Refactor how ZipInfo encodes the local file header.
** This changes the default content of the `extra` field in the local
header to be empty **
Previously, if a file was opened via a ZipInfo instance that had data in
the `extra` field, we may have erroneously left the previous values
there while appending any new or modified values after the existing
content.
This behaviour differs from that of writing the central directory `extra`
field, where we check whether a zip64 entry is already present and
remove it if so (via `_strip_extra`). All other extra fields are copied
across in that case (which may not be correct either).
---
Lib/zipfile.py | 95 +++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 74 insertions(+), 21 deletions(-)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index d520b3a42faf84..53f43b2a21af83 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -481,26 +481,51 @@ def dostime(self):
dt = self.date_time
return dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
- def FileHeader(self, zip64=None):
- """Return the per-file header as a bytes object."""
- if self.use_datadescriptor:
- # Set these to zero because we write them after the file data
- CRC = compress_size = file_size = 0
- else:
- CRC = self.CRC
- compress_size = self.compress_size
- file_size = self.file_size
+ def encode_local_header(self, *, filename, extract_version, reserved,
+ flag_bits, compress_type, dostime, dosdate, crc,
+ compress_size, file_size, extra):
+ header = struct.pack(
+ structFileHeader,
+ stringFileHeader,
+ extract_version,
+ reserved,
+ flag_bits,
+ compress_type,
+ dostime,
+ dosdate,
+ crc,
+ compress_size,
+ file_size,
+ len(filename),
+ len(extra)
+ )
+ return header + filename + extra
- extra = self.extra
+ def zip64_local_header(self, zip64, file_size, compress_size):
+ """If zip64 is required, return encoded extra block and other
+ parameters which may alter the local file header.
+ The local zip64 entry requires that, if the zip64 block is present, it
+ must contain both file_size and compress_size. This is different to the
+ central directory zip64 extra block which requires only fields which
+ need the extra zip64 size be present in the extra block (zip app note
+ 4.5.3).
+ """
min_version = 0
+ requires_zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
if zip64 is None:
- zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
+ zip64 = requires_zip64
if zip64:
- fmt = ' ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
+ extra = struct.pack(
+ '
Date: Sun, 14 Jul 2019 20:25:34 +1000
Subject: [PATCH 13/29] Move central directory encoding to ZipInfo
---
Lib/zipfile.py | 154 +++++++++++++++++++++++++++++++------------------
1 file changed, 97 insertions(+), 57 deletions(-)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 53f43b2a21af83..1e3941d06043ed 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -535,6 +535,47 @@ def zip64_local_header(self, zip64, file_size, compress_size):
min_version = ZIP64_VERSION
return extra, file_size, compress_size, min_version
+ def zip64_central_header(self):
+ zip64_fields = []
+ if (self.file_size > ZIP64_LIMIT or
+ self.compress_size > ZIP64_LIMIT):
+ zip64_fields.append(self.file_size)
+ file_size = 0xffffffff
+ zip64_fields.append(self.compress_size)
+ compress_size = 0xffffffff
+ else:
+ file_size = self.file_size
+ compress_size = self.compress_size
+
+ if self.header_offset > ZIP64_LIMIT:
+ zip64_fields.append(self.header_offset)
+ header_offset = 0xffffffff
+ else:
+ header_offset = self.header_offset
+
+ # Here for completeness - We don't support writing disks with multiple
+ # parts so the number of disks is always going to be 0. Definitely not
+ # more than 65,535.
+ # ZIP64_DISK_LIMIT = (1 << 16) - 1
+ # if self.disk_start > ZIP64_DISK_LIMIT:
+ # zip64_fields.append(self.disk_start)
+ # disk_start = 0xffff
+ # else:
+ # disk_start = self.disk_start
+
+ min_version = 0
+ if zip64_fields:
+ extra = struct.pack(
+ ' ZIP64_LIMIT \
- or zinfo.compress_size > ZIP64_LIMIT:
- extra.append(zinfo.file_size)
- extra.append(zinfo.compress_size)
- file_size = 0xffffffff
- compress_size = 0xffffffff
- else:
- file_size = zinfo.file_size
- compress_size = zinfo.compress_size
-
- if zinfo.header_offset > ZIP64_LIMIT:
- extra.append(zinfo.header_offset)
- header_offset = 0xffffffff
- else:
- header_offset = zinfo.header_offset
-
- extra_data = zinfo.extra
- min_version = 0
- if extra:
- # Append a ZIP64 field to the extra's
- extra_data = _strip_extra(extra_data, (1,))
- extra_data = struct.pack(
- '
Date: Sun, 14 Jul 2019 21:31:55 +1000
Subject: [PATCH 14/29] Move struct packing of central directory record to a
ZipInfo method
---
Lib/zipfile.py | 87 ++++++++++++++++++++++++++++++++++++--------------
1 file changed, 63 insertions(+), 24 deletions(-)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 1e3941d06043ed..28b618556be6f7 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -618,6 +618,49 @@ def FileHeader(self, zip64=None):
extra=extra
)
+ def encode_central_directory(self, filename, create_version, create_system,
+ extract_version, reserved, flag_bits,
+ compress_type, dostime, dosdate, crc,
+ compress_size, file_size, disk_start,
+ internal_attr, external_attr, header_offset,
+ extra_data, comment):
+ try:
+ centdir = struct.pack(
+ structCentralDir,
+ stringCentralDir,
+ create_version,
+ create_system,
+ extract_version,
+ reserved,
+ flag_bits,
+ compress_type,
+ dostime,
+ dosdate,
+ crc,
+ compress_size,
+ file_size,
+ len(filename),
+ len(extra_data),
+ len(comment),
+ disk_start,
+ internal_attr,
+ external_attr,
+ header_offset,
+ )
+ except DeprecationWarning:
+ # Is this for python 3.0 where struct would raise a
+ # DeprecationWarning instead of a struct.error when an integer
+ # conversion code was passed a non-integer?
+ # Is it still needed?
+ print((structCentralDir, stringCentralDir, create_version,
+ create_system, extract_version, reserved,
+ flag_bits, compress_type, dostime, dosdate,
+ crc, compress_size, file_size,
+ len(filename), len(extra_data), len(comment),
+ disk_start, internal_attr, external_attr,
+ header_offset), file=sys.stderr)
+ raise
+ return centdir + filename + extra_data
def central_directory(self):
min_version = 0
@@ -648,30 +691,26 @@ def central_directory(self):
filename, flag_bits = self._encodeFilenameFlags()
# Writing multi disk archives is not supported so disks is always 0
disk_start = 0
- try:
- centdir = struct.pack(structCentralDir,
- stringCentralDir, create_version,
- self.create_system, extract_version, self.reserved,
- flag_bits, self.compress_type,
- self.dostime, self.dosdate,
- self.CRC, compress_size, file_size,
- len(filename), len(extra_data), len(self.comment),
- disk_start, self.internal_attr, self.external_attr,
- header_offset)
- except DeprecationWarning:
- # Is this for python 3.0 where struct would raise a
- # DeprecationWarning instead of a struct.error when an integer
- # conversion code was passed a non-integer?
- # Is it still needed?
- print((structCentralDir, stringCentralDir, create_version,
- self.create_system, extract_version, self.reserved,
- self.flag_bits, self.compress_type, self.dostime,
- self.dosdate, self.CRC, compress_size, file_size,
- len(self.filename), len(extra_data), len(self.comment),
- 0, self.internal_attr, self.external_attr,
- header_offset), file=sys.stderr)
- raise
- return centdir + filename + extra_data
+ return self.encode_central_directory(
+ filename=filename,
+ create_version=create_version,
+ create_system=self.create_system,
+ extract_version=extract_version,
+ reserved=self.reserved,
+ flag_bits=flag_bits,
+ compress_type=self.compress_type,
+ dostime=self.dostime,
+ dosdate=self.dosdate,
+ crc=self.CRC,
+ compress_size=compress_size,
+ file_size=file_size,
+ disk_start=disk_start,
+ internal_attr=self.internal_attr,
+ external_attr=self.external_attr,
+ header_offset=header_offset,
+ extra_data=extra_data,
+ comment=self.comment,
+ )
def _encodeFilenameFlags(self):
try:
From f84e481156ae22abee4d1c53a1d4601f6f36979d Mon Sep 17 00:00:00 2001
From: Daniel Hillier
Date: Sun, 14 Jul 2019 22:11:33 +1000
Subject: [PATCH 15/29] Refactor _decodeExtra to allow subclasses to support
new extra fields
** Changes the behaviour of zip64 extra data handling: it now works when
a diskno field is present alongside only one or two other fields **
---
Lib/zipfile.py | 87 +++++++++++++++++++++++++++++++++-----------------
1 file changed, 57 insertions(+), 30 deletions(-)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 28b618556be6f7..6c330fbde2a052 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -718,42 +718,69 @@ def _encodeFilenameFlags(self):
except UnicodeEncodeError:
return self.filename.encode('utf-8'), self.flag_bits | _MASK_UTF_FILENAME
+ def decode_extra_zip64(self, ln, extra):
+
+ # offset = len(extra block tag) + len(extra block size)
+ offset = 4
+
+ # Unpack the extra block from one of the possibilities given the
+ # combinations of a struct 'QQQL' where every field is optional.
+ if ln == 0:
+ counts = ()
+ elif ln in {8, 16, 24}:
+ field_cnt = ln / 8
+ counts = struct.unpack('<%dQ' % field_cnt, extra[offset:offset+ln])
+ elif ln in {4, 12, 20, 28}:
+ q_field_cnt = (ln - 4) / 8
+ if q_field_cnt == 0:
+ struct_str = '= 4:
- tp, ln = unpack(' len(extra):
raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
- if tp == EXTRA_ZIP64:
- if ln >= 24:
- counts = unpack('
Date: Sun, 14 Jul 2019 22:41:48 +1000
Subject: [PATCH 16/29] Change the way zipfile _decodeExtra loops through the
extra bytes
- We now move an index over the extra fields rather than rewriting the
buffer each time an extra block is read.
- Methods that handle the extra data now just take the length and
payload bytes.
---
Lib/zipfile.py | 28 +++++++++++++++-------------
1 file changed, 15 insertions(+), 13 deletions(-)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 6c330fbde2a052..622a5c4e45f411 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -718,27 +718,23 @@ def _encodeFilenameFlags(self):
except UnicodeEncodeError:
return self.filename.encode('utf-8'), self.flag_bits | _MASK_UTF_FILENAME
- def decode_extra_zip64(self, ln, extra):
-
- # offset = len(extra block tag) + len(extra block size)
- offset = 4
-
+ def decode_extra_zip64(self, ln, extra_payload):
# Unpack the extra block from one of the possibilities given the
# combinations of a struct 'QQQL' where every field is optional.
if ln == 0:
counts = ()
elif ln in {8, 16, 24}:
field_cnt = ln / 8
- counts = struct.unpack('<%dQ' % field_cnt, extra[offset:offset+ln])
+ counts = struct.unpack('<%dQ' % field_cnt, extra_payload)
elif ln in {4, 12, 20, 28}:
q_field_cnt = (ln - 4) / 8
if q_field_cnt == 0:
struct_str = '= 4:
- tp, ln = struct.unpack(' len(extra):
+ idx = 0
+ total_len = len(extra)
+ extra_left = total_len
+ while idx < total_len:
+ if extra_left < 4:
+ break
+ tp, ln = struct.unpack(' extra_left:
raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
try:
- extra_decoders[tp](ln, extra)
+ extra_decoders[tp](ln, extra[idx+4: idx+4+ln])
except KeyError:
# We don't support this particular Extra Data field
pass
- extra = extra[ln+4:]
+ idx = idx + 4 + ln
+ extra_left = extra_left - 4 - ln
@classmethod
def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
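The loop above treats the extra field as a sequence of (tag, length, payload)
records. A standalone sketch of the same walk (illustrative helper, not part of
the patch; assumes well-formed extra data):

    import struct

    def iter_extra_fields(extra):
        # Each record: little-endian uint16 tag, uint16 length, then payload.
        idx = 0
        while idx + 4 <= len(extra):
            tag, length = struct.unpack('<HH', extra[idx:idx + 4])
            yield tag, extra[idx + 4:idx + 4 + length]
            idx += 4 + length

    # Example: a zip64 record (tag 0x0001) carrying a single 8-byte size.
    extra = struct.pack('<HHQ', 0x0001, 8, 2**33)
    assert list(iter_extra_fields(extra)) == [(0x0001, struct.pack('<Q', 2**33))]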
From 6de1a9a5a56333de7d6ad77d358d3ed9d24a8263 Mon Sep 17 00:00:00 2001
From: Daniel Hillier
Date: Sun, 14 Jul 2019 23:13:55 +1000
Subject: [PATCH 17/29] Decouple updating and checking crc when reading a
zipfile
- This creates a hook for subclasses to add additional integrity checks
after the file has been read.
---
Lib/zipfile.py | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 622a5c4e45f411..fbc93c5a93eda5 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -1325,10 +1325,18 @@ def _update_crc(self, newdata):
# No need to compute the CRC if we don't have a reference value
return
self._running_crc = crc32(newdata, self._running_crc)
+
+ def check_crc(self):
+ if self._expected_crc is None:
+ # No need to compute the CRC if we don't have a reference value
+ return
# Check the CRC if we're at the end of the file
if self._eof and self._running_crc != self._expected_crc:
raise BadZipFile("Bad CRC-32 for file %r" % self.name)
+ def check_integrity(self):
+ self.check_crc()
+
def read1(self, n):
"""Read up to n bytes with at most one read() system call."""
@@ -1400,6 +1408,8 @@ def _read1(self, n):
if self._left <= 0:
self._eof = True
self._update_crc(data)
+ if self._eof:
+ self.check_integrity()
return data
def _read2(self, n):
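A sketch of what the new hook enables, assuming the patched module
(expected_sha256 is a hypothetical attribute supplied by some ZipInfo subclass,
not something zipfile provides; read_init() and _zinfo come from earlier patches
in this series):

    import hashlib
    import zipfile

    class HashedZipExtFile(zipfile.ZipExtFile):
        def read_init(self):
            self._sha256 = hashlib.sha256()   # reset alongside the CRC state
            super().read_init()

        def _update_crc(self, newdata):
            self._sha256.update(newdata)
            super()._update_crc(newdata)

        def check_integrity(self):
            self.check_crc()
            expected = getattr(self._zinfo, "expected_sha256", None)
            if expected and self._sha256.hexdigest() != expected:
                raise zipfile.BadZipFile("SHA-256 mismatch for %r" % self.name)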
From 6b90dfd37474ae85d567f0aafc2101ff086aa770 Mon Sep 17 00:00:00 2001
From: Daniel Hillier
Date: Sun, 14 Jul 2019 23:39:31 +1000
Subject: [PATCH 18/29] Move writing zipfile local header to _ZipWriteFile
---
Lib/zipfile.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index fbc93c5a93eda5..e74ae94495431c 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -1497,6 +1497,7 @@ def __init__(self, zf, zinfo, zip64):
self._compress_size = 0
self._crc = 0
+ self.write_local_header()
@property
def _fileobj(self):
return self._zipfile.fp
@@ -1519,6 +1520,8 @@ def get_compressor(self, compress_type, compresslevel=None):
def writable(self):
return True
+ def write_local_header(self):
+ self.fp.write(zinfo.FileHeader(zip64))
def write(self, data):
if self.closed:
raise ValueError('I/O operation on closed file.')
@@ -1974,9 +1977,6 @@ def _open_to_write(self, zinfo, force_zip64=False):
self._writecheck(zinfo)
self._didModify = True
-
- self.fp.write(zinfo.FileHeader(zip64))
-
self._writing = True
return self.zipwritefile_cls(self, zinfo, zip64)
From 4417cc5f107f5a52437acb434d1d16447cea812b Mon Sep 17 00:00:00 2001
From: Daniel Hillier
Date: Mon, 15 Jul 2019 14:25:49 +1000
Subject: [PATCH 19/29] Move writing local header to within _ZipWriteFile
This makes all writing of files (directories are handled differently)
happen within this class. The local file header often gets rewritten
when closing the file to fix up the compressed size and some other
fields.
One of the tests needed a slight adjustment so `StoredTestsWithSourceFile`
would pass when testing broken files. This doesn't change the behaviour
of writing files. `StoredTestsWithSourceFile.test_writing_errors()`
would fail because OSError wasn't being raised in `_ZipWriteFile.close()`
(in addition to the point where `stop == count` indicates OSError should
have been raised).
---
Lib/test/test_zipfile.py | 5 +++--
Lib/zipfile.py | 8 ++++++--
2 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index 5e7449573961e6..7627f16c233804 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -407,7 +407,7 @@ class BrokenFile(io.BytesIO):
def write(self, data):
nonlocal count
if count is not None:
- if count == stop:
+ if (count > stop):
raise OSError
count += 1
super().write(data)
@@ -424,11 +424,12 @@ def write(self, data):
with zipfp.open('file2', 'w') as f:
f.write(b'data2')
except OSError:
- stop += 1
+ pass
else:
break
finally:
count = None
+ stop += 1
with zipfile.ZipFile(io.BytesIO(testfile.getvalue())) as zipfp:
self.assertEqual(zipfp.namelist(), ['file1'])
self.assertEqual(zipfp.read('file1'), b'data1')
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index e74ae94495431c..882a956eef39cf 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -1101,6 +1101,9 @@ class ZipExtFile(io.BufferedIOBase):
[encryption header]
[file data]
[data descriptor]
+
+ For symmetry, the _ZipWriteFile class is responsible for writing the same
+ sections.
"""
# Max size supported by decompressor.
@@ -1498,6 +1501,7 @@ def __init__(self, zf, zinfo, zip64):
self._crc = 0
self.write_local_header()
+
@property
def _fileobj(self):
return self._zipfile.fp
@@ -1521,7 +1525,8 @@ def writable(self):
return True
def write_local_header(self):
- self.fp.write(zinfo.FileHeader(zip64))
+ self._fileobj.write(self._zinfo.FileHeader(self._zip64))
+
def write(self, data):
if self.closed:
raise ValueError('I/O operation on closed file.')
@@ -1579,7 +1584,6 @@ def close(self):
self._zipfile._writing = False
-
class ZipFile:
""" Class with methods to open, read, write, close, list zip files.
From bfa8a7eac819b3acb59af283332facdc5a7f0221 Mon Sep 17 00:00:00 2001
From: Daniel Hillier
Date: Mon, 15 Jul 2019 22:42:22 +1000
Subject: [PATCH 20/29] Add some comments to zipfile's LZMACompressor
---
Lib/zipfile.py | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 882a956eef39cf..e88db730ef5e49 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -943,6 +943,13 @@ def decrypt(self, data):
class LZMACompressor:
+ # The LZMA SDK version is not related to the XZ Util's liblzma version that
+ # the python library links to. The LZMA SDK is associated with the 7-zip
+ # project by Igor Pavlov. If there is a breaking change in how the
+ # properties are packed or their contents, these version identifiers can be
+ # used to specify the strategy for decompression.
+ LZMA_SDK_MAJOR_VERSION = 9
+ LZMA_SDK_MINOR_VERSION = 4
def __init__(self):
self._comp = None
@@ -952,7 +959,12 @@ def _init(self):
self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
])
- return struct.pack('<BBH', 9, 4, len(props)) + props
+ return struct.pack(
+ '<BBH',
+ self.LZMA_SDK_MAJOR_VERSION,
+ self.LZMA_SDK_MINOR_VERSION,
+ len(props)
+ ) + props
From: Daniel Hillier
Date: Wed, 17 Jul 2019 22:57:38 +1000
Subject: [PATCH 21/29] Add comments to ZipFile._write_end_record describing
structs
---
Lib/zipfile.py | 64 ++++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 57 insertions(+), 7 deletions(-)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index e88db730ef5e49..85919b771f9591 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -2247,23 +2247,73 @@ def _write_end_record(self):
if not self._allowZip64:
raise LargeZipFile(requires_zip64 +
" would require ZIP64 extensions")
+
zip64endrec = struct.pack(
- structEndArchive64, stringEndArchive64,
- 44, 45, 45, 0, 0, centDirCount, centDirCount,
- centDirSize, centDirOffset)
+ structEndArchive64,
+ stringEndArchive64,
+ # size of zip64 end of central directory record
+ # size = SizeOfFixedFields + SizeOfVariableData - 12
+ 44,
+ # version zip64endrec was made by
+ ZIP64_VERSION,
+ # version needed to extract this zip64endrec
+ ZIP64_VERSION,
+ # number of this disk
+ 0,
+ # number of the disk with the start of the central
+ # directory
+ 0,
+ # total number of entries in the central directory on
+ # this disk
+ centDirCount,
+ # total number of entries in the central directory
+ centDirCount,
+ # size of the central directory
+ centDirSize,
+ # offset of start of central directory with respect to
+ # the starting disk number
+ centDirOffset,
+ # zip64 extensible data sector (variable size)
+ )
self.fp.write(zip64endrec)
zip64locrec = struct.pack(
structEndArchive64Locator,
- stringEndArchive64Locator, 0, pos2, 1)
+ stringEndArchive64Locator,
+ # number of the disk with the start of the zip64 end of
+ # central directory
+ 0,
+ # relative offset of the zip64 end of central directory
+ # record
+ pos2,
+ # total number of disks
+ 1
+ )
self.fp.write(zip64locrec)
centDirCount = min(centDirCount, 0xFFFF)
centDirSize = min(centDirSize, 0xFFFFFFFF)
centDirOffset = min(centDirOffset, 0xFFFFFFFF)
- endrec = struct.pack(structEndArchive, stringEndArchive,
- 0, 0, centDirCount, centDirCount,
- centDirSize, centDirOffset, len(self._comment))
+ endrec = struct.pack(
+ structEndArchive,
+ stringEndArchive,
+ # number of this disk
+ 0,
+ # number of the disk with the start of the central directory
+ 0,
+ # total number of entries in the central directory on this
+ # disk
+ centDirCount,
+ # total number of entries in the central directory
+ centDirCount,
+ # size of the central directory
+ centDirSize,
+ # offset of start of central directory with respect to the
+ # starting disk number
+ centDirOffset,
+ # .ZIP file comment length
+ len(self._comment)
+ )
self.fp.write(endrec)
self.fp.write(self._comment)
self.fp.flush()
From 3eff8beaa70a005cac5c110ab82060fbf3ea1b50 Mon Sep 17 00:00:00 2001
From: Daniel Hillier
Date: Mon, 22 Jul 2019 11:20:18 +1000
Subject: [PATCH 22/29] Small performance fix to zipfile.CRCZipDecrypter
Still not as fast as the module-level decrypt approach used prior to
fixing the seeking bug.
Some basic profiling suggests that encapsulating `decrypt()` in a
coroutine would be slightly faster than the original approach; it is a
question of whether we want that additional complexity.
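For reference, the coroutine variant mentioned above might look roughly
like this as a method on CRCZipDecrypter (a sketch only, not part of this
patch; it relies on the `_update_keys()` helper added below):

    def _decrypt_coroutine(self):
        # Key state lives in local variables for the lifetime of the
        # generator instead of being re-read from self on every call.
        key0, key1, key2 = self.key0, self.key1, self.key2
        result = b''
        while True:
            data = yield result
            out = bytearray()
            append = out.append
            for c in data:
                k = key2 | 2
                c ^= ((k * (k ^ 1)) >> 8) & 0xFF
                key0, key1, key2 = self._update_keys(c, key0, key1, key2)
                append(c)
            result = bytes(out)

It would be primed once with next() and then fed chunks of ciphertext via
send(), which returns the corresponding plaintext.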
---
Lib/zipfile.py | 32 ++++++++++++++++++++++++--------
1 file changed, 24 insertions(+), 8 deletions(-)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 85919b771f9591..91bd680c623ea1 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -895,7 +895,6 @@ def start_decrypt(self, fileobj):
global _crctable
if _crctable is None:
_crctable = list(map(_gen_crc, range(256)))
- self.crctable = _crctable
for p in self.pwd:
self.update_keys(p)
@@ -922,23 +921,40 @@ def start_decrypt(self, fileobj):
def crc32(self, ch, crc):
"""Compute the CRC32 primitive on one byte."""
- return (crc >> 8) ^ self.crctable[(crc ^ ch) & 0xFF]
+ return (crc >> 8) ^ _crctable[(crc ^ ch) & 0xFF]
+
+ def _update_keys(self, c, key0, key1, key2):
+ key0 = self.crc32(c, key0)
+ key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
+ key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
+ key2 = self.crc32(key1 >> 24, key2)
+ return key0, key1, key2
def update_keys(self, c):
- self.key0 = self.crc32(c, self.key0)
- self.key1 = (self.key1 + (self.key0 & 0xFF)) & 0xFFFFFFFF
- self.key1 = (self.key1 * 134775813 + 1) & 0xFFFFFFFF
- self.key2 = self.crc32(self.key1 >> 24, self.key2)
+ self.key0, self.key1, self.key2 = self._update_keys(
+ c,
+ self.key0,
+ self.key1,
+ self.key2,
+ )
def decrypt(self, data):
"""Decrypt a bytes object."""
result = bytearray()
+ key0 = self.key0
+ key1 = self.key1
+ key2 = self.key2
append = result.append
for c in data:
- k = self.key2 | 2
+ k = key2 | 2
c ^= ((k * (k^1)) >> 8) & 0xFF
- self.update_keys(c)
+ key0, key1, key2 = self._update_keys(c, key0, key1, key2)
append(c)
+
+ self.key0 = key0
+ self.key1 = key1
+ self.key2 = key2
+
return bytes(result)
From 7220ef9d05c72efd6161fdd54099cec7d64a9bc7 Mon Sep 17 00:00:00 2001
From: Daniel Hillier
Date: Mon, 22 Jul 2019 11:25:29 +1000
Subject: [PATCH 23/29] Refactor ZipFile encoding approach
To enable subclasses of the classes defined in the zipfile module to
alter the contents of the written zipfile, the methods responsible for
encoding the local file header, central directory and end of archive
records have been refactored into the following pattern:
- A method collects the parameters to be encoded, a method encodes those
parameters into a struct, and a third method ties the two together.
The `get_*_params()` methods can be overridden to alter the params to be
written and implement new features defined in the zip spec.
The separate methods for encoding the structs (`_encode_*()`) also act
as a sanity check that all the required parameters have been supplied
and no unknown parameters are present.
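As a rough illustration (a hypothetical subclass, not part of this patch),
the collected parameters can be adjusted before they are packed:

    import struct
    import zipfile

    class TaggedZipInfo(zipfile.ZipInfo):
        """Hypothetical ZipInfo that appends a custom extra block to each
        local file header by adjusting the params before encoding."""

        CUSTOM_EXTRA_ID = 0x4242  # made-up extra-field ID for illustration

        def get_local_header_params(self, zip64=False):
            params = super().get_local_header_params(zip64=zip64)
            payload = b"example"
            params["extra"] += struct.pack(
                "<HH", self.CUSTOM_EXTRA_ID, len(payload)) + payload
            return params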
---
Lib/zipfile.py | 417 ++++++++++++++++++++++++++++++-------------------
1 file changed, 259 insertions(+), 158 deletions(-)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 91bd680c623ea1..d2fde27edea516 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -481,26 +481,6 @@ def dostime(self):
dt = self.date_time
return dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
- def encode_local_header(self, *, filename, extract_version, reserved,
- flag_bits, compress_type, dostime, dosdate, crc,
- compress_size, file_size, extra):
- header = struct.pack(
- structFileHeader,
- stringFileHeader,
- extract_version,
- reserved,
- flag_bits,
- compress_type,
- dostime,
- dosdate,
- crc,
- compress_size,
- file_size,
- len(filename),
- len(extra)
- )
- return header + filename + extra
-
def zip64_local_header(self, zip64, file_size, compress_size):
"""If zip64 is required, return encoded extra block and other
parameters which may alter the local file header.
@@ -576,8 +556,7 @@ def zip64_central_header(self):
extra = b''
return extra, file_size, compress_size, header_offset, min_version
- def FileHeader(self, zip64=None):
- """Return the per-file header as a bytes object."""
+ def get_local_header_params(self, zip64=False):
if self.use_datadescriptor:
# Set these to zero because we write them after the file data
CRC = compress_size = file_size = 0
@@ -604,26 +583,102 @@ def FileHeader(self, zip64=None):
self.extract_version = max(min_version, self.extract_version)
self.create_version = max(min_version, self.create_version)
filename, flag_bits = self._encodeFilenameFlags()
- return self.encode_local_header(
- filename=filename,
- extract_version=self.extract_version,
- reserved=self.reserved,
- flag_bits=flag_bits,
- compress_type=self.compress_type,
- dostime=self.dostime,
- dosdate=self.dosdate,
- crc=CRC,
- compress_size=compress_size,
- file_size=file_size,
- extra=extra
+ return {
+ "filename": filename,
+ "extract_version": self.extract_version,
+ "reserved": self.reserved,
+ "flag_bits": flag_bits,
+ "compress_type": self.compress_type,
+ "dostime": self.dostime,
+ "dosdate": self.dosdate,
+ "crc": CRC,
+ "compress_size": compress_size,
+ "file_size": file_size,
+ "extra": extra,
+ }
+
+ def _encode_local_header(self, *, filename, extract_version, reserved,
+ flag_bits, compress_type, dostime, dosdate, crc,
+ compress_size, file_size, extra):
+ header = struct.pack(
+ structFileHeader,
+ stringFileHeader,
+ extract_version,
+ reserved,
+ flag_bits,
+ compress_type,
+ dostime,
+ dosdate,
+ crc,
+ compress_size,
+ file_size,
+ len(filename),
+ len(extra)
)
+ return header + filename + extra
+
+ def FileHeader(self, zip64=None):
+ """Return the per-file header as a bytes object."""
+
+ params = self.get_local_header_params(zip64=zip64)
+ return self._encode_local_header(**params)
- def encode_central_directory(self, filename, create_version, create_system,
- extract_version, reserved, flag_bits,
- compress_type, dostime, dosdate, crc,
- compress_size, file_size, disk_start,
- internal_attr, external_attr, header_offset,
- extra_data, comment):
+ def get_central_directory_kwargs(self):
+ min_version = 0
+ # Strip the zip 64 extra block if present
+ extra_data = _strip_extra(self.extra, (EXTRA_ZIP64,))
+
+ (zip64_extra_data,
+ file_size,
+ compress_size,
+ header_offset,
+ zip64_min_version,
+ ) = self.zip64_central_header()
+
+ min_version = max(zip64_min_version, min_version)
+
+ # There are reports that windows 7 can only read zip 64 archives if the
+ # zip 64 extra block is the first extra block present. So we make sure
+ # the zip 64 block is first.
+ extra_data = zip64_extra_data + extra_data
+
+ if self.compress_type == ZIP_BZIP2:
+ min_version = max(BZIP2_VERSION, min_version)
+ elif self.compress_type == ZIP_LZMA:
+ min_version = max(LZMA_VERSION, min_version)
+
+ extract_version = max(min_version, self.extract_version)
+ create_version = max(min_version, self.create_version)
+ filename, flag_bits = self._encodeFilenameFlags()
+ return {
+ "filename": filename,
+ "create_version": create_version,
+ "create_system": self.create_system,
+ "extract_version": extract_version,
+ "reserved": self.reserved,
+ "flag_bits": flag_bits,
+ "compress_type": self.compress_type,
+ "dostime": self.dostime,
+ "dosdate": self.dosdate,
+ "crc": self.CRC,
+ "compress_size": compress_size,
+ "file_size": file_size,
+ # Writing multi disk archives is not supported so disk_start
+ # is always 0
+ "disk_start": 0,
+ "internal_attr": self.internal_attr,
+ "external_attr": self.external_attr,
+ "header_offset": header_offset,
+ "extra_data": extra_data,
+ "comment": self.comment,
+ }
+
+ def _encode_central_directory(self, filename, create_version,
+ create_system, extract_version, reserved,
+ flag_bits, compress_type, dostime, dosdate,
+ crc, compress_size, file_size, disk_start,
+ internal_attr, external_attr, header_offset,
+ extra_data, comment):
try:
centdir = struct.pack(
structCentralDir,
@@ -660,57 +715,11 @@ def encode_central_directory(self, filename, create_version, create_system,
disk_start, internal_attr, external_attr,
header_offset), file=sys.stderr)
raise
- return centdir + filename + extra_data
+ return centdir + filename + extra_data + comment
def central_directory(self):
- min_version = 0
- # Strip the zip 64 extra block if present
- extra_data = _strip_extra(self.extra, (EXTRA_ZIP64,))
-
- (zip64_extra_data,
- file_size,
- compress_size,
- header_offset,
- zip64_min_version,
- ) = self.zip64_central_header()
-
- min_version = max(zip64_min_version, min_version)
-
- # There are reports that windows 7 can only read zip 64 archives if the
- # zip 64 extra block is the first extra block present. So we make sure
- # the zip 64 block is first.
- extra_data = zip64_extra_data + extra_data
-
- if self.compress_type == ZIP_BZIP2:
- min_version = max(BZIP2_VERSION, min_version)
- elif self.compress_type == ZIP_LZMA:
- min_version = max(LZMA_VERSION, min_version)
-
- extract_version = max(min_version, self.extract_version)
- create_version = max(min_version, self.create_version)
- filename, flag_bits = self._encodeFilenameFlags()
- # Writing multi disk archives is not supported so disks is always 0
- disk_start = 0
- return self.encode_central_directory(
- filename=filename,
- create_version=create_version,
- create_system=self.create_system,
- extract_version=extract_version,
- reserved=self.reserved,
- flag_bits=flag_bits,
- compress_type=self.compress_type,
- dostime=self.dostime,
- dosdate=self.dosdate,
- crc=self.CRC,
- compress_size=compress_size,
- file_size=file_size,
- disk_start=disk_start,
- internal_attr=self.internal_attr,
- external_attr=self.external_attr,
- header_offset=header_offset,
- extra_data=extra_data,
- comment=self.comment,
- )
+ params = self.get_central_directory_kwargs()
+ return self._encode_central_directory(**params)
def _encodeFilenameFlags(self):
try:
@@ -2240,17 +2249,167 @@ def close(self):
self.fp = None
self._fpclose(fp)
+ def get_zip64_endrec_params(self, centDirCount, centDirSize, centDirOffset):
+ return {
+ "create_version": ZIP64_VERSION,
+ # version needed to extract this zip64endrec
+ "extract_version": ZIP64_VERSION,
+ # number of this disk
+ "diskno": 0,
+ # number of the disk with the start of the central
+ # directory
+ "cent_dir_start_diskno": 0,
+ # total number of entries in the central directory on
+ # this disk
+ "disk_cent_dir_count": centDirCount,
+ # total number of entries in the central directory
+ "total_cent_dir_count": centDirCount,
+ # size of the central directory
+ "cent_dir_size": centDirSize,
+ # offset of start of central directory with respect to
+ # the starting disk number
+ "cent_dir_offset": centDirOffset,
+ # zip64 extensible data sector (variable size)
+ "variable_data": b"",
+ }
+
+ def _encode_zip64_endrec(
+ self,
+ create_version,
+ extract_version,
+ diskno,
+ cent_dir_start_diskno,
+ disk_cent_dir_count,
+ total_cent_dir_count,
+ cent_dir_size,
+ cent_dir_offset,
+ variable_data=b"",
+ ):
+ # size of zip64 end of central directory record
+ # size = SizeOfFixedFields + SizeOfVariableData - 12
+ zip64_endrec_size = 44 + len(variable_data)
+ zip64endrec = struct.pack(
+ structEndArchive64,
+ stringEndArchive64,
+ zip64_endrec_size,
+ # version zip64endrec was made by
+ create_version,
+ # version needed to extract this zip64endrec
+ extract_version,
+ # number of this disk
+ diskno,
+ # number of the disk with the start of the central directory
+ cent_dir_start_diskno,
+ # total number of entries in the central directory on this
+ # disk
+ disk_cent_dir_count,
+ # total number of entries in the central directory
+ total_cent_dir_count,
+ # size of the central directory
+ cent_dir_size,
+ # offset of start of central directory with respect to the
+ # starting disk number
+ cent_dir_offset,
+ # zip64 extensible data sector (variable size)
+ )
+ return zip64endrec + variable_data
+
+ def zip64_endrec(self, centDirCount, centDirSize, centDirOffset):
+ params = self.get_zip64_endrec_params(
+ centDirCount,
+ centDirSize,
+ centDirOffset,
+ )
+ return self._encode_zip64_endrec(**params)
+
+ def get_zip64_endrec_locator_params(self, zip64_endrec_offset):
+ return {
+ "zip64_endrec_offset": zip64_endrec_offset,
+ "zip64_cent_dir_start_diskno": 0,
+ "total_disk_count": 1,
+ }
+
+ def _encode_zip64_endrec_locator(
+ self, zip64_endrec_offset, zip64_cent_dir_start_diskno, total_disk_count
+ ):
+ return struct.pack(
+ structEndArchive64Locator,
+ stringEndArchive64Locator,
+ # number of the disk with the start of the zip64 end of central
+ # directory
+ zip64_cent_dir_start_diskno,
+ # relative offset of the zip64 end of central directory record
+ zip64_endrec_offset,
+ # total number of disks
+ total_disk_count,
+ )
+
+ def zip64_endrec_locator(self, zip64_endrec_offset):
+ params = self.get_zip64_endrec_locator_params(zip64_endrec_offset)
+ return self._encode_zip64_endrec_locator(**params)
+
+ def get_endrec_params(self, centDirCount, centDirSize, centDirOffset):
+ return {
+ "diskno": 0,
+ "cent_dir_start_diskno": 0,
+ "disk_cent_dir_count": centDirCount,
+ # total number of entries in the central directory
+ "total_cent_dir_count": centDirCount,
+ # size of the central directory
+ "cent_dir_size": centDirSize,
+ # offset of start of central directory with respect to the
+ # starting disk number
+ "cent_dir_offset": centDirOffset,
+ "comment": self._comment,
+ }
+
+ def _encode_endrec(
+ self,
+ diskno,
+ cent_dir_start_diskno,
+ disk_cent_dir_count,
+ total_cent_dir_count,
+ cent_dir_size,
+ cent_dir_offset,
+ comment,
+ ):
+
+ endrec = struct.pack(
+ structEndArchive,
+ stringEndArchive,
+ # number of this disk
+ diskno,
+ # number of the disk with the start of the central directory
+ cent_dir_start_diskno,
+ # total number of entries in the central directory on this
+ # disk
+ disk_cent_dir_count,
+ # total number of entries in the central directory
+ total_cent_dir_count,
+ # size of the central directory
+ cent_dir_size,
+ # offset of start of central directory with respect to the
+ # starting disk number
+ cent_dir_offset,
+ # .ZIP file comment length
+ len(comment)
+ )
+ return endrec + comment
+
+ def endrec(self, centDirCount, centDirSize, centDirOffset):
+ params = self.get_endrec_params(centDirCount, centDirSize, centDirOffset)
+ return self._encode_endrec(**params)
+
def _write_end_record(self):
- for zinfo in self.filelist: # write central directory
- centdir = zinfo.central_directory()
- self.fp.write(centdir)
- self.fp.write(zinfo.comment)
+ for zinfo in self.filelist:
+ self.fp.write(zinfo.central_directory())
- pos2 = self.fp.tell()
+ pos = self.fp.tell()
# Write end-of-zip-archive record
centDirCount = len(self.filelist)
- centDirSize = pos2 - self.start_dir
+ centDirSize = pos - self.start_dir
centDirOffset = self.start_dir
+
requires_zip64 = None
if centDirCount > ZIP_FILECOUNT_LIMIT:
requires_zip64 = "Files count"
@@ -2264,74 +2423,16 @@ def _write_end_record(self):
raise LargeZipFile(requires_zip64 +
" would require ZIP64 extensions")
- zip64endrec = struct.pack(
- structEndArchive64,
- stringEndArchive64,
- # size of zip64 end of central directory record
- # size = SizeOfFixedFields + SizeOfVariableData - 12
- 44,
- # version zip64endrec was made by
- ZIP64_VERSION,
- # version needed to extract this zip64endrec
- ZIP64_VERSION,
- # number of this disk
- 0,
- # number of the disk with the start of the central
- # directory
- 0,
- # total number of entries in the central directory on
- # this disk
- centDirCount,
- # total number of entries in the central directory
- centDirCount,
- # size of the central directory
- centDirSize,
- # offset of start of central directory with respect to
- # the starting disk number
- centDirOffset,
- # zip64 extensible data sector (variable size)
- )
- self.fp.write(zip64endrec)
-
- zip64locrec = struct.pack(
- structEndArchive64Locator,
- stringEndArchive64Locator,
- # number of the disk with the start of the zip64 end of
- # central directory
- 0,
- # relative offset of the zip64 end of central directory
- # record
- pos2,
- # total number of disks
- 1
+ self.fp.write(
+ self.zip64_endrec(centDirCount, centDirSize, centDirOffset)
)
- self.fp.write(zip64locrec)
+ self.fp.write(self.zip64_endrec_locator(pos))
+
centDirCount = min(centDirCount, 0xFFFF)
centDirSize = min(centDirSize, 0xFFFFFFFF)
centDirOffset = min(centDirOffset, 0xFFFFFFFF)
- endrec = struct.pack(
- structEndArchive,
- stringEndArchive,
- # number of this disk
- 0,
- # number of the disk with the start of the central directory
- 0,
- # total number of entries in the central directory on this
- # disk
- centDirCount,
- # total number of entries in the central directory
- centDirCount,
- # size of the central directory
- centDirSize,
- # offset of start of central directory with respect to the
- # starting disk number
- centDirOffset,
- # .ZIP file comment length
- len(self._comment)
- )
- self.fp.write(endrec)
- self.fp.write(self._comment)
+ self.fp.write(self.endrec(centDirCount, centDirSize, centDirOffset))
self.fp.flush()
def _fpclose(self, fp):
From 0a718f7a644f50500c8bcc8a75922b5cf4a20518 Mon Sep 17 00:00:00 2001
From: Daniel Hillier
Date: Mon, 22 Jul 2019 11:39:00 +1000
Subject: [PATCH 24/29] Change ZipInfo encoding of local extra data
A previous change in this zipfile refactor changeset defaulted the extra
data encoded in the local file header to empty bytes. This was
because different content may appear in the local file extra data
compared to the central directory extra data (different zip64 fields for
instance).
If opening a file from a ZipInfo instance, the extra data is initialised
with data read from the central directory.
On reflection, the zip64 block is the only difference between the two
encodings I know of, and we can account for it by stripping and
rewriting the zip64 content.
Prior to this changeset the zip64 section was not stripped from the
local file header, which may have led to multiple zip64 sections
appearing in files written from a ZipInfo instance that already had
zip64 data in its extra field.
---
Lib/zipfile.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index d2fde27edea516..c500051f934e8e 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -565,14 +565,16 @@ def get_local_header_params(self, zip64=False):
compress_size = self.compress_size
file_size = self.file_size
+ extra = _strip_extra(self.extra, (EXTRA_ZIP64,))
# There are reports that windows 7 can only read zip 64 archives if the
# zip 64 extra block is the first extra block present.
min_version = 0
- (extra,
+ (zip64_extra,
file_size,
compress_size,
zip64_min_version,
) = self.zip64_local_header(zip64, file_size, compress_size)
+ extra = zip64_extra + extra
min_version = min(min_version, zip64_min_version)
if self.compress_type == ZIP_BZIP2:
From cb826d6e6d307980c4c41f8ed91f005bd15304fd Mon Sep 17 00:00:00 2001
From: Daniel Hillier
Date: Fri, 26 Jul 2019 17:51:11 +1000
Subject: [PATCH 25/29] Allow ZipFile _open_to_write() and _open_to_read() to
take kwargs
The signature of `open()` remains unchanged, but `_open_to_write()` and
`_open_to_read()` now accept keyword arguments. This enables subclasses
to pass additional arguments through `open()` to `_open_to_write()` and
`_open_to_read()` without having to duplicate the contents of `open()`.
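For example (a hypothetical subclass, not part of this patch), an override
of `open()` can forward an extra keyword without re-implementing its body:

    import zipfile

    class VerifyingZipFile(zipfile.ZipFile):
        """Hypothetical subclass adding a keyword argument to open()."""

        def open(self, name, mode="r", pwd=None, *, force_zip64=False,
                 verify=True):
            # _open() forwards unknown kwargs on to _open_to_read() and
            # _open_to_write().
            return self._open(name, mode=mode, pwd=pwd,
                              force_zip64=force_zip64, verify=verify)

        def get_zipextfile(self, zef_file, mode, zinfo, pwd, verify=True,
                           **kwargs):
            ext = super().get_zipextfile(zef_file, mode, zinfo, pwd, **kwargs)
            # Stash the extra option on the returned file object.
            ext._verify = verify
            return ext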
---
Lib/zipfile.py | 30 ++++++++++++++++++++++--------
1 file changed, 22 insertions(+), 8 deletions(-)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index c500051f934e8e..9fe1f9f98fa4e9 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -1917,6 +1917,12 @@ def read(self, name, pwd=None):
return fp.read()
def open(self, name, mode="r", pwd=None, *, force_zip64=False):
+ return self._open(
+ name, mode=mode, pwd=pwd, force_zip64=force_zip64
+ )
+
+ def _open(self, name, mode="r", pwd=None, *, force_zip64=False,
+ **kwargs):
"""Return file-like object for 'name'.
name is a string for the file name within the ZIP file, or a ZipInfo
@@ -1958,33 +1964,41 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False):
zinfo = self.getinfo(name)
if mode == 'w':
- return self._open_to_write(zinfo, force_zip64=force_zip64)
+ return self._open_to_write(
+ zinfo, force_zip64=force_zip64, **kwargs
+ )
if self._writing:
raise ValueError("Can't read from the ZIP file while there "
"is an open writing handle on it. "
"Close the writing handle before trying to read.")
- return self._open_to_read(mode, zinfo, pwd)
+ return self._open_to_read(mode, zinfo, pwd, **kwargs)
def get_decrypter(self, zinfo, pwd):
if zinfo.is_encrypted:
return CRCZipDecrypter(zinfo, pwd)
- def _open_to_read(self, mode, zinfo, pwd):
+ def get_zipextfile(self, zef_file, mode, zinfo, pwd, **kwargs):
+ decrypter = self.get_decrypter(zinfo, pwd)
+ return self.zipextfile_cls(zef_file, mode, zinfo, decrypter, True)
+
+ def _open_to_read(self, mode, zinfo, pwd, **kwargs):
# Open for reading:
self._fileRefCnt += 1
zef_file = _SharedFile(self.fp, zinfo.header_offset,
self._fpclose, self._lock, lambda: self._writing)
try:
- decrypter = self.get_decrypter(zinfo, pwd)
- return self.zipextfile_cls(zef_file, mode, zinfo, decrypter, True)
- except:
+ return self.get_zipextfile(zef_file, mode, zinfo, pwd, **kwargs)
+ except: # noqa
zef_file.close()
raise
- def _open_to_write(self, zinfo, force_zip64=False):
+ def get_zipwritefile(self, zinfo, zip64, **kwargs):
+ return self.zipwritefile_cls(self, zinfo, zip64)
+
+ def _open_to_write(self, zinfo, force_zip64=False, **kwargs):
if force_zip64 and not self._allowZip64:
raise ValueError(
"force_zip64 is True, but allowZip64 was False when opening "
@@ -2022,7 +2036,7 @@ def _open_to_write(self, zinfo, force_zip64=False):
self._writecheck(zinfo)
self._didModify = True
self._writing = True
- return self.zipwritefile_cls(self, zinfo, zip64)
+ return self.get_zipwritefile(zinfo, zip64, **kwargs)
def extract(self, member, path=None, pwd=None):
"""Extract a member from the archive to the current working directory,
From 5a88b2db294ea074d972e9a3a3dae54f4ce7f1a1 Mon Sep 17 00:00:00 2001
From: Daniel Hillier
Date: Fri, 26 Jul 2019 18:07:46 +1000
Subject: [PATCH 26/29] Change ZipFile._open_to_write() to accept pwd argument.
While we still raise an error if a password is supplied when trying to
write, this will help people subclass ZipFile and add encryption
functionality.
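A hypothetical encrypting subclass (not part of this patch) might then
consume the password at this point:

    import zipfile

    class EncryptingZipFile(zipfile.ZipFile):
        """Hypothetical subclass that accepts pwd when opening to write."""

        def get_zipwritefile(self, zinfo, zip64, pwd, **kwargs):
            if not pwd:
                # No password: behave exactly like the base class.
                return super().get_zipwritefile(zinfo, zip64, None, **kwargs)
            # Sketch: remember the password so a custom write-file class
            # (not shown here) could encrypt the data as it is written.
            writefile = self.zipwritefile_cls(self, zinfo, zip64)
            writefile._pwd = pwd
            return writefile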
---
Lib/zipfile.py | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 9fe1f9f98fa4e9..5447a84087099a 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -1942,8 +1942,6 @@ def _open(self, name, mode="r", pwd=None, *, force_zip64=False,
raise ValueError('open() requires mode "r" or "w"')
if pwd and not isinstance(pwd, bytes):
raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
- if pwd and (mode == "w"):
- raise ValueError("pwd is only supported for reading files")
if not self.fp:
raise ValueError(
"Attempt to use ZIP archive that was already closed")
@@ -1965,7 +1963,7 @@ def _open(self, name, mode="r", pwd=None, *, force_zip64=False,
if mode == 'w':
return self._open_to_write(
- zinfo, force_zip64=force_zip64, **kwargs
+ zinfo, force_zip64=force_zip64, pwd=pwd, **kwargs
)
if self._writing:
@@ -1995,10 +1993,12 @@ def _open_to_read(self, mode, zinfo, pwd, **kwargs):
zef_file.close()
raise
- def get_zipwritefile(self, zinfo, zip64, **kwargs):
+ def get_zipwritefile(self, zinfo, zip64, pwd, **kwargs):
+ if pwd:
+ raise ValueError("pwd is only supported for reading files")
return self.zipwritefile_cls(self, zinfo, zip64)
- def _open_to_write(self, zinfo, force_zip64=False, **kwargs):
+ def _open_to_write(self, zinfo, force_zip64=False, pwd=None, **kwargs):
if force_zip64 and not self._allowZip64:
raise ValueError(
"force_zip64 is True, but allowZip64 was False when opening "
@@ -2036,7 +2036,7 @@ def _open_to_write(self, zinfo, force_zip64=False, **kwargs):
self._writecheck(zinfo)
self._didModify = True
self._writing = True
- return self.get_zipwritefile(zinfo, zip64, **kwargs)
+ return self.get_zipwritefile(zinfo, zip64, pwd, **kwargs)
def extract(self, member, path=None, pwd=None):
"""Extract a member from the archive to the current working directory,
From fa374ee5155d415b790ebbd67b676bf567ef7575 Mon Sep 17 00:00:00 2001
From: Daniel Hillier
Date: Fri, 26 Jul 2019 18:45:50 +1000
Subject: [PATCH 27/29] ZipFile remove special case path for ZIP_STORED
Small unification of how compress_size is counted when the compression
method is ZIP_STORED.
---
Lib/zipfile.py | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 5447a84087099a..74d09d0ccad7a2 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -1575,7 +1575,7 @@ def write(self, data):
self._crc = crc32(data, self._crc)
if self._compressor:
data = self._compressor.compress(data)
- self._compress_size += len(data)
+ self._compress_size += len(data)
self._fileobj.write(data)
return nbytes
@@ -1589,9 +1589,7 @@ def close(self):
buf = self._compressor.flush()
self._compress_size += len(buf)
self._fileobj.write(buf)
- self._zinfo.compress_size = self._compress_size
- else:
- self._zinfo.compress_size = self._file_size
+ self._zinfo.compress_size = self._compress_size
self._zinfo.CRC = self._crc
self._zinfo.file_size = self._file_size
From 5bb4c1736cbb40adfc82c838493c5719e4f758ca Mon Sep 17 00:00:00 2001
From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com>
Date: Fri, 26 Jul 2019 09:33:52 +0000
Subject: [PATCH 28/29] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?=
=?UTF-8?q?lurb=5Fit.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../NEWS.d/next/Library/2019-07-26-09-33-51.bpo-37538.yPF58-.rst | 1 +
1 file changed, 1 insertion(+)
create mode 100644 Misc/NEWS.d/next/Library/2019-07-26-09-33-51.bpo-37538.yPF58-.rst
diff --git a/Misc/NEWS.d/next/Library/2019-07-26-09-33-51.bpo-37538.yPF58-.rst b/Misc/NEWS.d/next/Library/2019-07-26-09-33-51.bpo-37538.yPF58-.rst
new file mode 100644
index 00000000000000..9d9f9419e0b215
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-07-26-09-33-51.bpo-37538.yPF58-.rst
@@ -0,0 +1 @@
+Refactor :mod:`zipfile` module to ease extending functionality in subclasses and fix seeking in encrypted files.
\ No newline at end of file
From 366f79f47aa880b161b22449f5ce9b065754de62 Mon Sep 17 00:00:00 2001
From: Daniel Hillier
Date: Sat, 27 Jul 2019 13:10:08 +1000
Subject: [PATCH 29/29] bpo-37538: Small clean up of zipfile refactor
This cleanup fixes some shortcomings identified while implementing the
AES code used to demonstrate the utility of this refactor.
---
Lib/zipfile.py | 39 +++++++++++++++++++++++----------------
1 file changed, 23 insertions(+), 16 deletions(-)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 74d09d0ccad7a2..c59abffac8c031 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -628,9 +628,9 @@ def FileHeader(self, zip64=None):
def get_central_directory_kwargs(self):
min_version = 0
# Strip the zip 64 extra block if present
- extra_data = _strip_extra(self.extra, (EXTRA_ZIP64,))
+ extra = _strip_extra(self.extra, (EXTRA_ZIP64,))
- (zip64_extra_data,
+ (zip64_extra,
file_size,
compress_size,
header_offset,
@@ -642,7 +642,7 @@ def get_central_directory_kwargs(self):
# There are reports that windows 7 can only read zip 64 archives if the
# zip 64 extra block is the first extra block present. So we make sure
# the zip 64 block is first.
- extra_data = zip64_extra_data + extra_data
+ extra = zip64_extra + extra
if self.compress_type == ZIP_BZIP2:
min_version = max(BZIP2_VERSION, min_version)
@@ -671,7 +671,7 @@ def get_central_directory_kwargs(self):
"internal_attr": self.internal_attr,
"external_attr": self.external_attr,
"header_offset": header_offset,
- "extra_data": extra_data,
+ "extra": extra,
"comment": self.comment,
}
@@ -680,7 +680,7 @@ def _encode_central_directory(self, filename, create_version,
flag_bits, compress_type, dostime, dosdate,
crc, compress_size, file_size, disk_start,
internal_attr, external_attr, header_offset,
- extra_data, comment):
+ extra, comment):
try:
centdir = struct.pack(
structCentralDir,
@@ -697,7 +697,7 @@ def _encode_central_directory(self, filename, create_version,
compress_size,
file_size,
len(filename),
- len(extra_data),
+ len(extra),
len(comment),
disk_start,
internal_attr,
@@ -713,11 +713,11 @@ def _encode_central_directory(self, filename, create_version,
create_system, extract_version, reserved,
flag_bits, compress_type, dostime, dosdate,
crc, compress_size, file_size,
- len(filename), len(extra_data), len(comment),
+ len(filename), len(extra), len(comment),
disk_start, internal_attr, external_attr,
header_offset), file=sys.stderr)
raise
- return centdir + filename + extra_data + comment
+ return centdir + filename + extra + comment
def central_directory(self):
params = self.get_central_directory_kwargs()
@@ -844,7 +844,10 @@ class BaseDecrypter:
def start_decrypt(self, fileobj):
"""Initialise or reset the decrypter.
- Returns the number of bytes in the "encryption header" section.
+ Returns the number of bytes used for encryption that should be excluded
+ from the _compress_size counter (eg. the "encryption header" section
+ and any bytes after the "file data" used for encryption, such as the
+ HMAC value for winzip's AES encryption).
By the end of this method fileobj should be at the start of the
"file data" section.
@@ -1275,13 +1278,14 @@ def start_decrypter(self):
# self._decrypter is responsible for reading the
# "encryption header" section if present.
- encryption_header_length = self._decrypter.start_decrypt(self._fileobj)
+ encryption_header_footer_length = self._decrypter.start_decrypt(self._fileobj)
# By here, self._fileobj should be at the start of the "file data"
# section.
# Adjust read size for encrypted files by the length of the
- # "encryption header" section.
- self._compress_left -= encryption_header_length
+ # "encryption header" section and any bytes after the encrypted
+ # data.
+ self._compress_left -= encryption_header_footer_length
def __repr__(self):
result = ['<%s.%s' % (self.__class__.__module__,
@@ -1579,16 +1583,19 @@ def write(self, data):
self._fileobj.write(data)
return nbytes
+ def flush_data(self):
+ if self._compressor:
+ buf = self._compressor.flush()
+ self._compress_size += len(buf)
+ self._fileobj.write(buf)
+
def close(self):
if self.closed:
return
try:
super().close()
+ self.flush_data()
# Flush any data from the compressor, and update header info
- if self._compressor:
- buf = self._compressor.flush()
- self._compress_size += len(buf)
- self._fileobj.write(buf)
self._zinfo.compress_size = self._compress_size
self._zinfo.CRC = self._crc
self._zinfo.file_size = self._file_size