Content-Length: 459451 | pFad | http://github.com/python/cpython/pull/14957/commits/00c87ee4958d64cf2c539f476f5d84c704aac648

0F bpo-37538: Zipfile refactor by danifus · Pull Request #14957 · python/cpython · GitHub
Skip to content

bpo-37538: Zipfile refactor #14957

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 29 commits into
base: main
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
a0db1c9
Add descriptive global variables for general purpose bit flags
danifus Jul 10, 2019
6710baf
Add global variable for zip64 extra data header id
danifus Jul 10, 2019
3777389
Add flag properties to ZipInfo
danifus Jul 10, 2019
f435f08
Restructure how ZipExtFile gets created from ZipFile.open
danifus Jul 10, 2019
ca41137
Fix bug when seeking on encrypted zip files
danifus Jul 10, 2019
00c87ee
Refactor _ZipDecrypter with a BaseZipDecrypter class
danifus Jul 11, 2019
b8364a6
Move compressor and decompressor selection code into classes
danifus Jul 12, 2019
6b256c0
Add zipinfo_cls, zipextfile_cls and zipwritefile_cls to ZipFile
danifus Jul 12, 2019
af8864b
Fix typo datadescripter -> datadescriptor
danifus Jul 13, 2019
42c4be6
Add dosdate and dostime properties to ZipInfo
danifus Jul 13, 2019
801d966
Move encoding datadescriptor to ZipInfo
danifus Jul 13, 2019
46604e0
Refactor how ZipInfo encodes the local file header.
danifus Jul 13, 2019
7d28d8f
Move central directory encoding to ZipInfo
danifus Jul 14, 2019
c784d7f
Move struct packing of central directory record to a ZipInfo method
danifus Jul 14, 2019
f84e481
Refactor _decodeExtra to allow subclasses to support new extra fields
danifus Jul 14, 2019
1a07518
Change the way zipfile _decodeExtra loops through the extra bytes
danifus Jul 14, 2019
6de1a9a
Decouple updating and checking crc when reading a zipfile
danifus Jul 14, 2019
6b90dfd
Move writing zipfile local header to _ZipWriteFile
danifus Jul 14, 2019
4417cc5
Move writing local header to within _ZipWriteFile
danifus Jul 15, 2019
bfa8a7e
Add some comments to zipfile's LZMACompressor
danifus Jul 15, 2019
a211abe
Add comments to ZipFile._write_end_record describing structs
danifus Jul 17, 2019
3eff8be
Small performance fix to zipfile.CRCZipDecrypter
danifus Jul 22, 2019
7220ef9
Refactor ZipFile encoding approach
danifus Jul 22, 2019
0a718f7
Change ZipInfo encoding of local extra data
danifus Jul 22, 2019
cb826d6
Allow ZipFile _open_to_write() and _open_to_read() to take kwargs
danifus Jul 26, 2019
5a88b2d
Change ZipFile._open_to_write() to accept pwd argument.
danifus Jul 26, 2019
fa374ee
ZipFile remove special case path for ZIP_STORED
danifus Jul 26, 2019
5bb4c17
📜🤖 Added by blurb_it.
blurb-it[bot] Jul 26, 2019
366f79f
bpo-37538: Small clean up of zipfile refactor
danifus Jul 27, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Refactor _ZipDecrypter with a BaseZipDecrypter class
** This undoes the __init__ method change made a few commits earlier in this pull request **
  • Loading branch information
danifus committed Jul 26, 2019
commit 00c87ee4958d64cf2c539f476f5d84c704aac648
177 changes: 112 additions & 65 deletions Lib/zipfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,26 @@ def is_dir(self):
return self.filename[-1] == '/'


class BaseDecrypter:
    """Interface for zip entry decrypters.

    Both methods here only raise NotImplementedError; a concrete
    decrypter has to override each of them.
    """

    def start_decrypt(self, fileobj):
        """Initialise or reset the decrypter.

        Returns the number of bytes in the "encryption header" section.

        When this method returns, fileobj should be positioned at the
        start of the "file data" section.
        """
        message = (
            "Subclasses of BaseDecrypter must provide a start_decrypt() method"
        )
        raise NotImplementedError(message)

    def decrypt(self, data):
        """Decrypt *data*.  Subclasses must override this method."""
        message = "Subclasses of BaseDecrypter must provide a decrypt() method"
        raise NotImplementedError(message)


# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
# internal keys. We noticed that a direct implementation is faster than
# relying on binascii.crc32().
Expand All @@ -611,51 +631,86 @@ def _gen_crc(crc):
crc >>= 1
return crc

# ZIP supports a password-based form of encryption. Even though known
# plaintext attacks have been found against it, it is still useful
# to be able to get data out of such a file.
#
# Usage:
# zd = _ZipDecrypter(mypwd)
# plain_bytes = zd(cypher_bytes)

def _ZipDecrypter(pwd):
key0 = 305419896
key1 = 591751049
key2 = 878082192

global _crctable
if _crctable is None:
_crctable = list(map(_gen_crc, range(256)))
crctable = _crctable

def crc32(ch, crc):
"""Compute the CRC32 primitive on one byte."""
return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]

def update_keys(c):
nonlocal key0, key1, key2
key0 = crc32(c, key0)
key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
key2 = crc32(key1 >> 24, key2)
class CRCZipDecrypter(BaseDecrypter):
"""PKWARE Encryption Decrypter

ZIP supports a password-based form of encryption. Even though known
plaintext attacks have been found against it, it is still useful
to be able to get data out of such a file.

Usage:
zd = CRCZipDecrypter(zinfo, mypwd)
zd.start_decrypt(fileobj)
plain_bytes = zd.decrypt(cypher_bytes)
"""

for p in pwd:
update_keys(p)
encryption_header_length = 12

def decrypter(data):
def __init__(self, zinfo, pwd):
self.zinfo = zinfo
self.name = zinfo.filename

if not pwd:
raise RuntimeError("File %r is encrypted, a password is "
"required for extraction" % self.name)
self.pwd = pwd

def start_decrypt(self, fileobj):
    """Reset the key state from the password and check the encryption header.

    Reads ``encryption_header_length`` bytes from *fileobj*, decrypts
    them, and compares the 12th decrypted byte with a check byte taken
    from ``self.zinfo``.

    Returns ``encryption_header_length``.

    Raises RuntimeError ("Bad password ...") if the check byte does not
    match.
    """

    # Every call starts again from these fixed initial key values.
    self.key0 = 305419896
    self.key1 = 591751049
    self.key2 = 878082192

    # The CRC table is cached in a module-level global and built only
    # the first time it is needed.
    global _crctable
    if _crctable is None:
        _crctable = list(map(_gen_crc, range(256)))
    self.crctable = _crctable

    # Mix each password byte into the keys.
    for p in self.pwd:
        self.update_keys(p)

    # The first 12 bytes in the cypher stream are an encryption header
    # used to strengthen the algorithm. The first 11 bytes are
    # completely random, while the 12th contains the MSB of the CRC,
    # or the MSB of the file time depending on the header type
    # and is used to check the correctness of the password.
    header = fileobj.read(self.encryption_header_length)
    h = self.decrypt(header[0:12])

    if self.zinfo.use_datadescripter:
        # compare against the file time from extended local headers
        check_byte = (self.zinfo._raw_time >> 8) & 0xff
    else:
        # compare against the CRC otherwise
        check_byte = (self.zinfo.CRC >> 24) & 0xff

    if h[11] != check_byte:
        raise RuntimeError("Bad password for file %r" % self.name)

    return self.encryption_header_length

def crc32(self, ch, crc):
    """Compute the CRC32 primitive on one byte.

    Looks up the low byte of ``crc ^ ch`` in ``self.crctable`` and XORs
    the entry with ``crc`` shifted right by eight bits.
    """
    table_index = (crc ^ ch) & 0xFF
    shifted = crc >> 8
    return shifted ^ self.crctable[table_index]

def update_keys(self, c):
    """Advance key0, key1 and key2 using the single byte value *c*."""
    mask = 0xFFFFFFFF

    k0 = self.crc32(c, self.key0)
    self.key0 = k0

    # key1 absorbs the low byte of the new key0, then is multiplied
    # and incremented, staying within 32 bits.
    k1 = (self.key1 + (k0 & 0xFF)) & mask
    k1 = (k1 * 134775813 + 1) & mask
    self.key1 = k1

    # key2 is updated from the top byte of the new key1.
    self.key2 = self.crc32(k1 >> 24, self.key2)

def decrypt(self, data):
"""Decrypt a bytes object."""
result = bytearray()
append = result.append
for c in data:
k = key2 | 2
k = self.key2 | 2
c ^= ((k * (k^1)) >> 8) & 0xFF
update_keys(c)
self.update_keys(c)
append(c)
return bytes(result)

return decrypter


class LZMACompressor:

Expand Down Expand Up @@ -860,11 +915,12 @@ class ZipExtFile(io.BufferedIOBase):
# Chunk size to read during seek
MAX_SEEK_READ = 1 << 24

def __init__(self, fileobj, mode, zipinfo, close_fileobj=False, pwd=None):
def __init__(self, fileobj, mode, zipinfo, decrypter=None,
close_fileobj=False):
self._fileobj = fileobj
self._zinfo = zipinfo
self._decrypter = decrypter
self._close_fileobj = close_fileobj
self._pwd = pwd

self.process_local_header()
self.raise_for_unsupported_flags()
Expand Down Expand Up @@ -906,7 +962,7 @@ def read_init(self):
self._offset = 0
self._eof = False

self._decrypter = self.get_decrypter()
self.start_decrypter()
self._decompressor = _get_decompressor(self._compress_type)

def process_local_header(self):
Expand Down Expand Up @@ -950,37 +1006,22 @@ def raise_for_unsupported_flags(self):
# strong encryption
raise NotImplementedError("strong encryption (flag bit 6)")


def get_decrypter(self):
def start_decrypter(self):
# check for encrypted flag & handle password
decrypter = None
if self._zinfo.is_encrypted:
if not self._pwd:
raise RuntimeError("File %r is encrypted, password "
if not self._decrypter:
raise RuntimeError("File %r is encrypted, a decrypter is "
"required for extraction" % self.name)

decrypter = _ZipDecrypter(self._pwd)
# The first 12 bytes in the cypher stream is an encryption header
# used to strengthen the algorithm. The first 11 bytes are
# completely random, while the 12th contains the MSB of the CRC,
# or the MSB of the file time depending on the header type
# and is used to check the correctness of the password.
header = self._fileobj.read(12)
h = decrypter(header[0:12])
if self._zinfo.use_datadescripter:
# compare against the file type from extended local headers
check_byte = (self._zinfo._raw_time >> 8) & 0xff
else:
# compare against the CRC otherwise
check_byte = (self._zinfo.CRC >> 24) & 0xff
if h[11] != check_byte:
raise RuntimeError("Bad password for file %r" % self.name)

# Adjust read size for encrypted files since the first 12 bytes are
# for the encryption/password information.
self._compress_left -= 12
# self._decrypter is responsible for reading the
# "encryption header" section if present.
encryption_header_length = self._decrypter.start_decrypt(self._fileobj)
# By here, self._fileobj should be at the start of the "file data"
# section.

return decrypter
# Adjust read size for encrypted files by the length of the
# "encryption header" section.
self._compress_left -= encryption_header_length

def __repr__(self):
result = ['<%s.%s' % (self.__class__.__module__,
Expand Down Expand Up @@ -1157,7 +1198,7 @@ def _read2(self, n):
raise EOFError

if self._decrypter is not None:
data = self._decrypter(data)
data = self._decrypter.decrypt(data)
return data

def close(self):
Expand Down Expand Up @@ -1632,13 +1673,19 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False):

return self._open_to_read(mode, zinfo, pwd)

def get_decrypter(self, zinfo, pwd):
    """Return a CRCZipDecrypter for *zinfo*, or None if it is not encrypted."""
    if not zinfo.is_encrypted:
        return None
    return CRCZipDecrypter(zinfo, pwd)

def _open_to_read(self, mode, zinfo, pwd):
# Open for reading:
self._fileRefCnt += 1

zef_file = _SharedFile(self.fp, zinfo.header_offset,
self._fpclose, self._lock, lambda: self._writing)
try:
return ZipExtFile(zef_file, mode, zinfo, True, pwd)
decrypter = self.get_decrypter(zinfo, pwd)
return ZipExtFile(zef_file, mode, zinfo, decrypter, True)
except:
zef_file.close()
raise
Expand Down








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: http://github.com/python/cpython/pull/14957/commits/00c87ee4958d64cf2c539f476f5d84c704aac648

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy