Skip to content

Commit 7e8883a

Browse files
[3.13] gh-123270: Replaced SanitizedNames with a more surgical fix. (GH-123354) (#123410)
gh-123270: Replaced SanitizedNames with a more surgical fix. (GH-123354) Applies changes from zipp 3.20.1 and jaraco/zippGH-124 (cherry picked from commit 2231286) Co-authored-by: Jason R. Coombs <jaraco@jaraco.com>
1 parent aca6511 commit 7e8883a

File tree

3 files changed

+87
-71
lines changed

3 files changed

+87
-71
lines changed

Lib/test/test_zipfile/_path/test_path.py

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pickle
66
import stat
77
import sys
8+
import time
89
import unittest
910
import zipfile
1011
import zipfile._path
@@ -592,7 +593,11 @@ def test_getinfo_missing(self, alpharep):
592593

593594
def test_malformed_paths(self):
594595
"""
595-
Path should handle malformed paths.
596+
Path should handle malformed paths gracefully.
597+
598+
Paths with leading slashes are not visible.
599+
600+
Paths with dots are treated like regular files.
596601
"""
597602
data = io.BytesIO()
598603
zf = zipfile.ZipFile(data, "w")
@@ -601,15 +606,71 @@ def test_malformed_paths(self):
601606
zf.writestr("../parent.txt", b"content")
602607
zf.filename = ''
603608
root = zipfile.Path(zf)
604-
assert list(map(str, root.iterdir())) == [
605-
'one-slash.txt',
606-
'two-slash.txt',
607-
'parent.txt',
608-
]
609+
assert list(map(str, root.iterdir())) == ['../']
610+
assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content'
611+
612+
def test_unsupported_names(self):
613+
"""
614+
Path segments with special characters are readable.
615+
616+
On some platforms or file systems, characters like
617+
``:`` and ``?`` are not allowed, but they are valid
618+
in the zip file.
619+
"""
620+
data = io.BytesIO()
621+
zf = zipfile.ZipFile(data, "w")
622+
zf.writestr("path?", b"content")
623+
zf.writestr("V: NMS.flac", b"fLaC...")
624+
zf.filename = ''
625+
root = zipfile.Path(zf)
626+
contents = root.iterdir()
627+
assert next(contents).name == 'path?'
628+
assert next(contents).name == 'V: NMS.flac'
629+
assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..."
630+
631+
def test_backslash_not_separator(self):
632+
"""
633+
In a zip file, backslashes are not separators.
634+
"""
635+
data = io.BytesIO()
636+
zf = zipfile.ZipFile(data, "w")
637+
zf.writestr(DirtyZipInfo.for_name("foo\\bar", zf), b"content")
638+
zf.filename = ''
639+
root = zipfile.Path(zf)
640+
(first,) = root.iterdir()
641+
assert not first.is_dir()
642+
assert first.name == 'foo\\bar'
609643

610644
@pass_alpharep
611645
def test_interface(self, alpharep):
612646
from importlib.resources.abc import Traversable
613647

614648
zf = zipfile.Path(alpharep)
615649
assert isinstance(zf, Traversable)
650+
651+
652+
class DirtyZipInfo(zipfile.ZipInfo):
653+
"""
654+
Bypass name sanitization.
655+
"""
656+
657+
def __init__(self, filename, *args, **kwargs):
658+
super().__init__(filename, *args, **kwargs)
659+
self.filename = filename
660+
661+
@classmethod
662+
def for_name(cls, name, archive):
663+
"""
664+
Construct the same way that ZipFile.writestr does.
665+
666+
TODO: extract this functionality and re-use
667+
"""
668+
self = cls(filename=name, date_time=time.localtime(time.time())[:6])
669+
self.compress_type = archive.compression
670+
self.compress_level = archive.compresslevel
671+
if self.filename.endswith('/'): # pragma: no cover
672+
self.external_attr = 0o40775 << 16 # drwxrwxr-x
673+
self.external_attr |= 0x10 # MS-DOS directory flag
674+
else:
675+
self.external_attr = 0o600 << 16 # ?rw-------
676+
return self

Lib/zipfile/_path/__init__.py

Lines changed: 17 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
"""
2+
A Path-like interface for zipfiles.
3+
4+
This codebase is shared between zipfile.Path in the stdlib
5+
and zipp in PyPI. See
6+
https://github.com/python/importlib_metadata/wiki/Development-Methodology
7+
for more detail.
8+
"""
9+
110
import io
211
import posixpath
312
import zipfile
@@ -36,7 +45,7 @@ def _parents(path):
3645
def _ancestry(path):
3746
"""
3847
Given a path with elements separated by
39-
posixpath.sep, generate all elements of that path
48+
posixpath.sep, generate all elements of that path.
4049
4150
>>> list(_ancestry('b/d'))
4251
['b/d', 'b']
@@ -48,9 +57,14 @@ def _ancestry(path):
4857
['b']
4958
>>> list(_ancestry(''))
5059
[]
60+
61+
Multiple separators are treated like a single.
62+
63+
>>> list(_ancestry('//b//d///f//'))
64+
['//b//d///f', '//b//d', '//b']
5165
"""
5266
path = path.rstrip(posixpath.sep)
53-
while path and path != posixpath.sep:
67+
while path.rstrip(posixpath.sep):
5468
yield path
5569
path, tail = posixpath.split(path)
5670

@@ -85,69 +99,7 @@ def __setstate__(self, state):
8599
super().__init__(*args, **kwargs)
86100

87101

88-
class SanitizedNames:
89-
"""
90-
ZipFile mix-in to ensure names are sanitized.
91-
"""
92-
93-
def namelist(self):
94-
return list(map(self._sanitize, super().namelist()))
95-
96-
@staticmethod
97-
def _sanitize(name):
98-
r"""
99-
Ensure a relative path with posix separators and no dot names.
100-
101-
Modeled after
102-
https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813
103-
but provides consistent cross-platform behavior.
104-
105-
>>> san = SanitizedNames._sanitize
106-
>>> san('/foo/bar')
107-
'foo/bar'
108-
>>> san('//foo.txt')
109-
'foo.txt'
110-
>>> san('foo/.././bar.txt')
111-
'foo/bar.txt'
112-
>>> san('foo../.bar.txt')
113-
'foo../.bar.txt'
114-
>>> san('\\foo\\bar.txt')
115-
'foo/bar.txt'
116-
>>> san('D:\\foo.txt')
117-
'D/foo.txt'
118-
>>> san('\\\\server\\share\\file.txt')
119-
'server/share/file.txt'
120-
>>> san('\\\\?\\GLOBALROOT\\Volume3')
121-
'?/GLOBALROOT/Volume3'
122-
>>> san('\\\\.\\PhysicalDrive1\\root')
123-
'PhysicalDrive1/root'
124-
125-
Retain any trailing slash.
126-
>>> san('abc/')
127-
'abc/'
128-
129-
Raises a ValueError if the result is empty.
130-
>>> san('../..')
131-
Traceback (most recent call last):
132-
...
133-
ValueError: Empty filename
134-
"""
135-
136-
def allowed(part):
137-
return part and part not in {'..', '.'}
138-
139-
# Remove the drive letter.
140-
# Don't use ntpath.splitdrive, because that also strips UNC paths
141-
bare = re.sub('^([A-Z]):', r'\1', name, flags=re.IGNORECASE)
142-
clean = bare.replace('\\', '/')
143-
parts = clean.split('/')
144-
joined = '/'.join(filter(allowed, parts))
145-
if not joined:
146-
raise ValueError("Empty filename")
147-
return joined + '/' * name.endswith('/')
148-
149-
150-
class CompleteDirs(InitializedState, SanitizedNames, zipfile.ZipFile):
102+
class CompleteDirs(InitializedState, zipfile.ZipFile):
151103
"""
152104
A ZipFile subclass that ensures that implied directories
153105
are always included in the namelist.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Applied a more surgical fix for malformed payloads in :class:`zipfile.Path`
2+
causing infinite loops (gh-122905) without breaking contents using
3+
legitimate characters.

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy