From cad857753382aa8dc7e6b788879550995454a53d Mon Sep 17 00:00:00 2001 From: Danny Lin Date: Fri, 24 Mar 2023 17:50:16 +0800 Subject: [PATCH 01/11] Add `ZipFile.remove()` (#51067) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a revision of commit 659eb048cc9cac73c46349eb29845bc5cd630f09 (PR #19358), notably with following changes: - Add documentation and tests. - Raise `ValueError` for a bad mode, as in other methods. - Support multi-member removal in `_remove_members()`. - Support non-physical removal in `_remove_members()`. - Move physical file data in chunks to prevent excessive memory usage on large files. - Fix missing entry in `self.NameToInfo` when removing a duplicated archive name. - Also update `ZipInfo._end_offset` for physically moved files. Co-authored-by: Éric (cherry picked from commit e6bc82a3995f42df1c666c8889a2e4e4f938ff67 (PR #103033)) --- Doc/library/zipfile.rst | 17 +++ Lib/test/test_zipfile/test_core.py | 233 +++++++++++++++++++++++++++++ Lib/test/test_zipfile64.py | 63 ++++++++ Lib/zipfile/__init__.py | 90 +++++++++++ 4 files changed, 403 insertions(+) diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst index 6a4fa67332e179..bd6eefe0442bf8 100644 --- a/Doc/library/zipfile.rst +++ b/Doc/library/zipfile.rst @@ -518,6 +518,23 @@ ZipFile Objects .. versionadded:: 3.11 +.. method:: ZipFile.remove(zinfo_or_arcname) + + Removes a member from the archive. *zinfo_or_arcname* is either the full + path of the member, or a :class:`ZipInfo` instance. + + The archive must be opened with mode ``'w'``, ``'x'`` or ``'a'``. + + Calling :meth:`remove` on a closed ZipFile will raise a :exc:`ValueError`. + + .. note:: + + Removing a member in an archive may involve a move of many internal data + records, which can be I/O intensive for a large ZIP file. + + .. versionadded:: next + + The following data attributes are also available: .. attribute:: ZipFile.filename diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index ada96813709aea..09acc9b47e6708 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -1360,6 +1360,239 @@ class LzmaWriterTests(AbstractWriterTests, unittest.TestCase): class ZstdWriterTests(AbstractWriterTests, unittest.TestCase): compression = zipfile.ZIP_ZSTANDARD +class AbstractRemoveTests: + + def _test_removing_indexes(self, test_files, indexes): + """Test underlying _remove_members() for removing members at given + indexes.""" + # calculate the expected results + expected_files = [] + with zipfile.ZipFile(TESTFN, 'w') as zh: + for i, (file, data) in enumerate(test_files): + if i not in indexes: + zh.writestr(file, data) + expected_files.append(file) + expected_size = os.path.getsize(TESTFN) + + # prepare the test zip + with zipfile.ZipFile(TESTFN, 'w') as zh: + for file, data in test_files: + zh.writestr(file, data) + + # do the removal and check the result + with zipfile.ZipFile(TESTFN, 'a') as zh: + members = {zh.infolist()[i] for i in indexes} + zh._remove_members(members) + + # make sure internal caches have reflected the change + # and are consistent + self.assertEqual(zh.namelist(), expected_files) + for file, _ in test_files: + if file in zh.namelist(): + self.assertEqual(zh.getinfo(file).filename, file) + else: + with self.assertRaises(KeyError): + zh.getinfo(file) + + self.assertIsNone(zh.testzip()) + self.assertEqual(os.path.getsize(TESTFN), expected_size) + + def _test_removing_combinations(self, test_files, n=None): + """Test underlying _remove_members() for removing random combinations + of members.""" + ln = len(test_files) + if n is None: + # iterate n from 1 to all + for n in range(1, ln + 1): + for indexes in itertools.combinations(range(ln), n): + with self.subTest(remove=indexes): + self._test_removing_indexes(test_files, indexes) + else: + for indexes in itertools.combinations(range(ln), n): + with self.subTest(remove=indexes): + self._test_removing_indexes(test_files, indexes) + + def test_basic(self): + # Test underlying _remove_members() for removing random combinations of members. + test_files = [ + ('file0.txt', b'Lorem ipsum dolor sit amet, consectetur adipiscing elit'), + ('file1.txt', b'Duis aute irure dolor in reprehenderit in voluptate velit esse'), + ('file2.txt', b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'), + ] + + self._test_removing_combinations(test_files) + + def test_duplicated_arcname(self): + # Test underlying _remove_members() for removing any one of random duplicated members. + dupl_file = 'file.txt' + test_files = [ + ('file0.txt', b'Lorem ipsum dolor sit amet, consectetur adipiscing elit'), + ('file1.txt', b'Duis aute irure dolor in reprehenderit in voluptate velit esse'), + ('file2.txt', b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'), + ] + + ln = len(test_files) + for n in range(2, ln + 1): + for dups in itertools.combinations(range(ln), n): + files = [] + for i, (file, data) in enumerate(test_files): + file_ = dupl_file if i in dups else file + files.append((file_, data)) + + for index in dups: + indexes = [index] + with self.subTest(dups=dups, indexes=indexes): + self._test_removing_indexes(files, indexes) + + def test_non_physical(self): + # Test underlying _remove_members() for non-physical removing. + test_files = [ + ('file0.txt', b'Lorem ipsum dolor sit amet, consectetur adipiscing elit'), + ('file1.txt', b'Duis aute irure dolor in reprehenderit in voluptate velit esse'), + ('file2.txt', b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'), + ] + + ln = len(test_files) + for n in range(1, ln + 1): + for indexes in itertools.combinations(range(ln), n): + with self.subTest(remove=indexes): + # prepare the test zip + expected = {} + with zipfile.ZipFile(TESTFN, 'w') as zh: + for i, (file, data) in enumerate(test_files): + zh.writestr(file, data) + if i not in indexes: + expected[file] = zh.getinfo(file).header_offset + + # do the removal and check the result + with zipfile.ZipFile(TESTFN, 'a') as zh: + members = {zh.infolist()[i] for i in indexes} + zh._remove_members(members, remove_physical=False) + self.assertEqual(zh.namelist(), list(expected)) + for file, offset in expected.items(): + self.assertEqual(zh.getinfo(file).header_offset, offset) + self.assertIsNone(zh.testzip()) + + def test_verify(self): + # Test if params are passed to underlying _remove_members() correctly, + # or never passed if conditions not met. + file0 = 'file0.txt' + file = 'datafile.txt' + data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem' + + # closed: error and do nothing + with zipfile.ZipFile(TESTFN, 'w') as zh: + zh.writestr(file, data) + with zipfile.ZipFile(TESTFN, 'a') as zh: + zh.close() + with mock.patch('zipfile.ZipFile._remove_members') as mock_fn: + with self.assertRaises(ValueError): + zh.remove(file) + mock_fn.assert_not_called() + + # writing: error and do nothing + with zipfile.ZipFile(TESTFN, 'w') as zh: + zh.writestr(file, data) + with zipfile.ZipFile(TESTFN, 'a') as zh: + with mock.patch('zipfile.ZipFile._remove_members') as mock_fn: + with zh.open(file0, 'w') as fh: + with self.assertRaises(ValueError): + zh.remove(file) + mock_fn.assert_not_called() + + # mode 'r': error and do nothing + with zipfile.ZipFile(TESTFN, 'r') as zh: + with mock.patch('zipfile.ZipFile._remove_members') as mock_fn: + with self.assertRaises(ValueError): + zh.remove(file) + mock_fn.assert_not_called() + + # mode 'a': the most general use case + with zipfile.ZipFile(TESTFN, 'w') as zh: + zh.writestr(file, data) + # -- remove with arcname + with zipfile.ZipFile(TESTFN, 'a') as zh: + with mock.patch('zipfile.ZipFile._remove_members') as mock_fn: + zh.remove(file) + mock_fn.assert_called_once_with({zh.getinfo(file)}) + # -- remove with zinfo + with zipfile.ZipFile(TESTFN, 'a') as zh: + with mock.patch('zipfile.ZipFile._remove_members') as mock_fn: + zinfo = zh.getinfo(file) + zh.remove(zinfo) + mock_fn.assert_called_once_with({zinfo}) + # -- remove with nonexist arcname + with zipfile.ZipFile(TESTFN, 'a') as zh: + with mock.patch('zipfile.ZipFile._remove_members') as mock_fn: + with self.assertRaises(KeyError): + zh.remove('nonexist.file') + mock_fn.assert_not_called() + # -- remove with nonexist zinfo (even if same name) + with zipfile.ZipFile(TESTFN, 'a') as zh: + with mock.patch('zipfile.ZipFile._remove_members') as mock_fn: + zinfo = zipfile.ZipInfo(file) + with self.assertRaises(KeyError): + zh.remove(zinfo) + mock_fn.assert_not_called() + + # mode 'w': like 'a'; allows removing a just written member + with zipfile.ZipFile(TESTFN, 'w') as zh: + zh.writestr(file, data) + with mock.patch('zipfile.ZipFile._remove_members') as mock_fn: + zh.remove(file) + mock_fn.assert_called_once_with({zh.getinfo(file)}) + + # mode 'x': like 'w' + os.remove(TESTFN) + with zipfile.ZipFile(TESTFN, 'x') as zh: + zh.writestr(file, data) + with mock.patch('zipfile.ZipFile._remove_members') as mock_fn: + zh.remove(file) + mock_fn.assert_called_once_with({zh.getinfo(file)}) + + def test_zip64(self): + # Test if members use zip64. + file = 'datafile.txt' + file1 = 'pre.txt' + file2 = 'post.txt' + data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem' + data1 = b'Lorem ipsum dolor sit amet, consectetur adipiscing elit' + data2 = b'Duis aute irure dolor in reprehenderit in voluptate velit esse' + with zipfile.ZipFile(TESTFN, 'w') as zh: + with zh.open(file1, 'w', force_zip64=True) as fh: + fh.write(data1) + with zh.open(file2, 'w', force_zip64=True) as fh: + fh.write(data2) + expected_size = os.path.getsize(TESTFN) + + with zipfile.ZipFile(TESTFN, 'w') as zh: + with zh.open(file1, 'w', force_zip64=True) as fh: + fh.write(data1) + with zh.open(file, 'w', force_zip64=True) as fh: + fh.write(data) + with zh.open(file2, 'w', force_zip64=True) as fh: + fh.write(data2) + with zipfile.ZipFile(TESTFN, 'a') as zh: + zh.remove(file) + self.assertIsNone(zh.testzip()) + self.assertEqual(os.path.getsize(TESTFN), expected_size) + +class StoredRemoveTests(AbstractRemoveTests, unittest.TestCase): + compression = zipfile.ZIP_STORED + +@requires_zlib() +class DeflateRemoveTests(AbstractRemoveTests, unittest.TestCase): + compression = zipfile.ZIP_DEFLATED + +@requires_bz2() +class Bzip2RemoveTests(AbstractRemoveTests, unittest.TestCase): + compression = zipfile.ZIP_BZIP2 + +@requires_lzma() +class LzmaRemoveTests(AbstractRemoveTests, unittest.TestCase): + compression = zipfile.ZIP_LZMA + + class PyZipFileTests(unittest.TestCase): def assertCompiledIn(self, name, namelist): if name + 'o' not in namelist: diff --git a/Lib/test/test_zipfile64.py b/Lib/test/test_zipfile64.py index 2e1affe0252858..84d1862f08bf31 100644 --- a/Lib/test/test_zipfile64.py +++ b/Lib/test/test_zipfile64.py @@ -87,6 +87,69 @@ def tearDown(self): os_helper.unlink(TESTFN2) +class TestRemove(unittest.TestCase): + def setUp(self): + # Create test data. + line_gen = ("Test of zipfile line %d." % i for i in range(1000000)) + self.data = '\n'.join(line_gen).encode('ascii') + + def _write_large_file(self, fh): + # It will contain enough copies of self.data to reach about 8 GiB. + filecount = 8*1024**3 // len(self.data) + + next_time = time.monotonic() + _PRINT_WORKING_MSG_INTERVAL + for num in range(filecount): + fh.write(self.data) + # Print still working message since this test can be really slow + if next_time <= time.monotonic(): + next_time = time.monotonic() + _PRINT_WORKING_MSG_INTERVAL + print(( + ' writing %d of %d, be patient...' % + (num, filecount)), file=sys.__stdout__) + sys.__stdout__.flush() + + def test_remove_large_file(self): + # Try the temp file. If we do TESTFN2, then it hogs + # gigabytes of disk space for the duration of the test. + with TemporaryFile() as f: + self._test_remove_large_file(f) + self.assertFalse(f.closed) + + def _test_remove_large_file(self, f): + file = 'datafile.txt' + file1 = 'dummy.txt' + data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem' + with zipfile.ZipFile(f, 'w') as zh: + with zh.open(file1, 'w', force_zip64=True) as fh: + self._write_large_file(fh) + zh.writestr(file, data) + + with zipfile.ZipFile(f, 'a') as zh: + zh.remove(file1) + self.assertIsNone(zh.testzip()) + + def test_remove_before_large_file(self): + # Try the temp file. If we do TESTFN2, then it hogs + # gigabytes of disk space for the duration of the test. + with TemporaryFile() as f: + self._test_remove_before_large_file(f) + self.assertFalse(f.closed) + + def _test_remove_before_large_file(self, f): + file = 'datafile.txt' + file1 = 'dummy.txt' + data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem' + with zipfile.ZipFile(f, 'w') as zh: + zh.writestr(file, data) + with zh.open(file1, 'w', force_zip64=True) as fh: + self._write_large_file(fh) + expected_size = zh.getinfo(file1).file_size + + with zipfile.ZipFile(f, 'a') as zh: + zh.remove(file) + self.assertIsNone(zh.testzip()) + + class OtherTests(unittest.TestCase): def testMoreThan64kFiles(self): # This test checks that more than 64k files can be added to an archive, diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 18caeb3e04a2b5..6c4af17050b892 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -1866,6 +1866,32 @@ def extractall(self, path=None, members=None, pwd=None): for zipinfo in members: self._extract_member(zipinfo, path, pwd) + def remove(self, zinfo_or_arcname): + """Remove a member from the archive.""" + + if self.mode not in ('w', 'x', 'a'): + raise ValueError("remove() requires mode 'w', 'x', or 'a'") + if not self.fp: + raise ValueError( + "Attempt to write to ZIP archive that was already closed") + if self._writing: + raise ValueError( + "Can't write to ZIP archive while an open writing handle exists." + ) + + # Make sure we have an existing info object + if isinstance(zinfo_or_arcname, ZipInfo): + zinfo = zinfo_or_arcname + # make sure zinfo exists + if zinfo not in self.filelist: + raise KeyError( + 'There is no item %r in the archive' % zinfo_or_arcname) + else: + # get the info object + zinfo = self.getinfo(zinfo_or_arcname) + + return self._remove_members({zinfo}) + @classmethod def _sanitize_windows_name(cls, arcname, pathsep): """Replace bad characters and remove trailing dots from parts.""" @@ -1930,6 +1956,70 @@ def _extract_member(self, member, targetpath, pwd): return targetpath + def _remove_members(self, members, *, remove_physical=True, chunk_size=2**20): + """Remove members in a zip file. + + All members (as zinfo) should exist in the zip; otherwise the zip file + will erroneously end in an inconsistent state. + """ + fp = self.fp + entry_offset = 0 + member_seen = False + + # get a sorted filelist by header offset, in case the dir order + # doesn't match the actual entry order + filelist = sorted(self.filelist, key=lambda x: x.header_offset) + for i, info in enumerate(filelist): + is_member = info in members + + if not (member_seen or is_member): + continue + + # get the total size of the entry + try: + offset = filelist[i + 1].header_offset + except IndexError: + offset = self.start_dir + entry_size = offset - info.header_offset + + if is_member: + member_seen = True + entry_offset += entry_size + + # update caches + self.filelist.remove(info) + try: + del self.NameToInfo[info.filename] + except KeyError: + pass + continue + + # update the header and move entry data to the new position + if remove_physical: + old_header_offset = info.header_offset + info.header_offset -= entry_offset + read_size = 0 + while read_size < entry_size: + fp.seek(old_header_offset + read_size) + data = fp.read(min(entry_size - read_size, chunk_size)) + fp.seek(info.header_offset + read_size) + fp.write(data) + fp.flush() + read_size += len(data) + + # Avoid missing entry if entries have a duplicated name. + # Reverse the order as NameToInfo normally stores the last added one. + for info in reversed(self.filelist): + self.NameToInfo.setdefault(info.filename, info) + + # update state + if remove_physical: + self.start_dir -= entry_offset + self._didModify = True + + # seek to the start of the central dir + fp.seek(self.start_dir) + def _writecheck(self, zinfo): """Check for errors before writing a file to the archive.""" if zinfo.filename in self.NameToInfo: From 96a07a26b273cd414f48f2bfcef579b337399289 Mon Sep 17 00:00:00 2001 From: Danny Lin Date: Wed, 21 May 2025 23:13:23 +0800 Subject: [PATCH 02/11] Fix and improve tests --- Lib/test/test_zipfile/test_core.py | 70 ++++++++++++++---------------- 1 file changed, 33 insertions(+), 37 deletions(-) diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index 09acc9b47e6708..f89b3942cad854 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -1362,12 +1362,12 @@ class ZstdWriterTests(AbstractWriterTests, unittest.TestCase): class AbstractRemoveTests: - def _test_removing_indexes(self, test_files, indexes): + def _test_removing_members(self, test_files, indexes): """Test underlying _remove_members() for removing members at given indexes.""" # calculate the expected results expected_files = [] - with zipfile.ZipFile(TESTFN, 'w') as zh: + with zipfile.ZipFile(TESTFN, 'w', self.compression) as zh: for i, (file, data) in enumerate(test_files): if i not in indexes: zh.writestr(file, data) @@ -1375,12 +1375,12 @@ def _test_removing_indexes(self, test_files, indexes): expected_size = os.path.getsize(TESTFN) # prepare the test zip - with zipfile.ZipFile(TESTFN, 'w') as zh: + with zipfile.ZipFile(TESTFN, 'w', self.compression) as zh: for file, data in test_files: zh.writestr(file, data) # do the removal and check the result - with zipfile.ZipFile(TESTFN, 'a') as zh: + with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh: members = {zh.infolist()[i] for i in indexes} zh._remove_members(members) @@ -1401,19 +1401,14 @@ def _test_removing_combinations(self, test_files, n=None): """Test underlying _remove_members() for removing random combinations of members.""" ln = len(test_files) - if n is None: - # iterate n from 1 to all - for n in range(1, ln + 1): - for indexes in itertools.combinations(range(ln), n): - with self.subTest(remove=indexes): - self._test_removing_indexes(test_files, indexes) - else: + for n in (range(1, ln + 1) if n is None else (n,)): for indexes in itertools.combinations(range(ln), n): with self.subTest(remove=indexes): - self._test_removing_indexes(test_files, indexes) + self._test_removing_members(test_files, indexes) def test_basic(self): - # Test underlying _remove_members() for removing random combinations of members. + """Test underlying _remove_members() for removing random combinations + of members.""" test_files = [ ('file0.txt', b'Lorem ipsum dolor sit amet, consectetur adipiscing elit'), ('file1.txt', b'Duis aute irure dolor in reprehenderit in voluptate velit esse'), @@ -1423,7 +1418,8 @@ def test_basic(self): self._test_removing_combinations(test_files) def test_duplicated_arcname(self): - # Test underlying _remove_members() for removing any one of random duplicated members. + """Test underlying _remove_members() for removing any one of random + duplicated members.""" dupl_file = 'file.txt' test_files = [ ('file0.txt', b'Lorem ipsum dolor sit amet, consectetur adipiscing elit'), @@ -1441,11 +1437,11 @@ def test_duplicated_arcname(self): for index in dups: indexes = [index] - with self.subTest(dups=dups, indexes=indexes): - self._test_removing_indexes(files, indexes) + with self.subTest(dups=dups, remove=indexes): + self._test_removing_members(files, indexes) def test_non_physical(self): - # Test underlying _remove_members() for non-physical removing. + """Test underlying _remove_members() for non-physical removing.""" test_files = [ ('file0.txt', b'Lorem ipsum dolor sit amet, consectetur adipiscing elit'), ('file1.txt', b'Duis aute irure dolor in reprehenderit in voluptate velit esse'), @@ -1458,14 +1454,14 @@ def test_non_physical(self): with self.subTest(remove=indexes): # prepare the test zip expected = {} - with zipfile.ZipFile(TESTFN, 'w') as zh: + with zipfile.ZipFile(TESTFN, 'w', self.compression) as zh: for i, (file, data) in enumerate(test_files): zh.writestr(file, data) if i not in indexes: expected[file] = zh.getinfo(file).header_offset # do the removal and check the result - with zipfile.ZipFile(TESTFN, 'a') as zh: + with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh: members = {zh.infolist()[i] for i in indexes} zh._remove_members(members, remove_physical=False) self.assertEqual(zh.namelist(), list(expected)) @@ -1474,16 +1470,16 @@ def test_non_physical(self): self.assertIsNone(zh.testzip()) def test_verify(self): - # Test if params are passed to underlying _remove_members() correctly, - # or never passed if conditions not met. + """Test if params are passed to underlying _remove_members() correctly, + or never passed if conditions not met.""" file0 = 'file0.txt' file = 'datafile.txt' data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem' # closed: error and do nothing - with zipfile.ZipFile(TESTFN, 'w') as zh: + with zipfile.ZipFile(TESTFN, 'w', self.compression) as zh: zh.writestr(file, data) - with zipfile.ZipFile(TESTFN, 'a') as zh: + with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh: zh.close() with mock.patch('zipfile.ZipFile._remove_members') as mock_fn: with self.assertRaises(ValueError): @@ -1491,9 +1487,9 @@ def test_verify(self): mock_fn.assert_not_called() # writing: error and do nothing - with zipfile.ZipFile(TESTFN, 'w') as zh: + with zipfile.ZipFile(TESTFN, 'w', self.compression) as zh: zh.writestr(file, data) - with zipfile.ZipFile(TESTFN, 'a') as zh: + with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh: with mock.patch('zipfile.ZipFile._remove_members') as mock_fn: with zh.open(file0, 'w') as fh: with self.assertRaises(ValueError): @@ -1501,34 +1497,34 @@ def test_verify(self): mock_fn.assert_not_called() # mode 'r': error and do nothing - with zipfile.ZipFile(TESTFN, 'r') as zh: + with zipfile.ZipFile(TESTFN, 'r', self.compression) as zh: with mock.patch('zipfile.ZipFile._remove_members') as mock_fn: with self.assertRaises(ValueError): zh.remove(file) mock_fn.assert_not_called() # mode 'a': the most general use case - with zipfile.ZipFile(TESTFN, 'w') as zh: + with zipfile.ZipFile(TESTFN, 'w', self.compression) as zh: zh.writestr(file, data) # -- remove with arcname - with zipfile.ZipFile(TESTFN, 'a') as zh: + with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh: with mock.patch('zipfile.ZipFile._remove_members') as mock_fn: zh.remove(file) mock_fn.assert_called_once_with({zh.getinfo(file)}) # -- remove with zinfo - with zipfile.ZipFile(TESTFN, 'a') as zh: + with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh: with mock.patch('zipfile.ZipFile._remove_members') as mock_fn: zinfo = zh.getinfo(file) zh.remove(zinfo) mock_fn.assert_called_once_with({zinfo}) # -- remove with nonexist arcname - with zipfile.ZipFile(TESTFN, 'a') as zh: + with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh: with mock.patch('zipfile.ZipFile._remove_members') as mock_fn: with self.assertRaises(KeyError): zh.remove('nonexist.file') mock_fn.assert_not_called() # -- remove with nonexist zinfo (even if same name) - with zipfile.ZipFile(TESTFN, 'a') as zh: + with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh: with mock.patch('zipfile.ZipFile._remove_members') as mock_fn: zinfo = zipfile.ZipInfo(file) with self.assertRaises(KeyError): @@ -1536,7 +1532,7 @@ def test_verify(self): mock_fn.assert_not_called() # mode 'w': like 'a'; allows removing a just written member - with zipfile.ZipFile(TESTFN, 'w') as zh: + with zipfile.ZipFile(TESTFN, 'w', self.compression) as zh: zh.writestr(file, data) with mock.patch('zipfile.ZipFile._remove_members') as mock_fn: zh.remove(file) @@ -1544,35 +1540,35 @@ def test_verify(self): # mode 'x': like 'w' os.remove(TESTFN) - with zipfile.ZipFile(TESTFN, 'x') as zh: + with zipfile.ZipFile(TESTFN, 'x', self.compression) as zh: zh.writestr(file, data) with mock.patch('zipfile.ZipFile._remove_members') as mock_fn: zh.remove(file) mock_fn.assert_called_once_with({zh.getinfo(file)}) def test_zip64(self): - # Test if members use zip64. + """Test if members use zip64.""" file = 'datafile.txt' file1 = 'pre.txt' file2 = 'post.txt' data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem' data1 = b'Lorem ipsum dolor sit amet, consectetur adipiscing elit' data2 = b'Duis aute irure dolor in reprehenderit in voluptate velit esse' - with zipfile.ZipFile(TESTFN, 'w') as zh: + with zipfile.ZipFile(TESTFN, 'w', self.compression) as zh: with zh.open(file1, 'w', force_zip64=True) as fh: fh.write(data1) with zh.open(file2, 'w', force_zip64=True) as fh: fh.write(data2) expected_size = os.path.getsize(TESTFN) - with zipfile.ZipFile(TESTFN, 'w') as zh: + with zipfile.ZipFile(TESTFN, 'w', self.compression) as zh: with zh.open(file1, 'w', force_zip64=True) as fh: fh.write(data1) with zh.open(file, 'w', force_zip64=True) as fh: fh.write(data) with zh.open(file2, 'w', force_zip64=True) as fh: fh.write(data2) - with zipfile.ZipFile(TESTFN, 'a') as zh: + with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh: zh.remove(file) self.assertIsNone(zh.testzip()) self.assertEqual(os.path.getsize(TESTFN), expected_size) From 602b909b5a9b653e3db14bdf5a0eeae6091b8d7f Mon Sep 17 00:00:00 2001 From: Danny Lin Date: Wed, 21 May 2025 19:06:03 +0800 Subject: [PATCH 03/11] Update `ZipInfo._end_offset` --- Lib/zipfile/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 6c4af17050b892..dda1ff071920b1 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -1998,6 +1998,8 @@ def _remove_members(self, members, *, remove_physical=True, chunk_size=2**20): if remove_physical: old_header_offset = info.header_offset info.header_offset -= entry_offset + if info._end_offset is not None: + info._end_offset -= entry_offset read_size = 0 while read_size < entry_size: fp.seek(old_header_offset + read_size) From bb90f48f60ddf8244097fc8b274cc5ebde052f94 Mon Sep 17 00:00:00 2001 From: Danny Lin Date: Thu, 22 May 2025 00:13:34 +0800 Subject: [PATCH 04/11] No more allow mode 'w'/'x' - File is not truncated in mode 'w'/'x', which results non-shrinked file. - This cannot be simply resolved by adding truncation for mode 'w'/'x', which may be used on an unseekable file buffer and truncation is not allowed. --- Doc/library/zipfile.rst | 2 +- Lib/test/test_zipfile/test_core.py | 32 ++++++++++++++++-------------- Lib/zipfile/__init__.py | 9 ++++++--- 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst index bd6eefe0442bf8..4f566168938056 100644 --- a/Doc/library/zipfile.rst +++ b/Doc/library/zipfile.rst @@ -523,7 +523,7 @@ ZipFile Objects Removes a member from the archive. *zinfo_or_arcname* is either the full path of the member, or a :class:`ZipInfo` instance. - The archive must be opened with mode ``'w'``, ``'x'`` or ``'a'``. + The archive must be opened with mode ``'a'``. Calling :meth:`remove` on a closed ZipFile will raise a :exc:`ValueError`. diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index f89b3942cad854..ff67aa045f06de 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -1503,6 +1503,23 @@ def test_verify(self): zh.remove(file) mock_fn.assert_not_called() + # mode 'w': error and do nothing + with zipfile.ZipFile(TESTFN, 'w', self.compression) as zh: + zh.writestr(file, data) + with mock.patch('zipfile.ZipFile._remove_members') as mock_fn: + with self.assertRaises(ValueError): + zh.remove(file) + mock_fn.assert_not_called() + + # mode 'x': error and do nothing + os.remove(TESTFN) + with zipfile.ZipFile(TESTFN, 'x', self.compression) as zh: + zh.writestr(file, data) + with mock.patch('zipfile.ZipFile._remove_members') as mock_fn: + with self.assertRaises(ValueError): + zh.remove(file) + mock_fn.assert_not_called() + # mode 'a': the most general use case with zipfile.ZipFile(TESTFN, 'w', self.compression) as zh: zh.writestr(file, data) @@ -1531,21 +1548,6 @@ def test_verify(self): zh.remove(zinfo) mock_fn.assert_not_called() - # mode 'w': like 'a'; allows removing a just written member - with zipfile.ZipFile(TESTFN, 'w', self.compression) as zh: - zh.writestr(file, data) - with mock.patch('zipfile.ZipFile._remove_members') as mock_fn: - zh.remove(file) - mock_fn.assert_called_once_with({zh.getinfo(file)}) - - # mode 'x': like 'w' - os.remove(TESTFN) - with zipfile.ZipFile(TESTFN, 'x', self.compression) as zh: - zh.writestr(file, data) - with mock.patch('zipfile.ZipFile._remove_members') as mock_fn: - zh.remove(file) - mock_fn.assert_called_once_with({zh.getinfo(file)}) - def test_zip64(self): """Test if members use zip64.""" file = 'datafile.txt' diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index dda1ff071920b1..3069e8697cf8c2 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -1867,10 +1867,13 @@ def extractall(self, path=None, members=None, pwd=None): self._extract_member(zipinfo, path, pwd) def remove(self, zinfo_or_arcname): - """Remove a member from the archive.""" + """Remove a member from the archive. - if self.mode not in ('w', 'x', 'a'): - raise ValueError("remove() requires mode 'w', 'x', or 'a'") + The archive must be open with mode 'a', since mode 'w'/'x' may be used + on an unseekable file buffer, which disallows truncation.""" + + if self.mode != 'a': + raise ValueError("remove() requires mode 'a'") if not self.fp: raise ValueError( "Attempt to write to ZIP archive that was already closed") From 2d74501e033d90d00dc70b1ca1df18a0267fd01e Mon Sep 17 00:00:00 2001 From: Danny Lin Date: Wed, 21 May 2025 23:28:44 +0800 Subject: [PATCH 05/11] Test for zip64 more thoroughly --- Lib/test/test_zipfile/test_core.py | 37 +++++++++--------------------- 1 file changed, 11 insertions(+), 26 deletions(-) diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index ff67aa045f06de..ae59ae421a09bc 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -1362,7 +1362,7 @@ class ZstdWriterTests(AbstractWriterTests, unittest.TestCase): class AbstractRemoveTests: - def _test_removing_members(self, test_files, indexes): + def _test_removing_members(self, test_files, indexes, force_zip64=False): """Test underlying _remove_members() for removing members at given indexes.""" # calculate the expected results @@ -1370,14 +1370,16 @@ def _test_removing_members(self, test_files, indexes): with zipfile.ZipFile(TESTFN, 'w', self.compression) as zh: for i, (file, data) in enumerate(test_files): if i not in indexes: - zh.writestr(file, data) + with zh.open(file, 'w', force_zip64=force_zip64) as fh: + fh.write(data) expected_files.append(file) expected_size = os.path.getsize(TESTFN) # prepare the test zip with zipfile.ZipFile(TESTFN, 'w', self.compression) as zh: for file, data in test_files: - zh.writestr(file, data) + with zh.open(file, 'w', force_zip64=force_zip64) as fh: + fh.write(data) # do the removal and check the result with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh: @@ -1550,30 +1552,13 @@ def test_verify(self): def test_zip64(self): """Test if members use zip64.""" - file = 'datafile.txt' - file1 = 'pre.txt' - file2 = 'post.txt' - data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem' - data1 = b'Lorem ipsum dolor sit amet, consectetur adipiscing elit' - data2 = b'Duis aute irure dolor in reprehenderit in voluptate velit esse' - with zipfile.ZipFile(TESTFN, 'w', self.compression) as zh: - with zh.open(file1, 'w', force_zip64=True) as fh: - fh.write(data1) - with zh.open(file2, 'w', force_zip64=True) as fh: - fh.write(data2) - expected_size = os.path.getsize(TESTFN) + test_files = [ + ('pre.txt', b'Lorem ipsum dolor sit amet, consectetur adipiscing elit'), + ('datafile', b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'), + ('post.txt', b'Duis aute irure dolor in reprehenderit in voluptate velit esse'), + ] - with zipfile.ZipFile(TESTFN, 'w', self.compression) as zh: - with zh.open(file1, 'w', force_zip64=True) as fh: - fh.write(data1) - with zh.open(file, 'w', force_zip64=True) as fh: - fh.write(data) - with zh.open(file2, 'w', force_zip64=True) as fh: - fh.write(data2) - with zipfile.ZipFile(TESTFN, 'a', self.compression) as zh: - zh.remove(file) - self.assertIsNone(zh.testzip()) - self.assertEqual(os.path.getsize(TESTFN), expected_size) + self._test_removing_members(test_files, [1], force_zip64=True) class StoredRemoveTests(AbstractRemoveTests, unittest.TestCase): compression = zipfile.ZIP_STORED From 63d56e5bc9bbd2e1cad7d61b53fad2379c5bce5c Mon Sep 17 00:00:00 2001 From: Danny Lin Date: Thu, 22 May 2025 06:57:48 +0800 Subject: [PATCH 06/11] Add tests for zstd --- Lib/test/test_zipfile/test_core.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index ae59ae421a09bc..259bda57d99e14 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -1575,6 +1575,10 @@ class Bzip2RemoveTests(AbstractRemoveTests, unittest.TestCase): class LzmaRemoveTests(AbstractRemoveTests, unittest.TestCase): compression = zipfile.ZIP_LZMA +@requires_zstd() +class ZstdRemoveTests(AbstractRemoveTests, unittest.TestCase): + compression = zipfile.ZIP_ZSTANDARD + class PyZipFileTests(unittest.TestCase): def assertCompiledIn(self, name, namelist): From ab3a1871623238aa7288d10f90aedd7675be2822 Mon Sep 17 00:00:00 2001 From: Danny Lin Date: Thu, 22 May 2025 17:32:27 +0800 Subject: [PATCH 07/11] Remove unneeded seek - The seek will be automatically called in `ZipFile.close`. --- Lib/zipfile/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 3069e8697cf8c2..282701abe869a3 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -2022,9 +2022,6 @@ def _remove_members(self, members, *, remove_physical=True, chunk_size=2**20): self.start_dir -= entry_offset self._didModify = True - # seek to the start of the central dir - fp.seek(self.start_dir) - def _writecheck(self, zinfo): """Check for errors before writing a file to the archive.""" if zinfo.filename in self.NameToInfo: From 1c074c5c87815931bbc81da6081bdf44531c9097 Mon Sep 17 00:00:00 2001 From: Danny Lin Date: Thu, 22 May 2025 17:38:13 +0800 Subject: [PATCH 08/11] Add lock for thread safety --- Lib/zipfile/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 282701abe869a3..02e80b390eec7f 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -1893,7 +1893,8 @@ def remove(self, zinfo_or_arcname): # get the info object zinfo = self.getinfo(zinfo_or_arcname) - return self._remove_members({zinfo}) + with self._lock: + return self._remove_members({zinfo}) @classmethod def _sanitize_windows_name(cls, arcname, pathsep): From 8b4f70955544ccb18351a96cedf60851f26b4b63 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Thu, 22 May 2025 12:52:38 +0000 Subject: [PATCH 09/11] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2025-05-22-12-52-35.gh-issue-51067.tJxGGF.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-22-12-52-35.gh-issue-51067.tJxGGF.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-22-12-52-35.gh-issue-51067.tJxGGF.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-22-12-52-35.gh-issue-51067.tJxGGF.rst new file mode 100644 index 00000000000000..0502d7fed164c6 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-22-12-52-35.gh-issue-51067.tJxGGF.rst @@ -0,0 +1 @@ +Add `ZipFile.remove()` From 7b7f54a1cfe8c89cd2c6452c113fb815334e51cc Mon Sep 17 00:00:00 2001 From: Danny Lin Date: Fri, 23 May 2025 22:12:47 +0800 Subject: [PATCH 10/11] Suppress duplicated name warning during generating test files --- Lib/test/test_zipfile/test_core.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index 259bda57d99e14..0b457bda6b3347 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -1440,7 +1440,10 @@ def test_duplicated_arcname(self): for index in dups: indexes = [index] with self.subTest(dups=dups, remove=indexes): - self._test_removing_members(files, indexes) + import warnings + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + self._test_removing_members(files, indexes) def test_non_physical(self): """Test underlying _remove_members() for non-physical removing.""" From c95ef5308bb958c563eeba67b5dddddb709b73f5 Mon Sep 17 00:00:00 2001 From: Danny Lin Date: Sat, 24 May 2025 18:10:51 +0800 Subject: [PATCH 11/11] Fix rst format --- .../2025-05-22-12-52-35.gh-issue-51067.tJxGGF.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-22-12-52-35.gh-issue-51067.tJxGGF.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-22-12-52-35.gh-issue-51067.tJxGGF.rst index 0502d7fed164c6..6a696828991836 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-22-12-52-35.gh-issue-51067.tJxGGF.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-22-12-52-35.gh-issue-51067.tJxGGF.rst @@ -1 +1 @@ -Add `ZipFile.remove()` +Add ``ZipFile.remove()`` pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy