cpython: 334c01aa7f93 (original) (raw)
Mercurial > cpython
changeset 93711:334c01aa7f93
Issue #14099: ZipFile.open() no longer reopens the underlying file. Objects returned by ZipFile.open() can now operate independently of the ZipFile even if the ZipFile was created by passing in a file-like object as the first argument to the constructor. [#14099]
Serhiy Storchaka storchaka@gmail.com | |
---|---|
date | Wed, 03 Dec 2014 09:14:36 +0200 |
parents | fd80195b920f(current diff)e5bb3044402b(diff) |
children | 7509a0607c40 |
files | Lib/test/test_zipfile.py Lib/zipfile.py Misc/NEWS |
diffstat | 4 files changed, 166 insertions(+), 73 deletions(-)[+] [-] Doc/library/zipfile.rst 10 Lib/test/test_zipfile.py 121 Lib/zipfile.py 103 Misc/NEWS 5 |
line wrap: on
line diff
--- a/Doc/library/zipfile.rst +++ b/Doc/library/zipfile.rst @@ -219,14 +219,8 @@ ZipFile Objects .. note::
If the ZipFile was created by passing in a file-like object as the first[](#l1.7)
argument to the constructor, then the object returned by :meth:`.open` shares the[](#l1.8)
ZipFile's file pointer. Under these circumstances, the object returned by[](#l1.9)
:meth:`.open` should not be used after any additional operations are performed[](#l1.10)
on the ZipFile object. If the ZipFile was created by passing in a string (the[](#l1.11)
filename) as the first argument to the constructor, then :meth:`.open` will[](#l1.12)
create a new file object that will be held by the ZipExtFile, allowing it to[](#l1.13)
operate independently of the ZipFile.[](#l1.14)
Objects returned by :meth:`.open` can operate independently of the[](#l1.15)
ZipFile.[](#l1.16)
--- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -1,3 +1,4 @@ +import contextlib import io import os import sys @@ -25,6 +26,9 @@ SMALL_TEST_DATA = [('_ziptest1', '1q2w3e ('ziptest2dir/ziptest3dir/_ziptest3', 'azsxdcfvgb'), ('ziptest2dir/ziptest3dir/ziptest4dir/_ziptest3', '6y7u8i9o0p')] +def getrandbytes(size):
+ def get_files(test): yield TESTFN2 with TemporaryFile() as f: @@ -289,7 +293,7 @@ class AbstractTestsWithSourceFile: # than requested. for test_size in (1, 4095, 4096, 4097, 16384): file_size = test_size + 1
junk = getrandbits(8 * file_size).to_bytes(file_size, 'little')[](#l2.22)
junk = getrandbytes(file_size)[](#l2.23) with zipfile.ZipFile(io.BytesIO(), "w", self.compression) as zipf:[](#l2.24) zipf.writestr('foo', junk)[](#l2.25) with zipf.open('foo', 'r') as fp:[](#l2.26)
@@ -1666,46 +1670,111 @@ class LzmaTestsWithRandomBinaryFiles(Abs @requires_zlib class TestsWithMultipleOpens(unittest.TestCase):
- @classmethod
- def setUpClass(cls):
cls.data1 = b'111' + getrandbytes(10000)[](#l2.34)
cls.data2 = b'222' + getrandbytes(10000)[](#l2.35)
with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_DEFLATED) as zipfp:[](#l2.39)
zipfp.writestr('ones', '1'*FIXEDTEST_SIZE)[](#l2.40)
zipfp.writestr('twos', '2'*FIXEDTEST_SIZE)[](#l2.41)
with zipfile.ZipFile(f, "w", zipfile.ZIP_DEFLATED) as zipfp:[](#l2.42)
zipfp.writestr('ones', self.data1)[](#l2.43)
zipfp.writestr('twos', self.data2)[](#l2.44)
def test_same_file(self): # Verify that (when the ZipFile is in control of creating file objects) # multiple open() calls can be made without interfering with each other.
with zipfile.ZipFile(TESTFN2, mode="r") as zipf:[](#l2.49)
with zipf.open('ones') as zopen1, zipf.open('ones') as zopen2:[](#l2.50)
data1 = zopen1.read(500)[](#l2.51)
data2 = zopen2.read(500)[](#l2.52)
data1 += zopen1.read(500)[](#l2.53)
data2 += zopen2.read(500)[](#l2.54)
self.assertEqual(data1, data2)[](#l2.55)
for f in get_files(self):[](#l2.56)
self.make_test_archive(f)[](#l2.57)
with zipfile.ZipFile(f, mode="r") as zipf:[](#l2.58)
with zipf.open('ones') as zopen1, zipf.open('ones') as zopen2:[](#l2.59)
data1 = zopen1.read(500)[](#l2.60)
data2 = zopen2.read(500)[](#l2.61)
data1 += zopen1.read()[](#l2.62)
data2 += zopen2.read()[](#l2.63)
self.assertEqual(data1, data2)[](#l2.64)
self.assertEqual(data1, self.data1)[](#l2.65)
def test_different_file(self): # Verify that (when the ZipFile is in control of creating file objects) # multiple open() calls can be made without interfering with each other.
with zipfile.ZipFile(TESTFN2, mode="r") as zipf:[](#l2.70)
with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2:[](#l2.71)
data1 = zopen1.read(500)[](#l2.72)
data2 = zopen2.read(500)[](#l2.73)
data1 += zopen1.read(500)[](#l2.74)
data2 += zopen2.read(500)[](#l2.75)
self.assertEqual(data1, b'1'*FIXEDTEST_SIZE)[](#l2.76)
self.assertEqual(data2, b'2'*FIXEDTEST_SIZE)[](#l2.77)
for f in get_files(self):[](#l2.78)
self.make_test_archive(f)[](#l2.79)
with zipfile.ZipFile(f, mode="r") as zipf:[](#l2.80)
with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2:[](#l2.81)
data1 = zopen1.read(500)[](#l2.82)
data2 = zopen2.read(500)[](#l2.83)
data1 += zopen1.read()[](#l2.84)
data2 += zopen2.read()[](#l2.85)
self.assertEqual(data1, self.data1)[](#l2.86)
self.assertEqual(data2, self.data2)[](#l2.87)
def test_interleaved(self): # Verify that (when the ZipFile is in control of creating file objects) # multiple open() calls can be made without interfering with each other.
with zipfile.ZipFile(TESTFN2, mode="r") as zipf:[](#l2.92)
with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2:[](#l2.93)
for f in get_files(self):[](#l2.94)
self.make_test_archive(f)[](#l2.95)
with zipfile.ZipFile(f, mode="r") as zipf:[](#l2.96)
with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2:[](#l2.97)
data1 = zopen1.read(500)[](#l2.98)
data2 = zopen2.read(500)[](#l2.99)
data1 += zopen1.read()[](#l2.100)
data2 += zopen2.read()[](#l2.101)
self.assertEqual(data1, self.data1)[](#l2.102)
self.assertEqual(data2, self.data2)[](#l2.103)
- def test_read_after_close(self):
for f in get_files(self):[](#l2.106)
self.make_test_archive(f)[](#l2.107)
with contextlib.ExitStack() as stack:[](#l2.108)
with zipfile.ZipFile(f, 'r') as zipf:[](#l2.109)
zopen1 = stack.enter_context(zipf.open('ones'))[](#l2.110)
zopen2 = stack.enter_context(zipf.open('twos'))[](#l2.111) data1 = zopen1.read(500)[](#l2.112) data2 = zopen2.read(500)[](#l2.113)
data1 += zopen1.read(500)[](#l2.114)
data2 += zopen2.read(500)[](#l2.115)
self.assertEqual(data1, b'1'*FIXEDTEST_SIZE)[](#l2.116)
self.assertEqual(data2, b'2'*FIXEDTEST_SIZE)[](#l2.117)
data1 += zopen1.read()[](#l2.118)
data2 += zopen2.read()[](#l2.119)
self.assertEqual(data1, self.data1)[](#l2.120)
self.assertEqual(data2, self.data2)[](#l2.121)
- def test_read_after_write(self):
for f in get_files(self):[](#l2.124)
with zipfile.ZipFile(f, 'w', zipfile.ZIP_DEFLATED) as zipf:[](#l2.125)
zipf.writestr('ones', self.data1)[](#l2.126)
zipf.writestr('twos', self.data2)[](#l2.127)
with zipf.open('ones') as zopen1:[](#l2.128)
data1 = zopen1.read(500)[](#l2.129)
self.assertEqual(data1, self.data1[:500])[](#l2.130)
with zipfile.ZipFile(f, 'r') as zipf:[](#l2.131)
data1 = zipf.read('ones')[](#l2.132)
data2 = zipf.read('twos')[](#l2.133)
self.assertEqual(data1, self.data1)[](#l2.134)
self.assertEqual(data2, self.data2)[](#l2.135)
- def test_write_after_read(self):
for f in get_files(self):[](#l2.138)
with zipfile.ZipFile(f, "w", zipfile.ZIP_DEFLATED) as zipf:[](#l2.139)
zipf.writestr('ones', self.data1)[](#l2.140)
with zipf.open('ones') as zopen1:[](#l2.141)
zopen1.read(500)[](#l2.142)
zipf.writestr('twos', self.data2)[](#l2.143)
with zipfile.ZipFile(f, 'r') as zipf:[](#l2.144)
data1 = zipf.read('ones')[](#l2.145)
data2 = zipf.read('twos')[](#l2.146)
self.assertEqual(data1, self.data1)[](#l2.147)
self.assertEqual(data2, self.data2)[](#l2.148)
- def test_many_opens(self):
# Verify that read() and open() promptly close the file descriptor,[](#l2.151)
# and don't rely on the garbage collector to free resources.[](#l2.152)
self.make_test_archive(TESTFN2)[](#l2.153)
with zipfile.ZipFile(TESTFN2, mode="r") as zipf:[](#l2.154)
for x in range(100):[](#l2.155)
zipf.read('ones')[](#l2.156)
with zipf.open('ones') as zopen1:[](#l2.157)
pass[](#l2.158)
with open(os.devnull) as f:[](#l2.159)
self.assertLess(f.fileno(), 100)[](#l2.160)
def tearDown(self): unlink(TESTFN2)
--- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -646,6 +646,25 @@ def _get_decompressor(compress_type): raise NotImplementedError("compression type %d" % (compress_type,)) +class _SharedFile:
- def __init__(self, file, pos, close):
self._file = file[](#l3.9)
self._pos = pos[](#l3.10)
self._close = close[](#l3.11)
- def read(self, n=-1):
self._file.seek(self._pos)[](#l3.14)
data = self._file.read(n)[](#l3.15)
self._pos = self._file.tell()[](#l3.16)
return data[](#l3.17)
- def close(self):
if self._file is not None:[](#l3.20)
fileobj = self._file[](#l3.21)
self._file = None[](#l3.22)
self._close(fileobj)[](#l3.23)
+ + class ZipExtFile(io.BufferedIOBase): """File-like object for reading an archive member. Is returned by ZipFile.open(). @@ -945,7 +964,7 @@ class ZipFile: self.NameToInfo = {} # Find file info given name self.filelist = [] # List of ZipInfo instances for archive self.compression = compression # Method of compression
self.mode = key = mode.replace('b', '')[0][](#l3.33)
self.mode = mode[](#l3.34) self.pwd = None[](#l3.35) self._comment = b''[](#l3.36)
@@ -954,28 +973,33 @@ class ZipFile: # No, it's a filename self._filePassed = 0 self.filename = file
modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}[](#l3.42)
try:[](#l3.43)
self.fp = io.open(file, modeDict[mode])[](#l3.44)
except OSError:[](#l3.45)
if mode == 'a':[](#l3.46)
mode = key = 'w'[](#l3.47)
self.fp = io.open(file, modeDict[mode])[](#l3.48)
else:[](#l3.49)
modeDict = {'r' : 'rb', 'w': 'w+b', 'a' : 'r+b',[](#l3.50)
'r+b': 'w+b', 'w+b': 'wb'}[](#l3.51)
filemode = modeDict[mode][](#l3.52)
while True:[](#l3.53)
try:[](#l3.54)
self.fp = io.open(file, filemode)[](#l3.55)
except OSError:[](#l3.56)
if filemode in modeDict:[](#l3.57)
filemode = modeDict[filemode][](#l3.58)
continue[](#l3.59) raise[](#l3.60)
break[](#l3.61) else:[](#l3.62) self._filePassed = 1[](#l3.63) self.fp = file[](#l3.64) self.filename = getattr(file, 'name', None)[](#l3.65)
self._fileRefCnt = 1[](#l3.66)
if key == 'r':[](#l3.69)
if mode == 'r':[](#l3.70) self._RealGetContents()[](#l3.71)
elif key == 'w':[](#l3.72)
elif mode == 'w':[](#l3.73) # set the modified flag so central directory gets written[](#l3.74) # even if no files are added to the archive[](#l3.75) self._didModify = True[](#l3.76)
elif key == 'a':[](#l3.77)
self.start_dir = 0[](#l3.78)
elif mode == 'a':[](#l3.79) try:[](#l3.80) # See if file is a zip file[](#l3.81) self._RealGetContents()[](#l3.82)
@@ -988,13 +1012,13 @@ class ZipFile: # set the modified flag so central directory gets written # even if no files are added to the archive self._didModify = True
self.start_dir = self.fp.tell()[](#l3.87) else:[](#l3.88) raise RuntimeError('Mode must be "r", "w" or "a"')[](#l3.89) except:[](#l3.90) fp = self.fp[](#l3.91) self.fp = None[](#l3.92)
if not self._filePassed:[](#l3.93)
fp.close()[](#l3.94)
self._fpclose(fp)[](#l3.95) raise[](#l3.96)
def __enter__(self): @@ -1181,23 +1205,17 @@ class ZipFile: raise RuntimeError( "Attempt to read ZIP archive that was already closed")
# Only open a new file for instances where we were not[](#l3.103)
# given a file object in the constructor[](#l3.104)
if self._filePassed:[](#l3.105)
zef_file = self.fp[](#l3.106)
# Make sure we have an info object[](#l3.107)
if isinstance(name, ZipInfo):[](#l3.108)
# 'name' is already an info object[](#l3.109)
zinfo = name[](#l3.110) else:[](#l3.111)
zef_file = io.open(self.filename, 'rb')[](#l3.112)
# Get info object for name[](#l3.113)
zinfo = self.getinfo(name)[](#l3.114)
self._fileRefCnt += 1[](#l3.116)
zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose)[](#l3.117) try:[](#l3.118)
# Make sure we have an info object[](#l3.119)
if isinstance(name, ZipInfo):[](#l3.120)
# 'name' is already an info object[](#l3.121)
zinfo = name[](#l3.122)
else:[](#l3.123)
# Get info object for name[](#l3.124)
zinfo = self.getinfo(name)[](#l3.125)
zef_file.seek(zinfo.header_offset, 0)[](#l3.126)
- # Skip the file header: fheader = zef_file.read(sizeFileHeader) if len(fheader) != sizeFileHeader: @@ -1256,11 +1274,9 @@ class ZipFile: if h[11] != check_byte: raise RuntimeError("Bad password for file", name)
return ZipExtFile(zef_file, mode, zinfo, zd,[](#l3.135)
close_fileobj=not self._filePassed)[](#l3.136)
return ZipExtFile(zef_file, mode, zinfo, zd, True)[](#l3.137) except:[](#l3.138)
if not self._filePassed:[](#l3.139)
zef_file.close()[](#l3.140)
zef_file.close()[](#l3.141) raise[](#l3.142)
def extract(self, member, path=None, pwd=None): @@ -1394,6 +1410,7 @@ class ZipFile: zinfo.file_size = st.st_size zinfo.flag_bits = 0x00
self.fp.seek(self.start_dir, 0)[](#l3.149) zinfo.header_offset = self.fp.tell() # Start of header bytes[](#l3.150) if zinfo.compress_type == ZIP_LZMA:[](#l3.151) # Compressed data includes an end-of-stream (EOS) marker[](#l3.152)
@@ -1410,6 +1427,7 @@ class ZipFile: self.filelist.append(zinfo) self.NameToInfo[zinfo.filename] = zinfo self.fp.write(zinfo.FileHeader(False))
self.start_dir = self.fp.tell()[](#l3.157) return[](#l3.158)
cmpr = _get_compressor(zinfo.compress_type) @@ -1448,10 +1466,10 @@ class ZipFile: raise RuntimeError('Compressed size larger than uncompressed size') # Seek backwards and write file header (which will now include # correct CRC and file sizes)
position = self.fp.tell() # Preserve current position in file[](#l3.165)
self.start_dir = self.fp.tell() # Preserve current position in file[](#l3.166) self.fp.seek(zinfo.header_offset, 0)[](#l3.167) self.fp.write(zinfo.FileHeader(zip64))[](#l3.168)
self.fp.seek(position, 0)[](#l3.169)
self.fp.seek(self.start_dir, 0)[](#l3.170) self.filelist.append(zinfo)[](#l3.171) self.NameToInfo[zinfo.filename] = zinfo[](#l3.172)
@@ -1480,6 +1498,7 @@ class ZipFile: "Attempt to write to ZIP archive that was already closed") zinfo.file_size = len(data) # Uncompressed size
self.fp.seek(self.start_dir, 0)[](#l3.178) zinfo.header_offset = self.fp.tell() # Start of header data[](#l3.179) if compress_type is not None:[](#l3.180) zinfo.compress_type = compress_type[](#l3.181)
@@ -1508,6 +1527,7 @@ class ZipFile: self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size, zinfo.file_size)) self.fp.flush()
self.start_dir = self.fp.tell()[](#l3.186) self.filelist.append(zinfo)[](#l3.187) self.NameToInfo[zinfo.filename] = zinfo[](#l3.188)
@@ -1523,7 +1543,7 @@ class ZipFile: try: if self.mode in ("w", "a") and self._didModify: # write ending records
pos1 = self.fp.tell()[](#l3.194)
self.fp.seek(self.start_dir, 0)[](#l3.195) for zinfo in self.filelist: # write central directory[](#l3.196) dt = zinfo.date_time[](#l3.197) dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2][](#l3.198)
@@ -1589,8 +1609,8 @@ class ZipFile: pos2 = self.fp.tell() # Write end-of-zip-archive record centDirCount = len(self.filelist)
centDirSize = pos2 - pos1[](#l3.203)
centDirOffset = pos1[](#l3.204)
centDirSize = pos2 - self.start_dir[](#l3.205)
centDirOffset = self.start_dir[](#l3.206) requires_zip64 = None[](#l3.207) if centDirCount > ZIP_FILECOUNT_LIMIT:[](#l3.208) requires_zip64 = "Files count"[](#l3.209)
@@ -1626,8 +1646,13 @@ class ZipFile: finally: fp = self.fp self.fp = None
if not self._filePassed:[](#l3.214)
fp.close()[](#l3.215)
self._fpclose(fp)[](#l3.216)
- def _fpclose(self, fp):
assert self._fileRefCnt > 0[](#l3.219)
self._fileRefCnt -= 1[](#l3.220)
if not self._fileRefCnt and not self._filePassed:[](#l3.221)
fp.close()[](#l3.222)
--- a/Misc/NEWS +++ b/Misc/NEWS @@ -194,6 +194,11 @@ Core and Builtins Library ------- +- Issue #14099: ZipFile.open() no longer reopens the underlying file. Objects