(original) (raw)
changeset: 104353:9a99a88301ef branch: 2.7 parent: 104346:47d5bf5a846f user: Serhiy Storchaka storchaka@gmail.com date: Fri Oct 07 23:12:53 2016 +0300 files: Lib/test/test_zipfile.py Lib/zipfile.py Misc/NEWS description: Issue #26293: Fixed writing ZIP files that starts not from the start of the file. Offsets in ZIP file now are relative to the start of the archive in conforming to the specification. diff -r 47d5bf5a846f -r 9a99a88301ef Lib/test/test_zipfile.py --- a/Lib/test/test_zipfile.py Fri Oct 07 21:51:09 2016 +0300 +++ b/Lib/test/test_zipfile.py Fri Oct 07 23:12:53 2016 +0300 @@ -344,6 +344,49 @@ f.seek(len(data)) with zipfile.ZipFile(f, "r") as zipfp: self.assertEqual(zipfp.namelist(), [TESTFN]) + self.assertEqual(zipfp.read(TESTFN), self.data) + with open(TESTFN2, 'rb') as f: + self.assertEqual(f.read(len(data)), data) + zipfiledata = f.read() + with io.BytesIO(zipfiledata) as bio, zipfile.ZipFile(bio) as zipfp: + self.assertEqual(zipfp.namelist(), [TESTFN]) + self.assertEqual(zipfp.read(TESTFN), self.data) + + def test_read_concatenated_zip_file(self): + with io.BytesIO() as bio: + with zipfile.ZipFile(bio, 'w', zipfile.ZIP_STORED) as zipfp: + zipfp.write(TESTFN, TESTFN) + zipfiledata = bio.getvalue() + data = b'I am not a ZipFile!'*10 + with open(TESTFN2, 'wb') as f: + f.write(data) + f.write(zipfiledata) + + with zipfile.ZipFile(TESTFN2) as zipfp: + self.assertEqual(zipfp.namelist(), [TESTFN]) + self.assertEqual(zipfp.read(TESTFN), self.data) + + def test_append_to_concatenated_zip_file(self): + with io.BytesIO() as bio: + with zipfile.ZipFile(bio, 'w', zipfile.ZIP_STORED) as zipfp: + zipfp.write(TESTFN, TESTFN) + zipfiledata = bio.getvalue() + data = b'I am not a ZipFile!'*1000000 + with open(TESTFN2, 'wb') as f: + f.write(data) + f.write(zipfiledata) + + with zipfile.ZipFile(TESTFN2, 'a') as zipfp: + self.assertEqual(zipfp.namelist(), [TESTFN]) + zipfp.writestr('strfile', self.data) + + with open(TESTFN2, 'rb') as f: + 
self.assertEqual(f.read(len(data)), data) + zipfiledata = f.read() + with io.BytesIO(zipfiledata) as bio, zipfile.ZipFile(bio) as zipfp: + self.assertEqual(zipfp.namelist(), [TESTFN, 'strfile']) + self.assertEqual(zipfp.read(TESTFN), self.data) + self.assertEqual(zipfp.read('strfile'), self.data) def test_ignores_newline_at_end(self): with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED) as zipfp: diff -r 47d5bf5a846f -r 9a99a88301ef Lib/zipfile.py --- a/Lib/zipfile.py Fri Oct 07 21:51:09 2016 +0300 +++ b/Lib/zipfile.py Fri Oct 07 23:12:53 2016 +0300 @@ -772,6 +772,7 @@ # set the modified flag so central directory gets written # even if no files are added to the archive self._didModify = True + self._start_disk = self.fp.tell() elif key == 'a': try: # See if file is a zip file @@ -785,6 +786,7 @@ # set the modified flag so central directory gets written # even if no files are added to the archive self._didModify = True + self._start_disk = self.fp.tell() else: raise RuntimeError('Mode must be "r", "w" or "a"') except: @@ -815,17 +817,18 @@ offset_cd = endrec[_ECD_OFFSET] # offset of central directory self._comment = endrec[_ECD_COMMENT] # archive comment - # "concat" is zero, unless zip was concatenated to another file - concat = endrec[_ECD_LOCATION] - size_cd - offset_cd + # self._start_disk: Position of the start of ZIP archive + # It is zero, unless ZIP was concatenated to another file + self._start_disk = endrec[_ECD_LOCATION] - size_cd - offset_cd if endrec[_ECD_SIGNATURE] == stringEndArchive64: # If Zip64 extension structures are present, account for them - concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) + self._start_disk -= (sizeEndCentDir64 + sizeEndCentDir64Locator) if self.debug > 2: - inferred = concat + offset_cd - print "given, inferred, offset", offset_cd, inferred, concat + inferred = self._start_disk + offset_cd + print "given, inferred, offset", offset_cd, inferred, self._start_disk # self.start_dir: Position of start of central directory 
- self.start_dir = offset_cd + concat + self.start_dir = offset_cd + self._start_disk fp.seek(self.start_dir, 0) data = fp.read(size_cd) fp = cStringIO.StringIO(data) @@ -855,7 +858,7 @@ t>>11, (t>>5)&0x3F, (t&0x1F) * 2 ) x._decodeExtra() - x.header_offset = x.header_offset + concat + x.header_offset = x.header_offset + self._start_disk x.filename = x._decodeFilename() self.filelist.append(x) self.NameToInfo[x.filename] = x @@ -1198,7 +1201,7 @@ raise RuntimeError('Compressed size larger than uncompressed size') # Seek backwards and write file header (which will now include # correct CRC and file sizes) - position = self.fp.tell() # Preserve current position in file + position = self.fp.tell() # Preserve current position in file self.fp.seek(zinfo.header_offset, 0) self.fp.write(zinfo.FileHeader(zip64)) self.fp.seek(position, 0) @@ -1284,11 +1287,10 @@ file_size = zinfo.file_size compress_size = zinfo.compress_size - if zinfo.header_offset > ZIP64_LIMIT: - extra.append(zinfo.header_offset) + header_offset = zinfo.header_offset - self._start_disk + if header_offset > ZIP64_LIMIT: + extra.append(header_offset) header_offset = 0xffffffffL - else: - header_offset = zinfo.header_offset extra_data = zinfo.extra if extra: @@ -1332,7 +1334,7 @@ # Write end-of-zip-archive record centDirCount = len(self.filelist) centDirSize = pos2 - pos1 - centDirOffset = pos1 + centDirOffset = pos1 - self._start_disk requires_zip64 = None if centDirCount > ZIP_FILECOUNT_LIMIT: requires_zip64 = "Files count" diff -r 47d5bf5a846f -r 9a99a88301ef Misc/NEWS --- a/Misc/NEWS Fri Oct 07 21:51:09 2016 +0300 +++ b/Misc/NEWS Fri Oct 07 23:12:53 2016 +0300 @@ -49,6 +49,10 @@ Library ------- +- Issue #26293: Fixed writing ZIP files that starts not from the start of the + file. Offsets in ZIP file now are relative to the start of the archive in + conforming to the specification. + - Fix possible integer overflows and crashes in the mmap module with unusual usage patterns.