(original) (raw)
changeset: 103534:0bac85e355b5 user: Ćukasz Langa lukasz@langa.pl date: Fri Sep 09 19:48:14 2016 -0700 files: Lib/tarfile.py Misc/NEWS description: Issue #27199: TarFile expose copyfileobj bufsize to improve throughput Patch by Jason Fried. diff -r 63ae310b60ff -r 0bac85e355b5 Lib/tarfile.py --- a/Lib/tarfile.py Fri Sep 09 19:48:47 2016 -0700 +++ b/Lib/tarfile.py Fri Sep 09 19:48:14 2016 -0700 @@ -228,21 +228,21 @@ signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf)) return unsigned_chksum, signed_chksum -def copyfileobj(src, dst, length=None, exception=OSError): +def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None): """Copy length bytes from fileobj src to fileobj dst. If length is None, copy the entire content. """ + bufsize = bufsize or 16 * 1024 if length == 0: return if length is None: - shutil.copyfileobj(src, dst) + shutil.copyfileobj(src, dst, bufsize) return - BUFSIZE = 16 * 1024 - blocks, remainder = divmod(length, BUFSIZE) + blocks, remainder = divmod(length, bufsize) for b in range(blocks): - buf = src.read(BUFSIZE) - if len(buf) < BUFSIZE: + buf = src.read(bufsize) + if len(buf) < bufsize: raise exception("unexpected end of data") dst.write(buf) @@ -1403,7 +1403,8 @@ def __init__(self, name=None, mode="r", fileobj=None, format=None, tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, - errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None): + errors="surrogateescape", pax_headers=None, debug=None, + errorlevel=None, copybufsize=None): """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to read from an existing archive, 'a' to append data to an existing file or 'w' to create a new file overwriting an existing one. `mode' @@ -1459,6 +1460,7 @@ self.errorlevel = errorlevel # Init datastructures. + self.copybufsize = copybufsize self.closed = False self.members = [] # list of members as TarInfo objects self._loaded = False # flag if all members have been read @@ -1558,7 +1560,7 @@ saved_pos = fileobj.tell() try: return func(name, "r", fileobj, **kwargs) - except (ReadError, CompressionError) as e: + except (ReadError, CompressionError): if fileobj is not None: fileobj.seek(saved_pos) continue @@ -1963,10 +1965,10 @@ buf = tarinfo.tobuf(self.format, self.encoding, self.errors) self.fileobj.write(buf) self.offset += len(buf) - + bufsize=self.copybufsize # If there's data to follow, append it. if fileobj is not None: - copyfileobj(fileobj, self.fileobj, tarinfo.size) + copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize) blocks, remainder = divmod(tarinfo.size, BLOCKSIZE) if remainder > 0: self.fileobj.write(NUL * (BLOCKSIZE - remainder)) @@ -2148,15 +2150,16 @@ """ source = self.fileobj source.seek(tarinfo.offset_data) + bufsize = self.copybufsize with bltn_open(targetpath, "wb") as target: if tarinfo.sparse is not None: for offset, size in tarinfo.sparse: target.seek(offset) - copyfileobj(source, target, size, ReadError) + copyfileobj(source, target, size, ReadError, bufsize) target.seek(tarinfo.size) target.truncate() else: - copyfileobj(source, target, tarinfo.size, ReadError) + copyfileobj(source, target, tarinfo.size, ReadError, bufsize) def makeunknown(self, tarinfo, targetpath): """Make a file from a TarInfo object with an unknown type @@ -2235,7 +2238,7 @@ os.lchown(targetpath, u, g) else: os.chown(targetpath, u, g) - except OSError as e: + except OSError: raise ExtractError("could not change owner") def chmod(self, tarinfo, targetpath): @@ -2244,7 +2247,7 @@ if hasattr(os, 'chmod'): try: os.chmod(targetpath, tarinfo.mode) - except OSError as e: + except OSError: raise ExtractError("could not change mode") def utime(self, tarinfo, targetpath): @@ -2254,7 +2257,7 @@ return try: os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime)) - except OSError as e: + except OSError: raise ExtractError("could not change modification time") #-------------------------------------------------------------------------- diff -r 63ae310b60ff -r 0bac85e355b5 Misc/NEWS --- a/Misc/NEWS Fri Sep 09 19:48:47 2016 -0700 +++ b/Misc/NEWS Fri Sep 09 19:48:14 2016 -0700 @@ -10,6 +10,9 @@ Core and Builtins ----------------- +- Issue #27199: In tarfile, expose copyfileobj bufsize to improve throughput. + Patch by Jason Fried. + - Issue #27948: In f-strings, only allow backslashes inside the braces (where the expressions are). This is a breaking change from the 3.6 alpha releases, where backslashes are allowed anywhere in an /lukasz@langa.pl