bpo-34043: Optimize tarfile uncompress performance (GH-8089) · python/cpython@8d13091 (original) (raw)

Original file line number | Diff line number | Diff line change
@@ -513,21 +513,10 @@ def seek(self, pos=0):
513 513 raise StreamError("seeking backwards is not allowed")
514 514 return self.pos
515 515
516 -def read(self, size=None):
517 -"""Return the next size number of bytes from the stream.
518 - If size is not defined, return all bytes of the stream
519 - up to EOF.
520 - """
521 -if size is None:
522 -t = []
523 -while True:
524 -buf = self._read(self.bufsize)
525 -if not buf:
526 -break
527 -t.append(buf)
528 -buf = b"".join(t)
529 -else:
530 -buf = self._read(size)
516 +def read(self, size):
517 +"""Return the next size number of bytes from the stream."""
518 +assert size is not None
519 +buf = self._read(size)
531 520 self.pos += len(buf)
532 521 return buf
533 522
@@ -540,9 +529,14 @@ def _read(self, size):
540 529 c = len(self.dbuf)
541 530 t = [self.dbuf]
542 531 while c < size:
543 -buf = self.__read(self.bufsize)
544 -if not buf:
545 -break
532 +# Skip underlying buffer to avoid unaligned double buffering.
533 +if self.buf:
534 +buf = self.buf
535 +self.buf = b""
536 +else:
537 +buf = self.fileobj.read(self.bufsize)
538 +if not buf:
539 +break
546 540 try:
547 541 buf = self.cmp.decompress(buf)
548 542 except self.exception: