gh-95534: Improve gzip reading speed by 10% (#97664) · python/cpython@eae7dad (original) (raw)
`@@ -944,6 +944,173 @@ def choose_lines(source, number, seed=None, generator=random):
`
944
944
`"""
`
945
945
``
946
946
``
``
947
`+
import unittest  # harmless duplicate if the module already imports unittest


class ZlibDecompressorTest(unittest.TestCase):
    """Tests for zlib._ZlibDecompressor (adapted from test_bz2.py).

    Covers one-shot and chunked decompression, the max_length /
    needs_input protocol, trailing data after the stream, EOF and error
    behaviour, and pickling.
    """

    TEXT = HAMLET_SCENE
    DATA = zlib.compress(HAMLET_SCENE)
    BAD_DATA = b"Not a valid deflate block"
    # Larger fixture for the max_length test, which needs a stream that
    # yields far more than a few hundred output bytes.
    BIG_TEXT = DATA * ((128 * 1024 // len(DATA)) + 1)
    BIG_DATA = zlib.compress(BIG_TEXT)

    def test_Constructor(self):
        # NOTE(review): the first positional argument is taken to be wbits,
        # for which 42 (32 + 10) is an accepted value per the zlib docs, so
        # an integer cannot provoke the TypeError; pass a string instead.
        # Confirm against the C signature.
        self.assertRaises(TypeError, zlib._ZlibDecompressor, "ASDA")

    def testDecompress(self):
        zlibd = zlib._ZlibDecompressor()
        # decompress() requires a data argument.
        self.assertRaises(TypeError, zlibd.decompress)
        text = zlibd.decompress(self.DATA)
        self.assertEqual(text, self.TEXT)

    def testDecompressChunks10(self):
        # Feed the compressed stream in consecutive 10-byte slices.
        zlibd = zlib._ZlibDecompressor()
        pieces = []
        for start in range(0, len(self.DATA), 10):
            pieces.append(zlibd.decompress(self.DATA[start:start + 10]))
        self.assertEqual(b''.join(pieces), self.TEXT)

    def testDecompressUnusedData(self):
        # Bytes following the end of the stream must land in unused_data.
        zlibd = zlib._ZlibDecompressor()
        unused_data = b"this is unused data"
        text = zlibd.decompress(self.DATA + unused_data)
        self.assertEqual(text, self.TEXT)
        self.assertEqual(zlibd.unused_data, unused_data)

    def testEOFError(self):
        # Once the stream has ended, any further call must raise EOFError.
        zlibd = zlib._ZlibDecompressor()
        zlibd.decompress(self.DATA)
        self.assertRaises(EOFError, zlibd.decompress, b"anything")
        self.assertRaises(EOFError, zlibd.decompress, b"")

    @support.skip_if_pgo_task
    @bigmemtest(size=_4G + 100, memuse=3.3)
    def testDecompress4G(self, size):
        # Test zlib._ZlibDecompressor.decompress() with >4GiB input
        blocksize = 10 * 1024 * 1024
        block = random.randbytes(blocksize)
        try:
            data = block * (size // blocksize + 1)
            compressed = zlib.compress(data)
            zlibd = zlib._ZlibDecompressor()
            decompressed = zlibd.decompress(compressed)
            # Plain boolean check; presumably deliberate so that a failure
            # does not try to render the multi-GiB operands.
            self.assertTrue(decompressed == data)
        finally:
            # Drop the references to the huge buffers.
            data = None
            compressed = None
            decompressed = None

    def testPickle(self):
        # Decompressor objects must refuse pickling under every protocol.
        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            with self.assertRaises(TypeError):
                pickle.dumps(zlib._ZlibDecompressor(), proto)

    def testDecompressorChunksMaxsize(self):
        zlibd = zlib._ZlibDecompressor()
        max_length = 100
        out = []

        # Feed some input
        len_ = len(self.BIG_DATA) - 64
        out.append(zlibd.decompress(self.BIG_DATA[:len_],
                                    max_length=max_length))
        self.assertFalse(zlibd.needs_input)
        self.assertEqual(len(out[-1]), max_length)

        # Retrieve more data without providing more input
        out.append(zlibd.decompress(b'', max_length=max_length))
        self.assertFalse(zlibd.needs_input)
        self.assertEqual(len(out[-1]), max_length)

        # Retrieve more data while providing more input
        out.append(zlibd.decompress(self.BIG_DATA[len_:],
                                    max_length=max_length))
        self.assertLessEqual(len(out[-1]), max_length)

        # Retrieve remaining uncompressed data
        while not zlibd.eof:
            out.append(zlibd.decompress(b'', max_length=max_length))
            self.assertLessEqual(len(out[-1]), max_length)

        out = b"".join(out)
        self.assertEqual(out, self.BIG_TEXT)
        self.assertEqual(zlibd.unused_data, b"")

    def test_decompressor_inputbuf_1(self):
        # Test reusing input buffer after moving existing
        # contents to beginning
        zlibd = zlib._ZlibDecompressor()
        out = []

        # Create input buffer and fill it
        self.assertEqual(zlibd.decompress(self.DATA[:100],
                                          max_length=0), b'')

        # Retrieve some results, freeing capacity at beginning
        # of input buffer
        out.append(zlibd.decompress(b'', 2))

        # Add more data that fits into input buffer after
        # moving existing data to beginning
        out.append(zlibd.decompress(self.DATA[100:105], 15))

        # Decompress rest of data
        out.append(zlibd.decompress(self.DATA[105:]))
        self.assertEqual(b''.join(out), self.TEXT)

    def test_decompressor_inputbuf_2(self):
        # Test reusing input buffer by appending data at the
        # end right away
        zlibd = zlib._ZlibDecompressor()
        out = []

        # Create input buffer and empty it
        self.assertEqual(zlibd.decompress(self.DATA[:200],
                                          max_length=0), b'')
        out.append(zlibd.decompress(b''))

        # Fill buffer with new data
        out.append(zlibd.decompress(self.DATA[200:280], 2))

        # Append some more data, not enough to require resize
        out.append(zlibd.decompress(self.DATA[280:300], 2))

        # Decompress rest of data
        out.append(zlibd.decompress(self.DATA[300:]))
        self.assertEqual(b''.join(out), self.TEXT)

    def test_decompressor_inputbuf_3(self):
        # Test reusing input buffer after extending it

        zlibd = zlib._ZlibDecompressor()
        out = []

        # Create almost full input buffer
        out.append(zlibd.decompress(self.DATA[:200], 5))

        # Add even more data to it, requiring resize
        out.append(zlibd.decompress(self.DATA[200:300], 5))

        # Decompress rest of data
        out.append(zlibd.decompress(self.DATA[300:]))
        self.assertEqual(b''.join(out), self.TEXT)

    def test_failure(self):
        zlibd = zlib._ZlibDecompressor()
        self.assertRaises(Exception, zlibd.decompress, self.BAD_DATA * 30)
        # Previously, a second call could crash due to internal inconsistency
        self.assertRaises(Exception, zlibd.decompress, self.BAD_DATA * 30)

    @support.refcount_test
    def test_refleaks_in___init__(self):
        # Re-running __init__ on a live object must not leak references.
        gettotalrefcount = support.get_attribute(sys, 'gettotalrefcount')
        zlibd = zlib._ZlibDecompressor()
        refs_before = gettotalrefcount()
        for _ in range(100):
            zlibd.__init__()
        self.assertAlmostEqual(gettotalrefcount() - refs_before, 0, delta=10)
`
``
1112
+
``
1113
+
947
1114
`class CustomInt:
`
948
1115
`def index(self):
`
949
1116
`return 100
`