bpo-32503: Avoid creating too small frames in pickles. (#5127) · python/cpython@1211c9a (original) (raw)
`@@ -2037,6 +2037,7 @@ def test_setitems_on_non_dicts(self):
`
2037
2037
``
2038
2038
`# Exercise framing (proto >= 4) for significant workloads
`
2039
2039
``
``
2040
`+
FRAME_SIZE_MIN = 4
`
2040
2041
`FRAME_SIZE_TARGET = 64 * 1024
`
2041
2042
``
2042
2043
`def check_frame_opcodes(self, pickled):
`
`@@ -2047,36 +2048,43 @@ def check_frame_opcodes(self, pickled):
`
2047
2048
` framed by default and are therefore considered a frame by themselves in
`
2048
2049
` the following consistency check.
`
2049
2050
` """
`
2050
``
`-
last_arg = last_pos = last_frame_opcode_size = None
`
2051
``
`-
frameless_opcode_sizes = {
`
2052
``
`-
'BINBYTES': 5,
`
2053
``
`-
'BINUNICODE': 5,
`
2054
``
`-
'BINBYTES8': 9,
`
2055
``
`-
'BINUNICODE8': 9,
`
2056
``
`-
}
`
``
2051
`+
frame_end = frameless_start = None
`
``
2052
`+
frameless_opcodes = {'BINBYTES', 'BINUNICODE', 'BINBYTES8', 'BINUNICODE8'}
`
2057
2053
`for op, arg, pos in pickletools.genops(pickled):
`
2058
``
`-
if op.name in frameless_opcode_sizes:
`
2059
``
`-
if len(arg) > self.FRAME_SIZE_TARGET:
`
2060
``
`-
frame_opcode_size = frameless_opcode_sizes[op.name]
`
2061
``
`-
arg = len(arg)
`
2062
``
`-
else:
`
2063
``
`-
continue
`
2064
``
`-
elif op.name == 'FRAME':
`
2065
``
`-
frame_opcode_size = 9
`
2066
``
`-
else:
`
2067
``
`-
continue
`
2068
``
-
2069
``
`-
if last_pos is not None:
`
2070
``
`-
# The previous frame's size should be equal to the number
`
2071
``
`-
# of bytes up to the current frame.
`
2072
``
`-
frame_size = pos - last_pos - last_frame_opcode_size
`
2073
``
`-
self.assertEqual(frame_size, last_arg)
`
2074
``
`-
last_arg, last_pos = arg, pos
`
2075
``
`-
last_frame_opcode_size = frame_opcode_size
`
2076
``
`-
# The last frame's size should be equal to the number of bytes up
`
2077
``
`-
# to the pickle's end.
`
2078
``
`-
frame_size = len(pickled) - last_pos - last_frame_opcode_size
`
2079
``
`-
self.assertEqual(frame_size, last_arg)
`
``
2054
`+
if frame_end is not None:
`
``
2055
`+
self.assertLessEqual(pos, frame_end)
`
``
2056
`+
if pos == frame_end:
`
``
2057
`+
frame_end = None
`
``
2058
+
``
2059
`+
if frame_end is not None: # framed
`
``
2060
`+
self.assertNotEqual(op.name, 'FRAME')
`
``
2061
`+
if op.name in frameless_opcodes:
`
``
2062
`+
# Only short bytes and str objects should be written
`
``
2063
`+
# in a frame
`
``
2064
`+
self.assertLessEqual(len(arg), self.FRAME_SIZE_TARGET)
`
``
2065
+
``
2066
`+
else: # not framed
`
``
2067
`+
if (op.name == 'FRAME' or
`
``
2068
`+
(op.name in frameless_opcodes and
`
``
2069
`+
len(arg) > self.FRAME_SIZE_TARGET)):
`
``
2070
`+
# Frame or large bytes or str object
`
``
2071
`+
if frameless_start is not None:
`
``
2072
`+
# Only short data should be written outside of a frame
`
``
2073
`+
self.assertLess(pos - frameless_start,
`
``
2074
`+
self.FRAME_SIZE_MIN)
`
``
2075
`+
frameless_start = None
`
``
2076
`+
elif frameless_start is None and op.name != 'PROTO':
`
``
2077
`+
frameless_start = pos
`
``
2078
+
``
2079
`+
if op.name == 'FRAME':
`
``
2080
`+
self.assertGreaterEqual(arg, self.FRAME_SIZE_MIN)
`
``
2081
`+
frame_end = pos + 9 + arg
`
``
2082
+
``
2083
`+
pos = len(pickled)
`
``
2084
`+
if frame_end is not None:
`
``
2085
`+
self.assertEqual(frame_end, pos)
`
``
2086
`+
elif frameless_start is not None:
`
``
2087
`+
self.assertLess(pos - frameless_start, self.FRAME_SIZE_MIN)
`
2080
2088
``
2081
2089
`def test_framing_many_objects(self):
`
2082
2090
`obj = list(range(10**5))
`
`@@ -2095,7 +2103,8 @@ def test_framing_many_objects(self):
`
2095
2103
``
2096
2104
`def test_framing_large_objects(self):
`
2097
2105
`N = 1024 * 1024
`
2098
``
`-
obj = [b'x' * N, b'y' * N, 'z' * N]
`
``
2106
`+
small_items = [[i] for i in range(10)]
`
``
2107
`+
obj = [b'x' * N, *small_items, b'y' * N, 'z' * N]
`
2099
2108
`for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
`
2100
2109
`for fast in [False, True]:
`
2101
2110
`with self.subTest(proto=proto, fast=fast):
`
`@@ -2119,12 +2128,9 @@ def test_framing_large_objects(self):
`
2119
2128
`# Perform full equality check if the lengths match.
`
2120
2129
`self.assertEqual(obj, unpickled)
`
2121
2130
`n_frames = count_opcode(pickle.FRAME, pickled)
`
2122
``
`-
if not fast:
`
2123
``
`-
# One frame per memoize for each large object.
`
2124
``
`-
self.assertGreaterEqual(n_frames, len(obj))
`
2125
``
`-
else:
`
2126
``
`-
# One frame at the beginning and one at the end.
`
2127
``
`-
self.assertGreaterEqual(n_frames, 2)
`
``
2131
`+
# A single frame for small objects between
`
``
2132
`+
# first two large objects.
`
``
2133
`+
self.assertEqual(n_frames, 1)
`
2128
2134
`self.check_frame_opcodes(pickled)
`
2129
2135
``
2130
2136
`def test_optional_frames(self):
`
`@@ -2152,7 +2158,9 @@ def remove_frames(pickled, keep_frame=None):
`
2152
2158
``
2153
2159
`frame_size = self.FRAME_SIZE_TARGET
`
2154
2160
`num_frames = 20
`
2155
``
`-
obj = [bytes([i]) * frame_size for i in range(num_frames)]
`
``
2161
`+
# Large byte objects (dict values) interspersed with small objects
`
``
2162
`+
# (dict keys)
`
``
2163
`+
obj = {i: bytes([i]) * frame_size for i in range(num_frames)}
`
2156
2164
``
2157
2165
`for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
`
2158
2166
`pickled = self.dumps(obj, proto)
`