bpo-32503: Avoid creating too small frames in pickles. (#5127) · python/cpython@1211c9a (original) (raw)

`@@ -2037,6 +2037,7 @@ def test_setitems_on_non_dicts(self):

`

2037

2037

``

2038

2038

`# Exercise framing (proto >= 4) for significant workloads

`

2039

2039

``

``

2040

`+

FRAME_SIZE_MIN = 4

`

2040

2041

`FRAME_SIZE_TARGET = 64 * 1024

`

2041

2042

``

2042

2043

`def check_frame_opcodes(self, pickled):

`

`@@ -2047,36 +2048,43 @@ def check_frame_opcodes(self, pickled):

`

2047

2048

` framed by default and are therefore considered a frame by themselves in

`

2048

2049

` the following consistency check.

`

2049

2050

` """

`

2050

``

`-

last_arg = last_pos = last_frame_opcode_size = None

`

2051

``

`-

frameless_opcode_sizes = {

`

2052

``

`-

'BINBYTES': 5,

`

2053

``

`-

'BINUNICODE': 5,

`

2054

``

`-

'BINBYTES8': 9,

`

2055

``

`-

'BINUNICODE8': 9,

`

2056

``

`-

}

`

``

2051

`+

frame_end = frameless_start = None

`

``

2052

`+

frameless_opcodes = {'BINBYTES', 'BINUNICODE', 'BINBYTES8', 'BINUNICODE8'}

`

2057

2053

`for op, arg, pos in pickletools.genops(pickled):

`

2058

``

`-

if op.name in frameless_opcode_sizes:

`

2059

``

`-

if len(arg) > self.FRAME_SIZE_TARGET:

`

2060

``

`-

frame_opcode_size = frameless_opcode_sizes[op.name]

`

2061

``

`-

arg = len(arg)

`

2062

``

`-

else:

`

2063

``

`-

continue

`

2064

``

`-

elif op.name == 'FRAME':

`

2065

``

`-

frame_opcode_size = 9

`

2066

``

`-

else:

`

2067

``

`-

continue

`

2068

``

-

2069

``

`-

if last_pos is not None:

`

2070

``

`-

# The previous frame's size should be equal to the number

`

2071

``

`-

# of bytes up to the current frame.

`

2072

``

`-

frame_size = pos - last_pos - last_frame_opcode_size

`

2073

``

`-

self.assertEqual(frame_size, last_arg)

`

2074

``

`-

last_arg, last_pos = arg, pos

`

2075

``

`-

last_frame_opcode_size = frame_opcode_size

`

2076

``

`-

# The last frame's size should be equal to the number of bytes up

`

2077

``

`-

# to the pickle's end.

`

2078

``

`-

frame_size = len(pickled) - last_pos - last_frame_opcode_size

`

2079

``

`-

self.assertEqual(frame_size, last_arg)

`

``

2054

`+

if frame_end is not None:

`

``

2055

`+

self.assertLessEqual(pos, frame_end)

`

``

2056

`+

if pos == frame_end:

`

``

2057

`+

frame_end = None

`

``

2058

+

``

2059

`+

if frame_end is not None: # framed

`

``

2060

`+

self.assertNotEqual(op.name, 'FRAME')

`

``

2061

`+

if op.name in frameless_opcodes:

`

``

2062

`+

# Only short bytes and str objects should be written

`

``

2063

`+

# in a frame

`

``

2064

`+

self.assertLessEqual(len(arg), self.FRAME_SIZE_TARGET)

`

``

2065

+

``

2066

`+

else: # not framed

`

``

2067

`+

if (op.name == 'FRAME' or

`

``

2068

`+

(op.name in frameless_opcodes and

`

``

2069

`+

len(arg) > self.FRAME_SIZE_TARGET)):

`

``

2070

`+

# Frame or large bytes or str object

`

``

2071

`+

if frameless_start is not None:

`

``

2072

`+

# Only short data should be written outside of a frame

`

``

2073

`+

self.assertLess(pos - frameless_start,

`

``

2074

`+

self.FRAME_SIZE_MIN)

`

``

2075

`+

frameless_start = None

`

``

2076

`+

elif frameless_start is None and op.name != 'PROTO':

`

``

2077

`+

frameless_start = pos

`

``

2078

+

``

2079

`+

if op.name == 'FRAME':

`

``

2080

`+

self.assertGreaterEqual(arg, self.FRAME_SIZE_MIN)

`

``

2081

`+

frame_end = pos + 9 + arg

`

``

2082

+

``

2083

`+

pos = len(pickled)

`

``

2084

`+

if frame_end is not None:

`

``

2085

`+

self.assertEqual(frame_end, pos)

`

``

2086

`+

elif frameless_start is not None:

`

``

2087

`+

self.assertLess(pos - frameless_start, self.FRAME_SIZE_MIN)

`

2080

2088

``

2081

2089

`def test_framing_many_objects(self):

`

2082

2090

`obj = list(range(10**5))

`

`@@ -2095,7 +2103,8 @@ def test_framing_many_objects(self):

`

2095

2103

``

2096

2104

`def test_framing_large_objects(self):

`

2097

2105

`N = 1024 * 1024

`

2098

``

`-

obj = [b'x' * N, b'y' * N, 'z' * N]

`

``

2106

`+

small_items = [[i] for i in range(10)]

`

``

2107

`+

obj = [b'x' * N, *small_items, b'y' * N, 'z' * N]

`

2099

2108

`for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):

`

2100

2109

`for fast in [False, True]:

`

2101

2110

`with self.subTest(proto=proto, fast=fast):

`

`@@ -2119,12 +2128,9 @@ def test_framing_large_objects(self):

`

2119

2128

`# Perform full equality check if the lengths match.

`

2120

2129

`self.assertEqual(obj, unpickled)

`

2121

2130

`n_frames = count_opcode(pickle.FRAME, pickled)

`

2122

``

`-

if not fast:

`

2123

``

`-

# One frame per memoize for each large object.

`

2124

``

`-

self.assertGreaterEqual(n_frames, len(obj))

`

2125

``

`-

else:

`

2126

``

`-

# One frame at the beginning and one at the end.

`

2127

``

`-

self.assertGreaterEqual(n_frames, 2)

`

``

2131

`+

# A single frame for small objects between

`

``

2132

`+

# first two large objects.

`

``

2133

`+

self.assertEqual(n_frames, 1)

`

2128

2134

`self.check_frame_opcodes(pickled)

`

2129

2135

``

2130

2136

`def test_optional_frames(self):

`

`@@ -2152,7 +2158,9 @@ def remove_frames(pickled, keep_frame=None):

`

2152

2158

``

2153

2159

`frame_size = self.FRAME_SIZE_TARGET

`

2154

2160

`num_frames = 20

`

2155

``

`-

obj = [bytes([i]) * frame_size for i in range(num_frames)]

`

``

2161

`+

# Large byte objects (dict values) interspersed with small objects

`

``

2162

`+

# (dict keys)

`

``

2163

`+

obj = {i: bytes([i]) * frame_size for i in range(num_frames)}

`

2156

2164

``

2157

2165

`for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):

`

2158

2166

`pickled = self.dumps(obj, proto)

`