cpython: 656c13024ede (original) (raw)
Mercurial > cpython
changeset 72756:656c13024ede
Issue #12911: Fix memory consumption when calculating the repr() of huge tuples or lists. This introduces a small private API for this common pattern. The issue has been discovered thanks to Martin's huge-mem buildbot. [#12911]
Antoine Pitrou solipsis@pitrou.net | |
---|---|
date | Thu, 06 Oct 2011 19:04:12 +0200 |
parents | 9a91ab415109(current diff)f9f782f2369e(diff) |
children | e685b02ddcac |
files | Include/Python.h Makefile.pre.in Misc/NEWS Objects/listobject.c Objects/tupleobject.c PC/VC6/pythoncore.dsp PC/VS7.1/pythoncore.vcproj PCbuild/pythoncore.vcproj |
diffstat | 13 files changed, 268 insertions(+), 84 deletions(-)[+] [-] Include/Python.h 2 Include/accu.h 35 Lib/test/test_list.py 11 Lib/test/test_tuple.py 10 Makefile.pre.in 2 Misc/NEWS 3 Objects/accu.c 114 Objects/listobject.c 79 Objects/tupleobject.c 73 PC/VC6/pythoncore.dsp 4 PC/VS7.1/pythoncore.vcproj 3 PC/VS8.0/pythoncore.vcproj 8 PCbuild/pythoncore.vcproj 8 |
line wrap: on
line diff
--- a/Include/Python.h +++ b/Include/Python.h @@ -101,7 +101,7 @@ #include "warnings.h" #include "weakrefobject.h" #include "structseq.h" - +#include "accu.h" #include "codecs.h" #include "pyerrors.h"
new file mode 100644 --- /dev/null +++ b/Include/accu.h @@ -0,0 +1,35 @@ +#ifndef Py_LIMITED_API +#ifndef Py_ACCU_H +#define Py_ACCU_H + +/*** This is a private API for use by the interpreter and the stdlib.
- *** Its definition may be changed or removed at any moment.
- ***/ + +/*
- */ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct {
- PyObject large; / A list of previously accumulated large strings */
- PyObject small; / Pending small strings */
+} _PyAccu; + +PyAPI_FUNC(int) _PyAccu_Init(_PyAccu *acc); +PyAPI_FUNC(int) _PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode); +PyAPI_FUNC(PyObject *) _PyAccu_FinishAsList(_PyAccu *acc); +PyAPI_FUNC(PyObject *) _PyAccu_Finish(_PyAccu *acc); +PyAPI_FUNC(void) _PyAccu_Destroy(_PyAccu acc); + +#ifdef __cplusplus +} +#endif + +#endif / Py_ACCU_H / +#endif / Py_LIMITED_API */
--- a/Lib/test/test_list.py +++ b/Lib/test/test_list.py @@ -59,6 +59,17 @@ class ListTest(list_tests.CommonTest): self.assertRaises((MemoryError, OverflowError), mul, lst, n) self.assertRaises((MemoryError, OverflowError), imul, lst, n)
- def test_repr_large(self):
# Check the repr of large list objects[](#l3.8)
def check(n):[](#l3.9)
l = [0] * n[](#l3.10)
s = repr(l)[](#l3.11)
self.assertEqual(s,[](#l3.12)
'[' + ', '.join(['0'] * n) + ']')[](#l3.13)
check(10) # check our checking code[](#l3.14)
check(1000000)[](#l3.15)
+ + def test_main(verbose=None): support.run_unittest(ListTest)
--- a/Lib/test/test_tuple.py +++ b/Lib/test/test_tuple.py @@ -154,6 +154,16 @@ class TupleTest(seq_tests.CommonTest): # Trying to untrack an unfinished tuple could crash Python self._not_tracked(tuple(gc.collect() for i in range(101)))
- def test_repr_large(self):
# Check the repr of large list objects[](#l4.8)
def check(n):[](#l4.9)
l = (0,) * n[](#l4.10)
s = repr(l)[](#l4.11)
self.assertEqual(s,[](#l4.12)
'(' + ', '.join(['0'] * n) + ')')[](#l4.13)
check(10) # check our checking code[](#l4.14)
check(1000000)[](#l4.15)
+ def test_main(): support.run_unittest(TupleTest)
--- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -342,6 +342,7 @@ PYTHON_OBJS= [](#l5.3)
Objects
OBJECT_OBJS= [](#l5.5) Objects/abstract.o [](#l5.6) + Objects/accu.o [](#l5.7) Objects/boolobject.o [](#l5.8) Objects/bytes_methods.o [](#l5.9) Objects/bytearrayobject.o [](#l5.10) @@ -661,6 +662,7 @@ Objects/typeobject.o: $(srcdir)/Objects/ PYTHON_HEADERS= [](#l5.12) Include/Python.h [](#l5.13) Include/abstract.h [](#l5.14) + Include/accu.h [](#l5.15) Include/asdl.h [](#l5.16) Include/ast.h [](#l5.17) Include/bltinmodule.h [](#l5.18)
--- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,9 @@ What's New in Python 3.3 Alpha 1? Core and Builtins ----------------- +- Issue #12911: Fix memory consumption when calculating the repr() of huge
- PEP 393: flexible string representation. Thanks to Torsten Becker for the initial implementation, and Victor Stinner for various bug fixes.
new file mode 100644 --- /dev/null +++ b/Objects/accu.c @@ -0,0 +1,114 @@ +/* Accumulator struct implementation */ + +#include "Python.h" + +static PyObject * +join_list_unicode(PyObject *lst) +{
- /* return ''.join(lst) */
- PyObject *sep, *ret;
- sep = PyUnicode_FromStringAndSize("", 0);
- ret = PyUnicode_Join(sep, lst);
- Py_DECREF(sep);
- return ret;
+} + +int +_PyAccu_Init(_PyAccu *acc) +{
- /* Lazily allocated */
- acc->large = NULL;
- acc->small = PyList_New(0);
- if (acc->small == NULL)
return -1;[](#l7.27)
- return 0;
+} + +static int +flush_accumulator(_PyAccu *acc) +{
- Py_ssize_t nsmall = PyList_GET_SIZE(acc->small);
- if (nsmall) {
int ret;[](#l7.36)
PyObject *joined;[](#l7.37)
if (acc->large == NULL) {[](#l7.38)
acc->large = PyList_New(0);[](#l7.39)
if (acc->large == NULL)[](#l7.40)
return -1;[](#l7.41)
}[](#l7.42)
joined = join_list_unicode(acc->small);[](#l7.43)
if (joined == NULL)[](#l7.44)
return -1;[](#l7.45)
if (PyList_SetSlice(acc->small, 0, nsmall, NULL)) {[](#l7.46)
Py_DECREF(joined);[](#l7.47)
return -1;[](#l7.48)
}[](#l7.49)
ret = PyList_Append(acc->large, joined);[](#l7.50)
Py_DECREF(joined);[](#l7.51)
return ret;[](#l7.52)
- }
- return 0;
+} + +int +_PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode) +{
- if (PyList_Append(acc->small, unicode))
return -1;[](#l7.64)
- nsmall = PyList_GET_SIZE(acc->small);
- /* Each item in a list of unicode objects has an overhead (in 64-bit
* builds) of:[](#l7.67)
* - 8 bytes for the list slot[](#l7.68)
* - 56 bytes for the header of the unicode object[](#l7.69)
* that is, 64 bytes. 100000 such objects waste more than 6MB[](#l7.70)
* compared to a single concatenated string.[](#l7.71)
*/[](#l7.72)
- if (nsmall < 100000)
return 0;[](#l7.74)
- return flush_accumulator(acc);
+} + +PyObject * +_PyAccu_FinishAsList(_PyAccu *acc) +{
- ret = flush_accumulator(acc);
- Py_CLEAR(acc->small);
- if (ret) {
Py_CLEAR(acc->large);[](#l7.87)
return NULL;[](#l7.88)
- }
- res = acc->large;
- acc->large = NULL;
- return res;
+} + +PyObject * +_PyAccu_Finish(_PyAccu *acc) +{
- PyObject *list, *res;
- if (acc->large == NULL) {
list = acc->small;[](#l7.100)
acc->small = NULL;[](#l7.101)
- }
- else {
list = _PyAccu_FinishAsList(acc);[](#l7.104)
if (!list)[](#l7.105)
return NULL;[](#l7.106)
- }
- res = join_list_unicode(list);
- Py_DECREF(list);
- return res;
+} + +void +_PyAccu_Destroy(_PyAccu *acc) +{
--- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -321,70 +321,59 @@ static PyObject * list_repr(PyListObject *v) { Py_ssize_t i;
- if (sep == NULL) {
sep = PyUnicode_FromString(", ");[](#l8.18)
if (sep == NULL)[](#l8.19)
return NULL;[](#l8.20)
- }
i = Py_ReprEnter((PyObject*)v); if (i != 0) { return i > 0 ? PyUnicode_FromString("[...]") : NULL; }
- s = PyUnicode_FromString("[");
- if (s == NULL || _PyAccu_Accumulate(&acc, s))
goto error;[](#l8.40)
- Py_CLEAR(s);
/* Do repr() on each element. Note that this may mutate the list, so must refetch the list size on each iteration. */ for (i = 0; i < Py_SIZE(v); ++i) {
int status;[](#l8.46) if (Py_EnterRecursiveCall(" while getting the repr of a list"))[](#l8.47)
goto Done;[](#l8.48)
goto error;[](#l8.49) s = PyObject_Repr(v->ob_item[i]);[](#l8.50) Py_LeaveRecursiveCall();[](#l8.51)
if (s == NULL)[](#l8.52)
goto Done;[](#l8.53)
status = PyList_Append(pieces, s);[](#l8.54)
Py_DECREF(s); /* append created a new ref */[](#l8.55)
if (status < 0)[](#l8.56)
goto Done;[](#l8.57)
if (i > 0 && _PyAccu_Accumulate(&acc, sep))[](#l8.58)
goto error;[](#l8.59)
if (s == NULL || _PyAccu_Accumulate(&acc, s))[](#l8.60)
goto error;[](#l8.61)
} -Py_CLEAR(s);[](#l8.62)
- /* Add "[]" decorations to the first and last items. */
- assert(PyList_GET_SIZE(pieces) > 0);
- s = PyUnicode_FromString("[");
- if (s == NULL)
goto Done;[](#l8.69)
- temp = PyList_GET_ITEM(pieces, 0);
- PyUnicode_AppendAndDel(&s, temp);
- PyList_SET_ITEM(pieces, 0, s);
- if (s == NULL)
goto Done;[](#l8.74)
- s = PyUnicode_FromString("]");
- if (s == NULL)
goto Done;[](#l8.78)
- temp = PyList_GET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1);
- PyUnicode_AppendAndDel(&temp, s);
- PyList_SET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1, temp);
- if (temp == NULL)
goto Done;[](#l8.83)
- /* Paste them all together with ", " between. */
- s = PyUnicode_FromString(", ");
- if (s == NULL)
goto Done;[](#l8.91)
- result = PyUnicode_Join(s, pieces);
- Py_DECREF(s);
--- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -240,13 +240,20 @@ static PyObject * tuplerepr(PyTupleObject *v) { Py_ssize_t i, n;
n = Py_SIZE(v); if (n == 0) return PyUnicode_FromString("()");
- if (sep == NULL) {
sep = PyUnicode_FromString(", ");[](#l9.18)
if (sep == NULL)[](#l9.19)
return NULL;[](#l9.20)
- }
+ /* While not mutable, it is still possible to end up with a cycle in a tuple through an object that stores itself within a tuple (and thus infinitely asks for the repr of itself). This should only be @@ -256,52 +263,42 @@ tuplerepr(PyTupleObject *v) return i > 0 ? PyUnicode_FromString("(...)") : NULL; }
- s = PyUnicode_FromString("(");
- if (s == NULL || _PyAccu_Accumulate(&acc, s))
goto error;[](#l9.38)
- Py_CLEAR(s);
/* Do repr() on each element. */ for (i = 0; i < n; ++i) { if (Py_EnterRecursiveCall(" while getting the repr of a tuple"))
goto Done;[](#l9.44)
goto error;[](#l9.45) s = PyObject_Repr(v->ob_item[i]);[](#l9.46) Py_LeaveRecursiveCall();[](#l9.47)
if (s == NULL)[](#l9.48)
goto Done;[](#l9.49)
PyTuple_SET_ITEM(pieces, i, s);[](#l9.50)
if (i > 0 && _PyAccu_Accumulate(&acc, sep))[](#l9.51)
goto error;[](#l9.52)
if (s == NULL || _PyAccu_Accumulate(&acc, s))[](#l9.53)
goto error;[](#l9.54)
} -Py_CLEAR(s);[](#l9.55)
- /* Add "()" decorations to the first and last items. */
- assert(n > 0);
- s = PyUnicode_FromString("(");
- if (s == NULL)
goto Done;[](#l9.62)
- temp = PyTuple_GET_ITEM(pieces, 0);
- PyUnicode_AppendAndDel(&s, temp);
- PyTuple_SET_ITEM(pieces, 0, s);
- if (s == NULL)
goto Done;[](#l9.67)
- if (n > 1)
s = PyUnicode_FromString(")");[](#l9.69)
- else
s = PyUnicode_FromString(",)");[](#l9.71)
- if (s == NULL || _PyAccu_Accumulate(&acc, s))
goto error;[](#l9.73)
- Py_CLEAR(s);
- s = PyUnicode_FromString(n == 1 ? ",)" : ")");
- if (s == NULL)
goto Done;[](#l9.78)
- temp = PyTuple_GET_ITEM(pieces, n-1);
- PyUnicode_AppendAndDel(&temp, s);
- PyTuple_SET_ITEM(pieces, n-1, temp);
- if (temp == NULL)
goto Done;[](#l9.83)
- /* Paste them all together with ", " between. */
- s = PyUnicode_FromString(", ");
- if (s == NULL)
goto Done;[](#l9.90)
- result = PyUnicode_Join(s, pieces);
- Py_DECREF(s);
} /* The addend 82520, was selected from the range(0, 1000000) for
--- a/PC/VC6/pythoncore.dsp +++ b/PC/VC6/pythoncore.dsp @@ -205,6 +205,10 @@ SOURCE=....\Objects\abstract.c
End Source File
Begin Source File
+SOURCE=....\Objects\accu.c +# End Source File +# Begin Source File + SOURCE=....\Parser\acceler.c
End Source File
Begin Source File
--- a/PC/VS7.1/pythoncore.vcproj +++ b/PC/VS7.1/pythoncore.vcproj @@ -445,6 +445,9 @@ RelativePath="....\Objects\abstract.c"> <File + RelativePath="....\Objects\accu.c"> + + <File RelativePath="....\Parser\acceler.c"> <File
--- a/PC/VS8.0/pythoncore.vcproj +++ b/PC/VS8.0/pythoncore.vcproj @@ -635,6 +635,10 @@ > <File + RelativePath="....\Include\accu.h" + > + + <File RelativePath="....\Include\asdl.h" > @@ -1447,6 +1451,10 @@ > <File + RelativePath="....\Objects\accu.c" + > + + <File RelativePath="....\Objects\boolobject.c" >
--- a/PCbuild/pythoncore.vcproj +++ b/PCbuild/pythoncore.vcproj @@ -635,6 +635,10 @@ > <File + RelativePath="..\Include\accu.h" + > + + <File RelativePath="..\Include\asdl.h" > @@ -1455,6 +1459,10 @@ > <File + RelativePath="..\Objects\accu.c" + > + + <File RelativePath="..\Objects\boolobject.c" >