(original) (raw)

changeset: 82189:385d982ce641 branch: 2.7 parent: 82185:a43f67e95ef0 user: Serhiy Storchaka storchaka@gmail.com date: Wed Feb 13 12:07:43 2013 +0200 files: Lib/test/test_marshal.py Lib/test/test_support.py Misc/NEWS Python/marshal.c description: Issue #5308: Raise ValueError when marshalling too large object (a sequence with size >= 2**31), instead of producing illegal marshal data. diff -r a43f67e95ef0 -r 385d982ce641 Lib/test/test_marshal.py --- a/Lib/test/test_marshal.py Wed Feb 13 00:59:11 2013 +0200 +++ b/Lib/test/test_marshal.py Wed Feb 13 12:07:43 2013 +0200 @@ -269,6 +269,53 @@ invalid_string = 'l\x02\x00\x00\x00\x00\x00\x00\x00' self.assertRaises(ValueError, marshal.loads, invalid_string) +LARGE_SIZE = 2**31 +character_size = 4 if sys.maxunicode > 0xFFFF else 2 +pointer_size = 8 if sys.maxsize > 0xFFFFFFFF else 4 + +@unittest.skipIf(LARGE_SIZE > sys.maxsize, "test cannot run on 32-bit systems") +class LargeValuesTestCase(unittest.TestCase): + def check_unmarshallable(self, data): + f = open(test_support.TESTFN, 'wb') + self.addCleanup(test_support.unlink, test_support.TESTFN) + with f: + self.assertRaises(ValueError, marshal.dump, data, f) + + @test_support.precisionbigmemtest(size=LARGE_SIZE, memuse=1, dry_run=False) + def test_bytes(self, size): + self.check_unmarshallable(b'x' * size) + + @test_support.precisionbigmemtest(size=LARGE_SIZE, + memuse=character_size, dry_run=False) + def test_str(self, size): + self.check_unmarshallable('x' * size) + + @test_support.precisionbigmemtest(size=LARGE_SIZE, + memuse=pointer_size, dry_run=False) + def test_tuple(self, size): + self.check_unmarshallable((None,) * size) + + @test_support.precisionbigmemtest(size=LARGE_SIZE, + memuse=pointer_size, dry_run=False) + def test_list(self, size): + self.check_unmarshallable([None] * size) + + @test_support.precisionbigmemtest(size=LARGE_SIZE, + memuse=pointer_size*12 + sys.getsizeof(LARGE_SIZE-1), + dry_run=False) + def test_set(self, size): + self.check_unmarshallable(set(range(size))) + + @test_support.precisionbigmemtest(size=LARGE_SIZE, + memuse=pointer_size*12 + sys.getsizeof(LARGE_SIZE-1), + dry_run=False) + def test_frozenset(self, size): + self.check_unmarshallable(frozenset(range(size))) + + @test_support.precisionbigmemtest(size=LARGE_SIZE, memuse=1, dry_run=False) + def test_bytearray(self, size): + self.check_unmarshallable(bytearray(size)) + def test_main(): test_support.run_unittest(IntTestCase, @@ -277,7 +324,9 @@ CodeTestCase, ContainerTestCase, ExceptionTestCase, - BugsTestCase) + BugsTestCase, + LargeValuesTestCase, + ) if __name__ == "__main__": test_main() diff -r a43f67e95ef0 -r 385d982ce641 Lib/test/test_support.py --- a/Lib/test/test_support.py Wed Feb 13 00:59:11 2013 +0200 +++ b/Lib/test/test_support.py Wed Feb 13 12:07:43 2013 +0200 @@ -1062,7 +1062,7 @@ return wrapper return decorator -def precisionbigmemtest(size, memuse, overhead=5*_1M): +def precisionbigmemtest(size, memuse, overhead=5*_1M, dry_run=True): def decorator(f): def wrapper(self): if not real_max_memuse: @@ -1070,11 +1070,12 @@ else: maxsize = size - if real_max_memuse and real_max_memuse < maxsize * memuse: - if verbose: - sys.stderr.write("Skipping %s because of memory " - "constraint\n" % (f.__name__,)) - return + if ((real_max_memuse or not dry_run) + and real_max_memuse < maxsize * memuse): + if verbose: + sys.stderr.write("Skipping %s because of memory " + "constraint\n" % (f.__name__,)) + return return f(self, maxsize) wrapper.size = size diff -r a43f67e95ef0 -r 385d982ce641 Misc/NEWS --- a/Misc/NEWS Wed Feb 13 00:59:11 2013 +0200 +++ b/Misc/NEWS Wed Feb 13 12:07:43 2013 +0200 @@ -9,6 +9,9 @@ Core and Builtins ----------------- +- Issue #5308: Raise ValueError when marshalling too large object (a sequence + with size >= 2**31), instead of producing illegal marshal data. + - Issue #17043: The unicode-internal decoder no longer read past the end of input buffer. diff -r a43f67e95ef0 -r 385d982ce641 Python/marshal.c --- a/Python/marshal.c Wed Feb 13 00:59:11 2013 +0200 +++ b/Python/marshal.c Wed Feb 13 12:07:43 2013 +0200 @@ -88,7 +88,7 @@ } static void -w_string(char *s, int n, WFILE *p) +w_string(char *s, Py_ssize_t n, WFILE *p) { if (p->fp != NULL) { fwrite(s, 1, n, p->fp); @@ -126,6 +126,21 @@ } #endif +#define SIZE32_MAX 0x7FFFFFFF + +#if SIZEOF_SIZE_T > 4 +# define W_SIZE(n, p) do { \ + if ((n) > SIZE32_MAX) { \ + (p)->depth--; \ + (p)->error = WFERR_UNMARSHALLABLE; \ + return; \ + } \ + w_long((long)(n), p); \ + } while(0) +#else +# define W_SIZE w_long +#endif + /* We assume that Python longs are stored internally in base some power of 2**15; for the sake of portability we'll always read and write them in base exactly 2**15. */ @@ -159,6 +174,11 @@ d >>= PyLong_MARSHAL_SHIFT; l++; } while (d != 0); + if (l > SIZE32_MAX) { + p->depth--; + p->error = WFERR_UNMARSHALLABLE; + return; + } w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p); for (i=0; i < n-1; i++) { @@ -244,7 +264,7 @@ n = strlen(buf); w_byte(TYPE_FLOAT, p); w_byte((int)n, p); - w_string(buf, (int)n, p); + w_string(buf, n, p); PyMem_Free(buf); } } @@ -277,7 +297,7 @@ } n = strlen(buf); w_byte((int)n, p); - w_string(buf, (int)n, p); + w_string(buf, n, p); PyMem_Free(buf); buf = PyOS_double_to_string(PyComplex_ImagAsDouble(v), 'g', 17, 0, NULL); @@ -287,7 +307,7 @@ } n = strlen(buf); w_byte((int)n, p); - w_string(buf, (int)n, p); + w_string(buf, n, p); PyMem_Free(buf); } } @@ -319,14 +339,8 @@ w_byte(TYPE_STRING, p); } n = PyString_GET_SIZE(v); - if (n > INT_MAX) { - /* huge strings are not supported */ - p->depth--; - p->error = WFERR_UNMARSHALLABLE; - return; - } - w_long((long)n, p); - w_string(PyString_AS_STRING(v), (int)n, p); + W_SIZE(n, p); + w_string(PyString_AS_STRING(v), n, p); } #ifdef Py_USING_UNICODE else if (PyUnicode_CheckExact(v)) { @@ -339,20 +353,15 @@ } w_byte(TYPE_UNICODE, p); n = PyString_GET_SIZE(utf8); - if (n > INT_MAX) { - p->depth--; - p->error = WFERR_UNMARSHALLABLE; - return; - } - w_long((long)n, p); - w_string(PyString_AS_STRING(utf8), (int)n, p); + W_SIZE(n, p); + w_string(PyString_AS_STRING(utf8), n, p); Py_DECREF(utf8); } #endif else if (PyTuple_CheckExact(v)) { w_byte(TYPE_TUPLE, p); n = PyTuple_Size(v); - w_long((long)n, p); + W_SIZE(n, p); for (i = 0; i < n; i++) { w_object(PyTuple_GET_ITEM(v, i), p); } @@ -360,7 +369,7 @@ else if (PyList_CheckExact(v)) { w_byte(TYPE_LIST, p); n = PyList_GET_SIZE(v); - w_long((long)n, p); + W_SIZE(n, p); for (i = 0; i < n; i++) { w_object(PyList_GET_ITEM(v, i), p); } @@ -390,7 +399,7 @@ p->error = WFERR_UNMARSHALLABLE; return; } - w_long((long)n, p); + W_SIZE(n, p); it = PyObject_GetIter(v); if (it == NULL) { p->depth--; @@ -432,13 +441,8 @@ PyBufferProcs *pb = v->ob_type->tp_as_buffer; w_byte(TYPE_STRING, p); n = (*pb->bf_getreadbuffer)(v, 0, (void **)&s); - if (n > INT_MAX) { - p->depth--; - p->error = WFERR_UNMARSHALLABLE; - return; - } - w_long((long)n, p); - w_string(s, (int)n, p); + W_SIZE(n, p); + w_string(s, n, p); } else { w_byte(TYPE_UNKNOWN, p); @@ -480,14 +484,14 @@ #define r_byte(p) ((p)->fp ? getc((p)->fp) : rs_byte(p)) -static int -r_string(char *s, int n, RFILE *p) +static Py_ssize_t +r_string(char *s, Py_ssize_t n, RFILE *p) { if (p->fp != NULL) /* The result fits into int because it must be <=n. */ - return (int)fread(s, 1, n, p->fp); + return fread(s, 1, n, p->fp); if (p->end - p->ptr < n) - n = (int)(p->end - p->ptr); + n = p->end - p->ptr; memcpy(s, p->ptr, n); p->ptr += n; return n; @@ -563,14 +567,14 @@ r_PyLong(RFILE *p) { PyLongObject *ob; - int size, i, j, md, shorts_in_top_digit; - long n; + long n, size, i; + int j, md, shorts_in_top_digit; digit d; n = r_long(p); if (n == 0) return (PyObject *)_PyLong_New(0); - if (n < -INT_MAX || n > INT_MAX) { + if (n < -SIZE32_MAX || n > SIZE32_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data (long size out of range)"); return NULL; @@ -691,7 +695,7 @@ char buf[256]; double dx; n = r_byte(p); - if (n == EOF || r_string(buf, (int)n, p) != n) { + if (n == EOF || r_string(buf, n, p) != n) { PyErr_SetString(PyExc_EOFError, "EOF read where object expected"); retval = NULL; @@ -732,7 +736,7 @@ char buf[256]; Py_complex c; n = r_byte(p); - if (n == EOF || r_string(buf, (int)n, p) != n) { + if (n == EOF || r_string(buf, n, p) != n) { PyErr_SetString(PyExc_EOFError, "EOF read where object expected"); retval = NULL; @@ -745,7 +749,7 @@ break; } n = r_byte(p); - if (n == EOF || r_string(buf, (int)n, p) != n) { + if (n == EOF || r_string(buf, n, p) != n) { PyErr_SetString(PyExc_EOFError, "EOF read where object expected"); retval = NULL; @@ -795,7 +799,7 @@ case TYPE_INTERNED: case TYPE_STRING: n = r_long(p); - if (n < 0 || n > INT_MAX) { + if (n < 0 || n > SIZE32_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)"); retval = NULL; break; @@ -805,7 +809,7 @@ retval = NULL; break; } - if (r_string(PyString_AS_STRING(v), (int)n, p) != n) { + if (r_string(PyString_AS_STRING(v), n, p) != n) { Py_DECREF(v); PyErr_SetString(PyExc_EOFError, "EOF read where object expected"); @@ -840,7 +844,7 @@ char *buffer; n = r_long(p); - if (n < 0 || n > INT_MAX) { + if (n < 0 || n > SIZE32_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)"); retval = NULL; break; @@ -850,7 +854,7 @@ retval = PyErr_NoMemory(); break; } - if (r_string(buffer, (int)n, p) != n) { + if (r_string(buffer, n, p) != n) { PyMem_DEL(buffer); PyErr_SetString(PyExc_EOFError, "EOF read where object expected"); @@ -866,12 +870,12 @@ case TYPE_TUPLE: n = r_long(p); - if (n < 0 || n > INT_MAX) { + if (n < 0 || n > SIZE32_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)"); retval = NULL; break; } - v = PyTuple_New((int)n); + v = PyTuple_New(n); if (v == NULL) { retval = NULL; break; @@ -886,19 +890,19 @@ v = NULL; break; } - PyTuple_SET_ITEM(v, (int)i, v2); + PyTuple_SET_ITEM(v, i, v2); } retval = v; break; case TYPE_LIST: n = r_long(p); - if (n < 0 || n > INT_MAX) { + if (n < 0 || n > SIZE32_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)"); retval = NULL; break; } - v = PyList_New((int)n); + v = PyList_New(n); if (v == NULL) { retval = NULL; break; @@ -913,7 +917,7 @@ v = NULL; break; } - PyList_SET_ITEM(v, (int)i, v2); + PyList_SET_ITEM(v, i, v2); } retval = v; break; @@ -945,7 +949,7 @@ case TYPE_SET: case TYPE_FROZENSET: n = r_long(p); - if (n < 0 || n > INT_MAX) { + if (n < 0 || n > SIZE32_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)"); retval = NULL; break; @@ -1134,12 +1138,8 @@ if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) { char* pBuf = (char *)PyMem_MALLOC(filesize); if (pBuf != NULL) { - PyObject* v; - size_t n; - /* filesize must fit into an int, because it - is smaller than REASONABLE_FILE_LIMIT */ - n = fread(pBuf, 1, (int)filesize, fp); - v = PyMarshal_ReadObjectFromString(pBuf, n); + size_t n = fread(pBuf, 1, (size_t)filesize, fp); + PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n); PyMem_FREE(pBuf); return v; } /storchaka@gmail.com