(original) (raw)

changeset:   82059:c10a3ddba483
parent:      82057:b8a6bc70fc08
parent:      82058:3b316ea5aa82
user:        Victor Stinner <victor.stinner@gmail.com>
date:        Thu Feb 07 23:17:34 2013 +0100
files:       Lib/test/test_unicode.py Misc/NEWS Objects/unicodeobject.c
description:
(Merge 3.3) Issue #17137: When an Unicode string is resized, the internal wide character string (wstr) format is now cleared.


diff -r b8a6bc70fc08 -r c10a3ddba483 Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py	Thu Feb 07 17:05:32 2013 +0200
+++ b/Lib/test/test_unicode.py	Thu Feb 07 23:17:34 2013 +0100
@@ -2191,6 +2191,21 @@
         self.assertEqual(args[0], text)
         self.assertEqual(len(args), 1)
 
+    def test_resize(self):
+        for length in range(1, 100, 7):
+            # generate a fresh string (refcount=1)
+            text = 'a' * length + 'b'
+
+            # fill wstr internal field
+            abc = text.encode('unicode_internal')
+            self.assertEqual(abc.decode('unicode_internal'), text)
+
+            # resize text: wstr field must be cleared and then recomputed
+            text += 'c'
+            abcdef = text.encode('unicode_internal')
+            self.assertNotEqual(abc, abcdef)
+            self.assertEqual(abcdef.decode('unicode_internal'), text)
+
 
 class StringModuleTest(unittest.TestCase):
     def test_formatter_parser(self):
diff -r b8a6bc70fc08 -r c10a3ddba483 Misc/NEWS
--- a/Misc/NEWS	Thu Feb 07 17:05:32 2013 +0200
+++ b/Misc/NEWS	Thu Feb 07 23:17:34 2013 +0100
@@ -10,6 +10,9 @@
 Core and Builtins
 -----------------
 
+- Issue #17137: When an Unicode string is resized, the internal wide character
+  string (wstr) format is now cleared.
+
 - Issue #17043: The unicode-internal decoder no longer read past the end of
   input buffer.
 
diff -r b8a6bc70fc08 -r c10a3ddba483 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Thu Feb 07 17:05:32 2013 +0200
+++ b/Objects/unicodeobject.c	Thu Feb 07 23:17:34 2013 +0100
@@ -717,6 +717,10 @@
         if (!PyUnicode_IS_ASCII(unicode))
             _PyUnicode_WSTR_LENGTH(unicode) = length;
     }
+    else if (_PyUnicode_HAS_WSTR_MEMORY(unicode)) {
+        PyObject_DEL(_PyUnicode_WSTR(unicode));
+        _PyUnicode_WSTR(unicode) = NULL;
+    }
 #ifdef Py_DEBUG
     unicode_fill_invalid(unicode, old_length);
 #endif