cpython: 037253b7cd6d (original) (raw)

--- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -926,6 +926,32 @@ class CP65001Test(ReadTest, unittest.Tes class UTF7Test(ReadTest, unittest.TestCase): encoding = "utf-7"

+ def test_partial(self): self.check_partial( 'a+-b\x00c\x80d\u0100e\U00010000f', @@ -967,7 +993,9 @@ class UTF7Test(ReadTest, unittest.TestCa def test_errors(self): tests = [

@@ -983,6 +1011,8 @@ class UTF7Test(ReadTest, unittest.TestCa (b'a+//,+IKw-b', 'a\ufffd\u20acb'), (b'a+///,+IKw-b', 'a\uffff\ufffd\u20acb'), (b'a+////,+IKw-b', 'a\uffff\ufffd\u20acb'),

@@ -994,8 +1024,36 @@ class UTF7Test(ReadTest, unittest.TestCa self.assertEqual('\U000104A0'.encode(self.encoding), b'+2AHcoA-') self.assertEqual('\ud801\udca0'.encode(self.encoding), b'+2AHcoA-') self.assertEqual(b'+2AHcoA-'.decode(self.encoding), '\U000104A0') -

+

class UTF16ExTest(unittest.TestCase):

--- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -1553,7 +1553,7 @@ class UnicodeTest(string_tests.CommonTes self.assertEqual(b'+2AHab9ze-'.decode('utf-7'), '\uD801\U000abcde') # Issue #2242: crash on some Windows/MSVC versions

# Direct encoded characters set_d = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?" @@ -1995,6 +1995,7 @@ class UnicodeTest(string_tests.CommonTes self.assertRaises(UnicodeError, str, b'Andr\202 x', 'ascii', 'strict') self.assertEqual(str(b'Andr\202 x', 'ascii', 'ignore'), "Andr x") self.assertEqual(str(b'Andr\202 x', 'ascii', 'replace'), 'Andr\uFFFD x')

# Error handling (unknown character names) self.assertEqual(b"\N{foo}xx".decode("unicode-escape", "ignore"), "xx")

--- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,8 @@ Release date: XXXX-XX-XX Core and Builtins ----------------- +- Issue #24848: Fixed a number of bugs in UTF-7 decoding of malformed data. +

--- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4360,31 +4360,31 @@ PyUnicode_DecodeUTF7Stateful(const char } else { /* now leaving a base-64 section */ inShift = 0;

@@ -4398,6 +4398,7 @@ PyUnicode_DecodeUTF7Stateful(const char } else { /* begin base64-encoded section */ inShift = 1;

@@ -4429,6 +4430,7 @@ utf7Error: if (inShift && !consumed) { /* in shift sequence, no more to follow */ /* if we're in an inconsistent state, that's an error */

@@ -13366,6 +13368,7 @@ int if (maxchar > writer->maxchar || writer->readonly) { /* resize + widen */