bpo-24214: Fixed the UTF-8 and UTF-16 incremental decoders. (GH-14304) · python/cpython@d32594a (original) (raw)
`@@ -429,11 +429,18 @@ def test_lone_surrogates(self):
`
429
429
`def test_incremental_surrogatepass(self):
`
430
430
`# Test incremental decoder for surrogatepass handler:
`
431
431
`# see issue #24214
`
``
432
`+
# High surrogate
`
432
433
`data = '\uD901'.encode(self.encoding, 'surrogatepass')
`
433
434
`for i in range(1, len(data)):
`
434
435
`dec = codecs.getincrementaldecoder(self.encoding)('surrogatepass')
`
435
436
`self.assertEqual(dec.decode(data[:i]), '')
`
436
437
`self.assertEqual(dec.decode(data[i:], True), '\uD901')
`
``
438
`+
# Low surrogate
`
``
439
`+
data = '\uDC02'.encode(self.encoding, 'surrogatepass')
`
``
440
`+
for i in range(1, len(data)):
`
``
441
`+
dec = codecs.getincrementaldecoder(self.encoding)('surrogatepass')
`
``
442
`+
self.assertEqual(dec.decode(data[:i]), '')
`
``
443
`+
self.assertEqual(dec.decode(data[i:]), '\uDC02')
`
437
444
``
438
445
``
439
446
`class UTF32Test(ReadTest, unittest.TestCase):
`
`@@ -874,6 +881,23 @@ def test_surrogatepass_handler(self):
`
874
881
`with self.assertRaises(UnicodeDecodeError):
`
875
882
`b"abc\xed\xa0z".decode(self.encoding, "surrogatepass")
`
876
883
``
``
884
`+
def test_incremental_errors(self):
`
``
885
`+
# Test that the incremental decoder can fail with final=False.
`
``
886
`+
# See issue #24214
`
``
887
`+
cases = [b'\x80', b'\xBF', b'\xC0', b'\xC1', b'\xF5', b'\xF6', b'\xFF']
`
``
888
`+
for prefix in (b'\xC2', b'\xDF', b'\xE0', b'\xE0\xA0', b'\xEF',
`
``
889
`+
b'\xEF\xBF', b'\xF0', b'\xF0\x90', b'\xF0\x90\x80',
`
``
890
`+
b'\xF4', b'\xF4\x8F', b'\xF4\x8F\xBF'):
`
``
891
`+
for suffix in b'\x7F', b'\xC0':
`
``
892
`+
cases.append(prefix + suffix)
`
``
893
`+
cases.extend((b'\xE0\x80', b'\xE0\x9F', b'\xED\xA0\x80',
`
``
894
`+
b'\xED\xBF\xBF', b'\xF0\x80', b'\xF0\x8F', b'\xF4\x90'))
`
``
895
+
``
896
`+
for data in cases:
`
``
897
`+
with self.subTest(data=data):
`
``
898
`+
dec = codecs.getincrementaldecoder(self.encoding)()
`
``
899
`+
self.assertRaises(UnicodeDecodeError, dec.decode, data)
`
``
900
+
877
901
``
878
902
`class UTF7Test(ReadTest, unittest.TestCase):
`
879
903
`encoding = "utf-7"
`