[3.7] bpo-24214: Fixed the UTF-8 and UTF-16 incremental decoders. (GH… · python/cpython@c755ca8 (original) (raw)
`@@ -404,11 +404,19 @@ def test_lone_surrogates(self):
`
404
404
`def test_incremental_surrogatepass(self):
`
405
405
`# Test incremental decoder for surrogatepass handler:
`
406
406
`# see issue #24214
`
``
407
`+
High surrogate
`
407
408
`data = '\uD901'.encode(self.encoding, 'surrogatepass')
`
408
409
`for i in range(1, len(data)):
`
409
410
`dec = codecs.getincrementaldecoder(self.encoding)('surrogatepass')
`
410
411
`self.assertEqual(dec.decode(data[:i]), '')
`
411
412
`self.assertEqual(dec.decode(data[i:], True), '\uD901')
`
``
413
`+
Low surrogate
`
``
414
`+
data = '\uDC02'.encode(self.encoding, 'surrogatepass')
`
``
415
`+
for i in range(1, len(data)):
`
``
416
`+
dec = codecs.getincrementaldecoder(self.encoding)('surrogatepass')
`
``
417
`+
self.assertEqual(dec.decode(data[:i]), '')
`
``
418
`+
final = self.encoding == "cp65001"
`
``
419
`+
self.assertEqual(dec.decode(data[i:], final), '\uDC02')
`
412
420
``
413
421
``
414
422
`class UTF32Test(ReadTest, unittest.TestCase):
`
`@@ -849,6 +857,23 @@ def test_surrogatepass_handler(self):
`
849
857
`with self.assertRaises(UnicodeDecodeError):
`
850
858
`b"abc\xed\xa0z".decode(self.encoding, "surrogatepass")
`
851
859
``
``
860
`+
def test_incremental_errors(self):
`
``
861
`+
Test that the incremental decoder can fail with final=False.
`
``
862
`+
See issue #24214
`
``
863
`+
cases = [b'\x80', b'\xBF', b'\xC0', b'\xC1', b'\xF5', b'\xF6', b'\xFF']
`
``
864
`+
for prefix in (b'\xC2', b'\xDF', b'\xE0', b'\xE0\xA0', b'\xEF',
`
``
865
`+
b'\xEF\xBF', b'\xF0', b'\xF0\x90', b'\xF0\x90\x80',
`
``
866
`+
b'\xF4', b'\xF4\x8F', b'\xF4\x8F\xBF'):
`
``
867
`+
for suffix in b'\x7F', b'\xC0':
`
``
868
`+
cases.append(prefix + suffix)
`
``
869
`+
cases.extend((b'\xE0\x80', b'\xE0\x9F', b'\xED\xA0\x80',
`
``
870
`+
b'\xED\xBF\xBF', b'\xF0\x80', b'\xF0\x8F', b'\xF4\x90'))
`
``
871
+
``
872
`+
for data in cases:
`
``
873
`+
with self.subTest(data=data):
`
``
874
`+
dec = codecs.getincrementaldecoder(self.encoding)()
`
``
875
`+
self.assertRaises(UnicodeDecodeError, dec.decode, data)
`
``
876
+
852
877
``
853
878
`@unittest.skipUnless(sys.platform == 'win32',
`
854
879
`'cp65001 is a Windows-only codec')
`