bpo-24214: Fixed the UTF-8 and UTF-16 incremental decoders. (GH-14304) · python/cpython@d32594a (original) (raw)

`@@ -429,11 +429,18 @@ def test_lone_surrogates(self):

`

429

429

def test_incremental_surrogatepass(self):
    # Test incremental decoder for surrogatepass handler:
    # see issue #24214
    #
    # The encoded form of a lone surrogate is split at every interior
    # byte offset.  A fresh incremental decoder must return '' for the
    # first part and the surrogate itself once the remaining bytes
    # have been supplied.

    # High surrogate
    data = '\uD901'.encode(self.encoding, 'surrogatepass')
    for i in range(1, len(data)):
        dec = codecs.getincrementaldecoder(self.encoding)('surrogatepass')
        self.assertEqual(dec.decode(data[:i]), '')
        # The second positional argument is ``final``; the remaining
        # bytes are fed as the last chunk of input.
        # NOTE(review): presumably needed because a decoder may hold a
        # high surrogate back while waiting for a low one -- confirm.
        self.assertEqual(dec.decode(data[i:], True), '\uD901')

    # Low surrogate
    data = '\uDC02'.encode(self.encoding, 'surrogatepass')
    for i in range(1, len(data)):
        dec = codecs.getincrementaldecoder(self.encoding)('surrogatepass')
        self.assertEqual(dec.decode(data[:i]), '')
        # ``final`` is deliberately left at its default here: the low
        # surrogate must be produced without signalling end of input.
        self.assertEqual(dec.decode(data[i:]), '\uDC02')

437

444

``

438

445

``

439

446

`class UTF32Test(ReadTest, unittest.TestCase):

`

`@@ -874,6 +881,23 @@ def test_surrogatepass_handler(self):

`

874

881

`with self.assertRaises(UnicodeDecodeError):

`

875

882

`b"abc\xed\xa0z".decode(self.encoding, "surrogatepass")

`

876

883

``

``

884

`+

def test_incremental_errors(self):
    # Test that the incremental decoder can fail with final=False.
    # See issue #24214
    #
    # NOTE(review): the byte patterns below are UTF-8 lead and
    # continuation bytes, so self.encoding is presumably UTF-8 here;
    # confirm against the enclosing class.

    # Single bytes that can never start a valid sequence.
    cases = [b'\x80', b'\xBF', b'\xC0', b'\xC1', b'\xF5', b'\xF6', b'\xFF']

    # Incomplete multi-byte prefixes followed by a byte that is not a
    # continuation byte (0x7F is below, 0xC0 is above, the 0x80-0xBF
    # continuation range).
    for prefix in (b'\xC2', b'\xDF', b'\xE0', b'\xE0\xA0', b'\xEF',
                   b'\xEF\xBF', b'\xF0', b'\xF0\x90', b'\xF0\x90\x80',
                   b'\xF4', b'\xF4\x8F', b'\xF4\x8F\xBF'):
        for suffix in b'\x7F', b'\xC0':
            cases.append(prefix + suffix)

    # Lead byte plus a continuation byte that is out of range for that
    # lead byte (overlong forms, surrogates, beyond U+10FFFF).
    cases.extend((b'\xE0\x80', b'\xE0\x9F', b'\xED\xA0\x80',
                  b'\xED\xBF\xBF', b'\xF0\x80', b'\xF0\x8F', b'\xF4\x90'))

    for data in cases:
        with self.subTest(data=data):
            # A fresh decoder with the default error handler; decode()
            # is called with final left at its default and must raise
            # straight away rather than buffer the bad input.
            dec = codecs.getincrementaldecoder(self.encoding)()
            self.assertRaises(UnicodeDecodeError, dec.decode, data)

`

``

900

+

877

901

``

878

902

`class UTF7Test(ReadTest, unittest.TestCase):

`

879

903

`encoding = "utf-7"

`