[3.7] bpo-24214: Fixed the UTF-8 and UTF-16 incremental decoders. (GH… · python/cpython@c755ca8 (original) (raw)

`@@ -404,11 +404,19 @@ def test_lone_surrogates(self):

`

404

404

`def test_incremental_surrogatepass(self):

`

405

405

`# Test incremental decoder for surrogatepass handler:

`

406

406

`# see issue #24214

`

``

407

`+

# High surrogate

`

407

408

`data = '\uD901'.encode(self.encoding, 'surrogatepass')

`

408

409

`for i in range(1, len(data)):

`

409

410

`dec = codecs.getincrementaldecoder(self.encoding)('surrogatepass')

`

410

411

`self.assertEqual(dec.decode(data[:i]), '')

`

411

412

`self.assertEqual(dec.decode(data[i:], True), '\uD901')

`

``

413

`+

# Low surrogate

`

``

414

`+

data = '\uDC02'.encode(self.encoding, 'surrogatepass')

`

``

415

`+

for i in range(1, len(data)):

`

``

416

`+

dec = codecs.getincrementaldecoder(self.encoding)('surrogatepass')

`

``

417

`+

self.assertEqual(dec.decode(data[:i]), '')

`

``

418

`+

final = self.encoding == "cp65001"

`

``

419

`+

self.assertEqual(dec.decode(data[i:], final), '\uDC02')

`

412

420

``

413

421

``

414

422

`class UTF32Test(ReadTest, unittest.TestCase):

`

`@@ -849,6 +857,23 @@ def test_surrogatepass_handler(self):

`

849

857

`with self.assertRaises(UnicodeDecodeError):

`

850

858

`b"abc\xed\xa0z".decode(self.encoding, "surrogatepass")

`

851

859

``

``

860

`+

def test_incremental_errors(self):

`

``

861

`+

# Test that the incremental decoder can fail with final=False.

`

``

862

`+

# See issue #24214

`

``

863

`+

cases = [b'\x80', b'\xBF', b'\xC0', b'\xC1', b'\xF5', b'\xF6', b'\xFF']

`

``

864

`+

for prefix in (b'\xC2', b'\xDF', b'\xE0', b'\xE0\xA0', b'\xEF',

`

``

865

`+

b'\xEF\xBF', b'\xF0', b'\xF0\x90', b'\xF0\x90\x80',

`

``

866

`+

b'\xF4', b'\xF4\x8F', b'\xF4\x8F\xBF'):

`

``

867

`+

for suffix in b'\x7F', b'\xC0':

`

``

868

`+

cases.append(prefix + suffix)

`

``

869

`+

cases.extend((b'\xE0\x80', b'\xE0\x9F', b'\xED\xA0\x80',

`

``

870

`+

b'\xED\xBF\xBF', b'\xF0\x80', b'\xF0\x8F', b'\xF4\x90'))

`

``

871

+

``

872

`+

for data in cases:

`

``

873

`+

with self.subTest(data=data):

`

``

874

`+

dec = codecs.getincrementaldecoder(self.encoding)()

`

``

875

`+

self.assertRaises(UnicodeDecodeError, dec.decode, data)

`

``

876

+

852

877

``

853

878

`@unittest.skipUnless(sys.platform == 'win32',

`

854

879

`'cp65001 is a Windows-only codec')

`