[3.6] bpo-32583: Fix possible crashing in builtin Unicode decoders (G… · python/cpython@ea94fce (original) (raw)

`@@ -1044,6 +1044,58 @@ def mutating(exc):

`

1044

1044

`for (encoding, data) in baddata:

`

1045

1045

`self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")

`

1046

1046

``

``

1047

`+

# issue32583

`

``

1048

`+

def test_crashing_decode_handler(self):

`

``

1049

`+

# better generating one more character to fill the extra space slot

`

``

1050

`+

# so in debug build it can steadily fail

`

``

1051

`+

def forward_shorter_than_end(exc):

`

``

1052

`+

if isinstance(exc, UnicodeDecodeError):

`

``

1053

`+

# size one character, 0 < forward < exc.end

`

``

1054

`+

return ('\ufffd', exc.start+1)

`

``

1055

`+

else:

`

``

1056

`+

raise TypeError("don't know how to handle %r" % exc)

`

``

1057

`+

codecs.register_error(

`

``

1058

`+

"test.forward_shorter_than_end", forward_shorter_than_end)

`

``

1059

+

``

1060

`+

self.assertEqual(

`

``

1061

`+

b'\xd8\xd8\xd8\xd8\xd8\x00\x00\x00'.decode(

`

``

1062

`+

'utf-16-le', 'test.forward_shorter_than_end'),

`

``

1063

`+

'\ufffd\ufffd\ufffd\ufffd\xd8\x00'

`

``

1064

`+

)

`

``

1065

`+

self.assertEqual(

`

``

1066

`+

b'\xd8\xd8\xd8\xd8\x00\xd8\x00\x00'.decode(

`

``

1067

`+

'utf-16-be', 'test.forward_shorter_than_end'),

`

``

1068

`+

'\ufffd\ufffd\ufffd\ufffd\xd8\x00'

`

``

1069

`+

)

`

``

1070

`+

self.assertEqual(

`

``

1071

`+

b'\x11\x11\x11\x11\x11\x00\x00\x00\x00\x00\x00'.decode(

`

``

1072

`+

'utf-32-le', 'test.forward_shorter_than_end'),

`

``

1073

`+

'\ufffd\ufffd\ufffd\u1111\x00'

`

``

1074

`+

)

`

``

1075

`+

self.assertEqual(

`

``

1076

`+

b'\x11\x11\x11\x00\x00\x11\x11\x00\x00\x00\x00'.decode(

`

``

1077

`+

'utf-32-be', 'test.forward_shorter_than_end'),

`

``

1078

`+

'\ufffd\ufffd\ufffd\u1111\x00'

`

``

1079

`+

)

`

``

1080

+

``

1081

`+

def replace_with_long(exc):

`

``

1082

`+

if isinstance(exc, UnicodeDecodeError):

`

``

1083

`+

exc.object = b"\x00" * 8

`

``

1084

`+

return ('\ufffd', exc.start)

`

``

1085

`+

else:

`

``

1086

`+

raise TypeError("don't know how to handle %r" % exc)

`

``

1087

`+

codecs.register_error("test.replace_with_long", replace_with_long)

`

``

1088

+

``

1089

`+

self.assertEqual(

`

``

1090

`+

b'\x00'.decode('utf-16', 'test.replace_with_long'),

`

``

1091

`+

'\ufffd\x00\x00\x00\x00'

`

``

1092

`+

)

`

``

1093

`+

self.assertEqual(

`

``

1094

`+

b'\x00'.decode('utf-32', 'test.replace_with_long'),

`

``

1095

`+

'\ufffd\x00\x00'

`

``

1096

`+

)

`

``

1097

+

``

1098

+

1047

1099

`def test_fake_error_class(self):

`

1048

1100

`handlers = [

`

1049

1101

`codecs.strict_errors,

`