bpo-36311: Fixes decoding multibyte characters around chunk boundaries… · python/cpython@f93c15a (original) (raw)

`@@ -3075,13 +3075,13 @@ def test_mbcs_alias(self):

`

3075

3075

`self.assertEqual(codec.name, 'mbcs')

`

3076

3076

``

3077

3077

`@support.bigmemtest(size=2**31, memuse=7, dry_run=False)

`

3078

``

`-

def test_large_input(self):

`

``

3078

`+

def test_large_input(self, size):

`

3079

3079

`# Test input longer than INT_MAX.

`

3080

3080

`# Input should contain undecodable bytes before and after

`

3081

3081

`# the INT_MAX limit.

`

3082

``

`-

encoded = (b'01234567' * (2**28-1) +

`

``

3082

`+

encoded = (b'01234567' * ((size//8)-1) +

`

3083

3083

`b'\x85\x86\xea\xeb\xec\xef\xfc\xfd\xfe\xff')

`

3084

``

`-

self.assertEqual(len(encoded), 2**31+2)

`

``

3084

`+

self.assertEqual(len(encoded), size+2)

`

3085

3085

`decoded = codecs.code_page_decode(932, encoded, 'surrogateescape', True)

`

3086

3086

`self.assertEqual(decoded[1], len(encoded))

`

3087

3087

`del encoded

`

`@@ -3092,6 +3092,20 @@ def test_large_input(self):

`

3092

3092

`'\udc85\udc86\udcea\udceb\udcec'

`

3093

3093

`'\udcef\udcfc\udcfd\udcfe\udcff')

`

3094

3094

``

``

3095

`+

@support.bigmemtest(size=2**31, memuse=6, dry_run=False)

`

``

3096

`+

def test_large_utf8_input(self, size):

`

``

3097

`+

# Test input longer than INT_MAX.

`

``

3098

`+

# Input should contain a decodable multi-byte character

`

``

3099

`+

# surrounding INT_MAX

`

``

3100

`+

encoded = (b'0123456\xed\x84\x80' * (size//8))

`

``

3101

`+

self.assertEqual(len(encoded), size // 8 * 10)

`

``

3102

`+

decoded = codecs.code_page_decode(65001, encoded, 'ignore', True)

`

``

3103

`+

self.assertEqual(decoded[1], len(encoded))

`

``

3104

`+

del encoded

`

``

3105

`+

self.assertEqual(len(decoded[0]), size)

`

``

3106

`+

self.assertEqual(decoded[0][:10], '0123456\ud10001')

`

``

3107

`+

self.assertEqual(decoded[0][-11:], '56\ud1000123456\ud100')

`

``

3108

+

3095

3109

``

3096

3110

`class ASCIITest(unittest.TestCase):

`

3097

3111

`def test_encode(self):

`