bpo-36311: Fixes decoding multibyte characters around chunk boundarie… · python/cpython@f93c15a (original) (raw)
`@@ -3075,13 +3075,13 @@ def test_mbcs_alias(self):
`
3075
3075
`self.assertEqual(codec.name, 'mbcs')
`
3076
3076
``
3077
3077
`@support.bigmemtest(size=2**31, memuse=7, dry_run=False)
`
3078
``
`-
def test_large_input(self):
`
``
3078
`+
def test_large_input(self, size):
`
3079
3079
`# Test input longer than INT_MAX.
`
3080
3080
`# Input should contain undecodable bytes before and after
`
3081
3081
`# the INT_MAX limit.
`
3082
``
`-
encoded = (b'01234567' * (2**28-1) +
`
``
3082
`+
encoded = (b'01234567' * ((size//8)-1) +
`
3083
3083
`b'\x85\x86\xea\xeb\xec\xef\xfc\xfd\xfe\xff')
`
3084
``
`-
self.assertEqual(len(encoded), 2**31+2)
`
``
3084
`+
self.assertEqual(len(encoded), size+2)
`
3085
3085
`decoded = codecs.code_page_decode(932, encoded, 'surrogateescape', True)
`
3086
3086
`self.assertEqual(decoded[1], len(encoded))
`
3087
3087
`del encoded
`
`@@ -3092,6 +3092,20 @@ def test_large_input(self):
`
3092
3092
`'\udc85\udc86\udcea\udceb\udcec'
`
3093
3093
`'\udcef\udcfc\udcfd\udcfe\udcff')
`
3094
3094
``
``
3095
`+
@support.bigmemtest(size=2**31, memuse=6, dry_run=False)
`
``
3096
`+
def test_large_utf8_input(self, size):
`
``
3097
`+
# Test input longer than INT_MAX.
`
``
3098
`+
# Input should contain a decodable multi-byte character
`
``
3099
`+
# surrounding INT_MAX
`
``
3100
`+
encoded = (b'0123456\xed\x84\x80' * (size//8))
`
``
3101
`+
self.assertEqual(len(encoded), size // 8 * 10)
`
``
3102
`+
decoded = codecs.code_page_decode(65001, encoded, 'ignore', True)
`
``
3103
`+
self.assertEqual(decoded[1], len(encoded))
`
``
3104
`+
del encoded
`
``
3105
`+
self.assertEqual(len(decoded[0]), size)
`
``
3106
`+
self.assertEqual(decoded[0][:10], '0123456\ud10001')
`
``
3107
`+
self.assertEqual(decoded[0][-11:], '56\ud1000123456\ud100')
`
``
3108
+
3095
3109
``
3096
3110
`class ASCIITest(unittest.TestCase):
`
3097
3111
`def test_encode(self):
`