bpo-36778: cp65001 encoding becomes an alias to utf_8 (GH-13230) · python/cpython@d267ac2 (original) (raw)
`@@ -875,95 +875,6 @@ def test_surrogatepass_handler(self):
`
875
875
`b"abc\xed\xa0z".decode(self.encoding, "surrogatepass")
`
876
876
``
877
877
``
878
``
`-
@unittest.skipUnless(sys.platform == 'win32',
`
879
``
`-
'cp65001 is a Windows-only codec')
`
880
``
`-
class CP65001Test(ReadTest, unittest.TestCase):
`
881
``
`-
encoding = "cp65001"
`
882
``
-
883
``
`-
def test_encode(self):
`
884
``
`-
tests = [
`
885
``
`-
('abc', 'strict', b'abc'),
`
886
``
`-
('\xe9\u20ac', 'strict', b'\xc3\xa9\xe2\x82\xac'),
`
887
``
`-
('\U0010ffff', 'strict', b'\xf4\x8f\xbf\xbf'),
`
888
``
`-
('\udc80', 'strict', None),
`
889
``
`-
('\udc80', 'ignore', b''),
`
890
``
`-
('\udc80', 'replace', b'?'),
`
891
``
`-
('\udc80', 'backslashreplace', b'\\udc80'),
`
892
``
`-
('\udc80', 'namereplace', b'\\udc80'),
`
893
``
`-
('\udc80', 'surrogatepass', b'\xed\xb2\x80'),
`
894
``
`-
]
`
895
``
`-
for text, errors, expected in tests:
`
896
``
`-
if expected is not None:
`
897
``
`-
try:
`
898
``
`-
encoded = text.encode('cp65001', errors)
`
899
``
`-
except UnicodeEncodeError as err:
`
900
``
`-
self.fail('Unable to encode %a to cp65001 with '
`
901
``
`-
'errors=%r: %s' % (text, errors, err))
`
902
``
`-
self.assertEqual(encoded, expected,
`
903
``
`-
'%a.encode("cp65001", %r)=%a != %a'
`
904
``
`-
% (text, errors, encoded, expected))
`
905
``
`-
else:
`
906
``
`-
self.assertRaises(UnicodeEncodeError,
`
907
``
`-
text.encode, "cp65001", errors)
`
908
``
-
909
``
`-
def test_decode(self):
`
910
``
`-
tests = [
`
911
``
`-
(b'abc', 'strict', 'abc'),
`
912
``
`-
(b'\xc3\xa9\xe2\x82\xac', 'strict', '\xe9\u20ac'),
`
913
``
`-
(b'\xf4\x8f\xbf\xbf', 'strict', '\U0010ffff'),
`
914
``
`-
(b'\xef\xbf\xbd', 'strict', '\ufffd'),
`
915
``
`-
(b'[\xc3\xa9]', 'strict', '[\xe9]'),
`
916
``
`-
# invalid bytes
`
917
``
`-
(b'[\xff]', 'strict', None),
`
918
``
`-
(b'[\xff]', 'ignore', '[]'),
`
919
``
`-
(b'[\xff]', 'replace', '[\ufffd]'),
`
920
``
`-
(b'[\xff]', 'surrogateescape', '[\udcff]'),
`
921
``
`-
(b'[\xed\xb2\x80]', 'strict', None),
`
922
``
`-
(b'[\xed\xb2\x80]', 'ignore', '[]'),
`
923
``
`-
(b'[\xed\xb2\x80]', 'replace', '[\ufffd\ufffd\ufffd]'),
`
924
``
`-
]
`
925
``
`-
for raw, errors, expected in tests:
`
926
``
`-
if expected is not None:
`
927
``
`-
try:
`
928
``
`-
decoded = raw.decode('cp65001', errors)
`
929
``
`-
except UnicodeDecodeError as err:
`
930
``
`-
self.fail('Unable to decode %a from cp65001 with '
`
931
``
`-
'errors=%r: %s' % (raw, errors, err))
`
932
``
`-
self.assertEqual(decoded, expected,
`
933
``
`-
'%a.decode("cp65001", %r)=%a != %a'
`
934
``
`-
% (raw, errors, decoded, expected))
`
935
``
`-
else:
`
936
``
`-
self.assertRaises(UnicodeDecodeError,
`
937
``
`-
raw.decode, 'cp65001', errors)
`
938
``
-
939
``
`-
def test_lone_surrogates(self):
`
940
``
`-
self.assertRaises(UnicodeEncodeError, "\ud800".encode, "cp65001")
`
941
``
`-
self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "cp65001")
`
942
``
`-
self.assertEqual("[\uDC80]".encode("cp65001", "backslashreplace"),
`
943
``
`-
b'[\\udc80]')
`
944
``
`-
self.assertEqual("[\uDC80]".encode("cp65001", "namereplace"),
`
945
``
`-
b'[\\udc80]')
`
946
``
`-
self.assertEqual("[\uDC80]".encode("cp65001", "xmlcharrefreplace"),
`
947
``
`-
b'[&#56448;]')
`
948
``
`-
self.assertEqual("[\uDC80]".encode("cp65001", "surrogateescape"),
`
949
``
`-
b'[\x80]')
`
950
``
`-
self.assertEqual("[\uDC80]".encode("cp65001", "ignore"),
`
951
``
`-
b'[]')
`
952
``
`-
self.assertEqual("[\uDC80]".encode("cp65001", "replace"),
`
953
``
`-
b'[?]')
`
954
``
-
955
``
`-
def test_surrogatepass_handler(self):
`
956
``
`-
self.assertEqual("abc\ud800def".encode("cp65001", "surrogatepass"),
`
957
``
`-
b"abc\xed\xa0\x80def")
`
958
``
`-
self.assertEqual(b"abc\xed\xa0\x80def".decode("cp65001", "surrogatepass"),
`
959
``
`-
"abc\ud800def")
`
960
``
`-
self.assertEqual("\U00010fff\uD800".encode("cp65001", "surrogatepass"),
`
961
``
`-
b"\xf0\x90\xbf\xbf\xed\xa0\x80")
`
962
``
`-
self.assertEqual(b"\xf0\x90\xbf\xbf\xed\xa0\x80".decode("cp65001", "surrogatepass"),
`
963
``
`-
"\U00010fff\uD800")
`
964
``
`-
self.assertTrue(codecs.lookup_error("surrogatepass"))
`
965
``
-
966
``
-
967
878
`class UTF7Test(ReadTest, unittest.TestCase):
`
968
879
`encoding = "utf-7"
`
969
880
``