bpo-36778: cp65001 encoding becomes an alias to utf_8 (GH-13230) · python/cpython@d267ac2 (original) (raw)

`@@ -875,95 +875,6 @@ def test_surrogatepass_handler(self):

`

875

875

`b"abc\xed\xa0z".decode(self.encoding, "surrogatepass")

`

876

876

``

877

877

``

878

``

`-

@unittest.skipUnless(sys.platform == 'win32',

`

879

``

`-

'cp65001 is a Windows-only codec')

`

880

``

`-

class CP65001Test(ReadTest, unittest.TestCase):

`

881

``

`-

encoding = "cp65001"

`

882

``

-

883

``

`-

def test_encode(self):

`

884

``

`-

tests = [

`

885

``

`-

('abc', 'strict', b'abc'),

`

886

``

`-

('\xe9\u20ac', 'strict', b'\xc3\xa9\xe2\x82\xac'),

`

887

``

`-

('\U0010ffff', 'strict', b'\xf4\x8f\xbf\xbf'),

`

888

``

`-

('\udc80', 'strict', None),

`

889

``

`-

('\udc80', 'ignore', b''),

`

890

``

`-

('\udc80', 'replace', b'?'),

`

891

``

`-

('\udc80', 'backslashreplace', b'\\udc80'),

`

892

``

`-

('\udc80', 'namereplace', b'\\udc80'),

`

893

``

`-

('\udc80', 'surrogatepass', b'\xed\xb2\x80'),

`

894

``

`-

]

`

895

``

`-

for text, errors, expected in tests:

`

896

``

`-

if expected is not None:

`

897

``

`-

try:

`

898

``

`-

encoded = text.encode('cp65001', errors)

`

899

``

`-

except UnicodeEncodeError as err:

`

900

``

`-

self.fail('Unable to encode %a to cp65001 with '

`

901

``

`-

'errors=%r: %s' % (text, errors, err))

`

902

``

`-

self.assertEqual(encoded, expected,

`

903

``

`-

'%a.encode("cp65001", %r)=%a != %a'

`

904

``

`-

% (text, errors, encoded, expected))

`

905

``

`-

else:

`

906

``

`-

self.assertRaises(UnicodeEncodeError,

`

907

``

`-

text.encode, "cp65001", errors)

`

908

``

-

909

``

`-

def test_decode(self):

`

910

``

`-

tests = [

`

911

``

`-

(b'abc', 'strict', 'abc'),

`

912

``

`-

(b'\xc3\xa9\xe2\x82\xac', 'strict', '\xe9\u20ac'),

`

913

``

`-

(b'\xf4\x8f\xbf\xbf', 'strict', '\U0010ffff'),

`

914

``

`-

(b'\xef\xbf\xbd', 'strict', '\ufffd'),

`

915

``

`-

(b'[\xc3\xa9]', 'strict', '[\xe9]'),

`

916

``

`-

# invalid bytes

`

917

``

`-

(b'[\xff]', 'strict', None),

`

918

``

`-

(b'[\xff]', 'ignore', '[]'),

`

919

``

`-

(b'[\xff]', 'replace', '[\ufffd]'),

`

920

``

`-

(b'[\xff]', 'surrogateescape', '[\udcff]'),

`

921

``

`-

(b'[\xed\xb2\x80]', 'strict', None),

`

922

``

`-

(b'[\xed\xb2\x80]', 'ignore', '[]'),

`

923

``

`-

(b'[\xed\xb2\x80]', 'replace', '[\ufffd\ufffd\ufffd]'),

`

924

``

`-

]

`

925

``

`-

for raw, errors, expected in tests:

`

926

``

`-

if expected is not None:

`

927

``

`-

try:

`

928

``

`-

decoded = raw.decode('cp65001', errors)

`

929

``

`-

except UnicodeDecodeError as err:

`

930

``

`-

self.fail('Unable to decode %a from cp65001 with '

`

931

``

`-

'errors=%r: %s' % (raw, errors, err))

`

932

``

`-

self.assertEqual(decoded, expected,

`

933

``

`-

'%a.decode("cp65001", %r)=%a != %a'

`

934

``

`-

% (raw, errors, decoded, expected))

`

935

``

`-

else:

`

936

``

`-

self.assertRaises(UnicodeDecodeError,

`

937

``

`-

raw.decode, 'cp65001', errors)

`

938

``

-

939

``

`-

def test_lone_surrogates(self):

`

940

``

`-

self.assertRaises(UnicodeEncodeError, "\ud800".encode, "cp65001")

`

941

``

`-

self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "cp65001")

`

942

``

`-

self.assertEqual("[\uDC80]".encode("cp65001", "backslashreplace"),

`

943

``

`-

b'[\\udc80]')

`

944

``

`-

self.assertEqual("[\uDC80]".encode("cp65001", "namereplace"),

`

945

``

`-

b'[\\udc80]')

`

946

``

`-

self.assertEqual("[\uDC80]".encode("cp65001", "xmlcharrefreplace"),

`

947

``

`-

b'[&#56448;]')

`

948

``

`-

self.assertEqual("[\uDC80]".encode("cp65001", "surrogateescape"),

`

949

``

`-

b'[\x80]')

`

950

``

`-

self.assertEqual("[\uDC80]".encode("cp65001", "ignore"),

`

951

``

`-

b'[]')

`

952

``

`-

self.assertEqual("[\uDC80]".encode("cp65001", "replace"),

`

953

``

`-

b'[?]')

`

954

``

-

955

``

`-

def test_surrogatepass_handler(self):

`

956

``

`-

self.assertEqual("abc\ud800def".encode("cp65001", "surrogatepass"),

`

957

``

`-

b"abc\xed\xa0\x80def")

`

958

``

`-

self.assertEqual(b"abc\xed\xa0\x80def".decode("cp65001", "surrogatepass"),

`

959

``

`-

"abc\ud800def")

`

960

``

`-

self.assertEqual("\U00010fff\uD800".encode("cp65001", "surrogatepass"),

`

961

``

`-

b"\xf0\x90\xbf\xbf\xed\xa0\x80")

`

962

``

`-

self.assertEqual(b"\xf0\x90\xbf\xbf\xed\xa0\x80".decode("cp65001", "surrogatepass"),

`

963

``

`-

"\U00010fff\uD800")

`

964

``

`-

self.assertTrue(codecs.lookup_error("surrogatepass"))

`

965

``

-

966

``

-

967

878

`class UTF7Test(ReadTest, unittest.TestCase):

`

968

879

`encoding = "utf-7"

`

969

880

``