cpython: 5e98a50e0f55 (original) (raw)
--- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -2807,6 +2807,9 @@ class CodePageTest(unittest.TestCase): ('[\u20ac]', 'replace', b'[?]'), ('[\xff]', 'backslashreplace', b'[\\xff]'), ('[\xff]', 'xmlcharrefreplace', b'[&#255;]'),
('\udcff', 'strict', None),[](#l1.7)
('[\udcff]', 'surrogateescape', b'[\xff]'),[](#l1.8)
('[\udcff]', 'surrogatepass', None),[](#l1.9) ))[](#l1.10) self.check_decode(932, ([](#l1.11) (b'abc', 'strict', 'abc'),[](#l1.12)
@@ -2816,6 +2819,7 @@ class CodePageTest(unittest.TestCase): (b'[\xff]', 'ignore', '[]'), (b'[\xff]', 'replace', '[\ufffd]'), (b'[\xff]', 'surrogateescape', '[\udcff]'),
(b'[\xff]', 'surrogatepass', None),[](#l1.17) (b'\x81\x00abc', 'strict', None),[](#l1.18) (b'\x81\x00abc', 'ignore', '\x00abc'),[](#l1.19) (b'\x81\x00abc', 'replace', '\ufffd\x00abc'),[](#l1.20)
@@ -2826,14 +2830,23 @@ class CodePageTest(unittest.TestCase): ('abc', 'strict', b'abc'), ('\xe9\u20ac', 'strict', b'\xe9\x80'), ('\xff', 'strict', b'\xff'),
# test error handlers[](#l1.25) ('\u0141', 'strict', None),[](#l1.26) ('\u0141', 'ignore', b''),[](#l1.27) ('\u0141', 'replace', b'L'),[](#l1.28)
('\udc98', 'surrogateescape', b'\x98'),[](#l1.29)
('\udc98', 'surrogatepass', None),[](#l1.30) ))[](#l1.31) self.check_decode(1252, ([](#l1.32) (b'abc', 'strict', 'abc'),[](#l1.33) (b'\xe9\x80', 'strict', '\xe9\u20ac'),[](#l1.34) (b'\xff', 'strict', '\xff'),[](#l1.35)
# invalid bytes[](#l1.36)
(b'[\x98]', 'strict', None),[](#l1.37)
(b'[\x98]', 'ignore', '[]'),[](#l1.38)
(b'[\x98]', 'replace', '[\ufffd]'),[](#l1.39)
(b'[\x98]', 'surrogateescape', '[\udc98]'),[](#l1.40)
(b'[\x98]', 'surrogatepass', None),[](#l1.41) ))[](#l1.42)
--- a/Misc/NEWS +++ b/Misc/NEWS @@ -84,6 +84,9 @@ Core and Builtins Library ------- +- Issue #13916: Disallowed the surrogatepass error handler for non UTF-* encodings.
- Issue #20998: Fixed re.fullmatch() of repeated single character pattern with ignore case. Original patch by Matthew Barnett.
--- a/Python/codecs.c +++ b/Python/codecs.c @@ -901,6 +901,7 @@ PyObject *PyCodec_BackslashReplaceErrors } } +#define ENC_UNKNOWN -1 #define ENC_UTF8 0 #define ENC_UTF16BE 1 #define ENC_UTF16LE 2 @@ -916,7 +917,11 @@ get_standard_encoding(const char *encodi encoding += 3; if (*encoding == '-' || *encoding == '_' ) encoding++;
if (encoding[0] == '1' && encoding[1] == '6') {[](#l3.15)
if (encoding[0] == '8' && encoding[1] == '\0') {[](#l3.16)
*bytelength = 3;[](#l3.17)
return ENC_UTF8;[](#l3.18)
}[](#l3.19)
else if (encoding[0] == '1' && encoding[1] == '6') {[](#l3.20) encoding += 2;[](#l3.21) *bytelength = 2;[](#l3.22) if (*encoding == '\0') {[](#l3.23)
@@ -955,9 +960,7 @@ get_standard_encoding(const char *encodi } } }
} /* This handler is declared static until someone demonstrates @@ -994,6 +997,12 @@ PyCodec_SurrogatePassErrors(PyObject *ex } code = get_standard_encoding(encoding, &bytelength); Py_DECREF(encode);
if (code == ENC_UNKNOWN) {[](#l3.39)
/* Not supported, fail with original exception */[](#l3.40)
PyErr_SetObject(PyExceptionInstance_Class(exc), exc);[](#l3.41)
Py_DECREF(object);[](#l3.42)
return NULL;[](#l3.43)
}[](#l3.44)
res = PyBytes_FromStringAndSize(NULL, bytelength*(end-start)); if (!res) { @@ -1068,6 +1077,12 @@ PyCodec_SurrogatePassErrors(PyObject *ex } code = get_standard_encoding(encoding, &bytelength); Py_DECREF(encode);
if (code == ENC_UNKNOWN) {[](#l3.52)
/* Not supported, fail with original exception */[](#l3.53)
PyErr_SetObject(PyExceptionInstance_Class(exc), exc);[](#l3.54)
Py_DECREF(object);[](#l3.55)
return NULL;[](#l3.56)
}[](#l3.57)
/* Try decoding a single surrogate character. If there are more, let the codec call us again. */