cpython: 5e98a50e0f55 (original) (raw)

--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -2807,6 +2807,9 @@ class CodePageTest(unittest.TestCase):
       ('[\u20ac]', 'replace', b'[?]'),
       ('[\xff]', 'backslashreplace', b'[\\xff]'),
       ('[\xff]', 'xmlcharrefreplace', b'[&#255;]'),

       ('\udcff', 'strict', None),[](#l1.7)

       ('[\udcff]', 'surrogateescape', b'[\xff]'),[](#l1.8)

       ('[\udcff]', 'surrogatepass', None),[](#l1.9)
   ))[](#l1.10)
   self.check_decode(932, ([](#l1.11)
       (b'abc', 'strict', 'abc'),[](#l1.12)

@@ -2816,6 +2819,7 @@ class CodePageTest(unittest.TestCase): (b'[\xff]', 'ignore', '[]'), (b'[\xff]', 'replace', '[\ufffd]'), (b'[\xff]', 'surrogateescape', '[\udcff]'),

       (b'[\xff]', 'surrogatepass', None),[](#l1.17)
       (b'\x81\x00abc', 'strict', None),[](#l1.18)
       (b'\x81\x00abc', 'ignore', '\x00abc'),[](#l1.19)
       (b'\x81\x00abc', 'replace', '\ufffd\x00abc'),[](#l1.20)

@@ -2826,14 +2830,23 @@ class CodePageTest(unittest.TestCase): ('abc', 'strict', b'abc'), ('\xe9\u20ac', 'strict', b'\xe9\x80'), ('\xff', 'strict', b'\xff'),

       # test error handlers[](#l1.25)
       ('\u0141', 'strict', None),[](#l1.26)
       ('\u0141', 'ignore', b''),[](#l1.27)
       ('\u0141', 'replace', b'L'),[](#l1.28)

       ('\udc98', 'surrogateescape', b'\x98'),[](#l1.29)

       ('\udc98', 'surrogatepass', None),[](#l1.30)
   ))[](#l1.31)
   self.check_decode(1252, ([](#l1.32)
       (b'abc', 'strict', 'abc'),[](#l1.33)
       (b'\xe9\x80', 'strict', '\xe9\u20ac'),[](#l1.34)
       (b'\xff', 'strict', '\xff'),[](#l1.35)

```
       # invalid bytes[](#l1.36)
```

       (b'[\x98]', 'strict', None),[](#l1.37)

       (b'[\x98]', 'ignore', '[]'),[](#l1.38)

       (b'[\x98]', 'replace', '[\ufffd]'),[](#l1.39)

       (b'[\x98]', 'surrogateescape', '[\udc98]'),[](#l1.40)

       (b'[\x98]', 'surrogatepass', None),[](#l1.41)
   ))[](#l1.42)

def test_cp_utf7(self):

--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -84,6 +84,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #13916: Disallowed the surrogatepass error handler for non UTF-*
+  encodings.
+
 - Issue #20998: Fixed re.fullmatch() of repeated single character pattern
   with ignore case.  Original patch by Matthew Barnett.

--- a/Python/codecs.c +++ b/Python/codecs.c @@ -901,6 +901,7 @@ PyObject *PyCodec_BackslashReplaceErrors } } +#define ENC_UNKNOWN -1 #define ENC_UTF8 0 #define ENC_UTF16BE 1 #define ENC_UTF16LE 2 @@ -916,7 +917,11 @@ get_standard_encoding(const char *encodi encoding += 3; if (*encoding == '-' || *encoding == '_' ) encoding++;

-    if (encoding[0] == '1' && encoding[1] == '6') {
+    if (encoding[0] == '8' && encoding[1] == '\0') {
+        *bytelength = 3;
+        return ENC_UTF8;
+    }
+    else if (encoding[0] == '1' && encoding[1] == '6') {
       encoding += 2;[](#l3.21)
       *bytelength = 2;[](#l3.22)
       if (*encoding == '\0') {[](#l3.23)

@@ -955,9 +960,7 @@ get_standard_encoding(const char *encodi } } }

-    /* utf-8 */
-    *bytelength = 3;
-    return ENC_UTF8;
+    return ENC_UNKNOWN;

} /* This handler is declared static until someone demonstrates @@ -994,6 +997,12 @@ PyCodec_SurrogatePassErrors(PyObject *ex } code = get_standard_encoding(encoding, &bytelength); Py_DECREF(encode);

```
   if (code == ENC_UNKNOWN) {[](#l3.39)
```

       /* Not supported, fail with original exception */[](#l3.40)

       PyErr_SetObject(PyExceptionInstance_Class(exc), exc);[](#l3.41)

```
       Py_DECREF(object);[](#l3.42)
```
```
       return NULL;[](#l3.43)
```
```
   }[](#l3.44)
```

res = PyBytes_FromStringAndSize(NULL, bytelength*(end-start)); if (!res) { @@ -1068,6 +1077,12 @@ PyCodec_SurrogatePassErrors(PyObject *ex } code = get_standard_encoding(encoding, &bytelength); Py_DECREF(encode);

```
   if (code == ENC_UNKNOWN) {[](#l3.52)
```

       /* Not supported, fail with original exception */[](#l3.53)

       PyErr_SetObject(PyExceptionInstance_Class(exc), exc);[](#l3.54)

```
       Py_DECREF(object);[](#l3.55)
```
```
       return NULL;[](#l3.56)
```
```
   }[](#l3.57)
```

/* Try decoding a single surrogate character. If there are more, let the codec call us again. */