cpython: 6ac4f1609847 (original) (raw)
--- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1737,6 +1737,10 @@ class CharmapTest(unittest.TestCase): codecs.charmap_decode, b"\x00\x01\x02", "strict", "ab" )
self.assertRaises(UnicodeDecodeError,[](#l1.7)
codecs.charmap_decode, b"\x00\x01\x02", "strict", "ab\ufffe"[](#l1.8)
)[](#l1.9)
+ self.assertEqual( codecs.charmap_decode(b"\x00\x01\x02", "replace", "ab"), ("ab\ufffd", 3) @@ -1793,6 +1797,17 @@ class CharmapTest(unittest.TestCase): {0: 'a', 1: 'b'} )
self.assertRaises(UnicodeDecodeError,[](#l1.18)
codecs.charmap_decode, b"\x00\x01\x02", "strict",[](#l1.19)
{0: 'a', 1: 'b', 2: None}[](#l1.20)
)[](#l1.21)
# Issue #14850[](#l1.23)
self.assertRaises(UnicodeDecodeError,[](#l1.24)
codecs.charmap_decode, b"\x00\x01\x02", "strict",[](#l1.25)
{0: 'a', 1: 'b', 2: '\ufffe'}[](#l1.26)
)[](#l1.27)
+ self.assertEqual( codecs.charmap_decode(b"\x00\x01\x02", "replace", {0: 'a', 1: 'b'}), @@ -1805,6 +1820,13 @@ class CharmapTest(unittest.TestCase): ("ab\ufffd", 3) )
# Issue #14850[](#l1.36)
self.assertEqual([](#l1.37)
codecs.charmap_decode(b"\x00\x01\x02", "replace",[](#l1.38)
{0: 'a', 1: 'b', 2: '\ufffe'}),[](#l1.39)
("ab\ufffd", 3)[](#l1.40)
)[](#l1.41)
+ self.assertEqual( codecs.charmap_decode(b"\x00\x01\x02", "ignore", {0: 'a', 1: 'b'}), @@ -1817,6 +1839,13 @@ class CharmapTest(unittest.TestCase): ("ab", 3) )
# Issue #14850[](#l1.50)
self.assertEqual([](#l1.51)
codecs.charmap_decode(b"\x00\x01\x02", "ignore",[](#l1.52)
{0: 'a', 1: 'b', 2: '\ufffe'}),[](#l1.53)
("ab", 3)[](#l1.54)
)[](#l1.55)
+ allbytes = bytes(range(256)) self.assertEqual( codecs.charmap_decode(allbytes, "ignore", {}), @@ -1857,6 +1886,11 @@ class CharmapTest(unittest.TestCase): {0: a, 1: b}, )
self.assertRaises(UnicodeDecodeError,[](#l1.64)
codecs.charmap_decode, b"\x00\x01\x02", "strict",[](#l1.65)
{0: a, 1: b, 2: 0xFFFE},[](#l1.66)
)[](#l1.67)
+ self.assertEqual( codecs.charmap_decode(b"\x00\x01\x02", "replace", {0: a, 1: b}), @@ -1864,11 +1898,23 @@ class CharmapTest(unittest.TestCase): ) self.assertEqual(
codecs.charmap_decode(b"\x00\x01\x02", "replace",[](#l1.76)
{0: a, 1: b, 2: 0xFFFE}),[](#l1.77)
("ab\ufffd", 3)[](#l1.78)
)[](#l1.79)
self.assertEqual([](#l1.81) codecs.charmap_decode(b"\x00\x01\x02", "ignore",[](#l1.82) {0: a, 1: b}),[](#l1.83) ("ab", 3)[](#l1.84) )[](#l1.85)
self.assertEqual([](#l1.87)
codecs.charmap_decode(b"\x00\x01\x02", "ignore",[](#l1.88)
{0: a, 1: b, 2: 0xFFFE}),[](#l1.89)
("ab", 3)[](#l1.90)
)[](#l1.91)
+ class WithStmtTest(unittest.TestCase): def test_encodedfile(self):
--- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,9 @@ What's New in Python 3.3.1? Core and Builtins ----------------- +- Issue #14850: Now a charmap decoder treats U+FFFE as "undefined mapping"
- Issue #16730: importlib.machinery.FileFinder no longer raises an exception when trying to populate its cache and it finds out the directory is unreadable or has turned into a file. Reported and diagnosed by
--- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -7511,15 +7511,18 @@ Error: if (PyErr_ExceptionMatches(PyExc_LookupError)) { /* No mapping found means: mapping is undefined. */ PyErr_Clear();
x = Py_None;[](#l3.7)
Py_INCREF(x);[](#l3.8)
goto Undefined;[](#l3.9) } else[](#l3.10) goto onError;[](#l3.11) }[](#l3.12)
if (x == Py_None)[](#l3.15)
goto Undefined;[](#l3.16) if (PyLong_Check(x)) {[](#l3.17) long value = PyLong_AS_LONG(x);[](#l3.18)
if (value == 0xFFFE)[](#l3.19)
goto Undefined;[](#l3.20) if (value < 0 || value > MAX_UNICODE) {[](#l3.21) PyErr_Format(PyExc_TypeError,[](#l3.22) "character mapping must be in range(0x%lx)",[](#l3.23)
@@ -7530,21 +7533,6 @@ Error: if (unicode_putchar(&v, &outpos, value) < 0) goto onError; }
else if (x == Py_None) {[](#l3.28)
/* undefined mapping */[](#l3.29)
startinpos = s-starts;[](#l3.30)
endinpos = startinpos+1;[](#l3.31)
if (unicode_decode_call_errorhandler([](#l3.32)
errors, &errorHandler,[](#l3.33)
"charmap", "character maps to <undefined>",[](#l3.34)
&starts, &e, &startinpos, &endinpos, &exc, &s,[](#l3.35)
&v, &outpos)) {[](#l3.36)
Py_DECREF(x);[](#l3.37)
goto onError;[](#l3.38)
}[](#l3.39)
Py_DECREF(x);[](#l3.40)
continue;[](#l3.41)
}[](#l3.42) else if (PyUnicode_Check(x)) {[](#l3.43) Py_ssize_t targetsize;[](#l3.44)
@@ -7554,8 +7542,10 @@ Error: if (targetsize == 1) { /* 1-1 mapping */
if (unicode_putchar(&v, &outpos,[](#l3.50)
PyUnicode_READ_CHAR(x, 0)) < 0)[](#l3.51)
Py_UCS4 value = PyUnicode_READ_CHAR(x, 0);[](#l3.52)
if (value == 0xFFFE)[](#l3.53)
goto Undefined;[](#l3.54)
if (unicode_putchar(&v, &outpos, value) < 0)[](#l3.55) goto onError;[](#l3.56) }[](#l3.57) else if (targetsize > 1) {[](#l3.58)
@@ -7590,6 +7580,19 @@ Error: } Py_DECREF(x); ++s;
continue;[](#l3.63)
/* undefined mapping */[](#l3.65)
Py_XDECREF(x);[](#l3.66)
startinpos = s-starts;[](#l3.67)
endinpos = startinpos+1;[](#l3.68)
if (unicode_decode_call_errorhandler([](#l3.69)
errors, &errorHandler,[](#l3.70)
"charmap", "character maps to <undefined>",[](#l3.71)
&starts, &e, &startinpos, &endinpos, &exc, &s,[](#l3.72)
&v, &outpos)) {[](#l3.73)
goto onError;[](#l3.74)
} if (unicode_resize(&v, outpos) < 0)}[](#l3.75) }[](#l3.76)