cpython: 620d23f7ad41 (original) (raw)
--- a/Lib/test/test_codeccallbacks.py +++ b/Lib/test/test_codeccallbacks.py @@ -744,7 +744,7 @@ class CodecCallbackTest(unittest.TestCas raise ValueError self.assertRaises(UnicodeError, codecs.charmap_decode, b"\xff", "strict", {0xff: None}) self.assertRaises(ValueError, codecs.charmap_decode, b"\xff", "strict", D())
- self.assertRaises(TypeError, codecs.charmap_decode, b"\xff", "strict", {0xff: sys.maxunicode+1})[](#l1.7)
+ self.assertRaises(TypeError, codecs.charmap_decode, b"\xff", "strict", {0xff: 0x110000})[](#l1.8)
def test_encodehelper(self): # enhance coverage of:
--- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1546,6 +1546,10 @@ class CharmapTest(unittest.TestCase): ("abc", 3) )
self.assertRaises(UnicodeDecodeError,[](#l2.7)
codecs.charmap_decode, b"\x00\x01\x02", "strict", "ab"[](#l2.8)
)[](#l2.9)
+ self.assertEqual( codecs.charmap_decode(b"\x00\x01\x02", "replace", "ab"), ("ab\ufffd", 3) @@ -1572,6 +1576,107 @@ class CharmapTest(unittest.TestCase): ("", len(allbytes)) )
+ def test_decode_with_int2str_map(self):
self.assertEqual([](#l2.19)
codecs.charmap_decode(b"\x00\x01\x02", "strict",[](#l2.20)
{0: 'a', 1: 'b', 2: 'c'}),[](#l2.21)
("abc", 3)[](#l2.22)
)[](#l2.23)
self.assertEqual([](#l2.25)
codecs.charmap_decode(b"\x00\x01\x02", "strict",[](#l2.26)
{0: 'Aa', 1: 'Bb', 2: 'Cc'}),[](#l2.27)
("AaBbCc", 3)[](#l2.28)
)[](#l2.29)
self.assertEqual([](#l2.31)
codecs.charmap_decode(b"\x00\x01\x02", "strict",[](#l2.32)
{0: '\U0010FFFF', 1: 'b', 2: 'c'}),[](#l2.33)
("\U0010FFFFbc", 3)[](#l2.34)
)[](#l2.35)
self.assertEqual([](#l2.37)
codecs.charmap_decode(b"\x00\x01\x02", "strict",[](#l2.38)
{0: 'a', 1: 'b', 2: ''}),[](#l2.39)
("ab", 3)[](#l2.40)
)[](#l2.41)
self.assertRaises(UnicodeDecodeError,[](#l2.43)
codecs.charmap_decode, b"\x00\x01\x02", "strict",[](#l2.44)
{0: 'a', 1: 'b'}[](#l2.45)
)[](#l2.46)
self.assertEqual([](#l2.48)
codecs.charmap_decode(b"\x00\x01\x02", "replace",[](#l2.49)
{0: 'a', 1: 'b'}),[](#l2.50)
("ab\ufffd", 3)[](#l2.51)
)[](#l2.52)
self.assertEqual([](#l2.54)
codecs.charmap_decode(b"\x00\x01\x02", "replace",[](#l2.55)
{0: 'a', 1: 'b', 2: None}),[](#l2.56)
("ab\ufffd", 3)[](#l2.57)
)[](#l2.58)
self.assertEqual([](#l2.60)
codecs.charmap_decode(b"\x00\x01\x02", "ignore",[](#l2.61)
{0: 'a', 1: 'b'}),[](#l2.62)
("ab", 3)[](#l2.63)
)[](#l2.64)
self.assertEqual([](#l2.66)
codecs.charmap_decode(b"\x00\x01\x02", "ignore",[](#l2.67)
{0: 'a', 1: 'b', 2: None}),[](#l2.68)
("ab", 3)[](#l2.69)
)[](#l2.70)
allbytes = bytes(range(256))[](#l2.72)
self.assertEqual([](#l2.73)
codecs.charmap_decode(allbytes, "ignore", {}),[](#l2.74)
("", len(allbytes))[](#l2.75)
)[](#l2.76)
+ def test_decode_with_int2int_map(self):
a = ord('a')[](#l2.79)
b = ord('b')[](#l2.80)
c = ord('c')[](#l2.81)
self.assertEqual([](#l2.83)
codecs.charmap_decode(b"\x00\x01\x02", "strict",[](#l2.84)
{0: a, 1: b, 2: c}),[](#l2.85)
("abc", 3)[](#l2.86)
)[](#l2.87)
# Issue #15379[](#l2.89)
self.assertEqual([](#l2.90)
codecs.charmap_decode(b"\x00\x01\x02", "strict",[](#l2.91)
{0: 0x10FFFF, 1: b, 2: c}),[](#l2.92)
("\U0010FFFFbc", 3)[](#l2.93)
)[](#l2.94)
self.assertRaises(TypeError,[](#l2.96)
codecs.charmap_decode, b"\x00\x01\x02", "strict",[](#l2.97)
{0: 0x110000, 1: b, 2: c}[](#l2.98)
)[](#l2.99)
self.assertRaises(UnicodeDecodeError,[](#l2.101)
codecs.charmap_decode, b"\x00\x01\x02", "strict",[](#l2.102)
{0: a, 1: b},[](#l2.103)
)[](#l2.104)
self.assertEqual([](#l2.106)
codecs.charmap_decode(b"\x00\x01\x02", "replace",[](#l2.107)
{0: a, 1: b}),[](#l2.108)
("ab\ufffd", 3)[](#l2.109)
)[](#l2.110)
self.assertEqual([](#l2.112)
codecs.charmap_decode(b"\x00\x01\x02", "ignore",[](#l2.113)
{0: a, 1: b}),[](#l2.114)
("ab", 3)[](#l2.115)
)[](#l2.116)
+ + class WithStmtTest(unittest.TestCase): def test_encodedfile(self): f = io.BytesIO(b"\xc3\xbc")
--- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,9 @@ What's New in Python 3.2.4 Core and Builtins ----------------- +- Issue #15379: Fix passing of non-BMP characters as integers for the charmap decoder.
- Issue #13992: The trashcan mechanism is now thread-safe. This eliminates sporadic crashes in multi-thread programs when several long deallocator chains ran concurrently and involved subclasses of built-in container
--- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5250,12 +5250,36 @@ PyObject *PyUnicode_DecodeCharmap(const /* Apply mapping */ if (PyLong_Check(x)) { long value = PyLong_AS_LONG(x);
-if (value < 0 || value > 65535) {[](#l4.7)
+if (value < 0 || value > 0x10FFFF) {[](#l4.8)
PyErr_SetString(PyExc_TypeError,[](#l4.9)
-"character mapping must be in range(65536)");[](#l4.10)
+"character mapping must be in range(0x110000)");[](#l4.11)
Py_DECREF(x);[](#l4.12)
goto onError;[](#l4.13)
}[](#l4.14)
+#ifndef Py_UNICODE_WIDE
if (value > 0xFFFF) {[](#l4.17)
/* see the code for 1-n mapping below */[](#l4.18)
if (extrachars < 2) {[](#l4.19)
/* resize first */[](#l4.20)
Py_ssize_t oldpos = p - PyUnicode_AS_UNICODE(v);[](#l4.21)
Py_ssize_t needed = 10 - extrachars;[](#l4.22)
extrachars += needed;[](#l4.23)
/* XXX overflow detection missing */[](#l4.24)
if (_PyUnicode_Resize(&v,[](#l4.25)
PyUnicode_GET_SIZE(v) + needed) < 0) {[](#l4.26)
Py_DECREF(x);[](#l4.27)
goto onError;[](#l4.28)
}[](#l4.29)
p = PyUnicode_AS_UNICODE(v) + oldpos;[](#l4.30)
}[](#l4.31)
value -= 0x10000;[](#l4.32)
*p++ = 0xD800 | (value >> 10);[](#l4.33)
*p++ = 0xDC00 | (value & 0x3FF);[](#l4.34)
extrachars -= 2;[](#l4.35)
}[](#l4.36)
else[](#l4.37)
+#endif *p++ = (Py_UNICODE)value; } else if (x == Py_None) {