cpython: 050772822bde (original) (raw)
Mercurial > cpython
changeset 73554:050772822bde 2.7
Issue #13333: The UTF-7 decoder now accepts lone surrogates (the encoder already accepts them). [#13333]
author | Antoine Pitrou <solipsis@pitrou.net> |
---|---|
date | Tue, 15 Nov 2011 01:49:40 +0100 |
parents | 555871844962 |
children | e277fe8380e0 |
files | Lib/test/test_unicode.py Misc/NEWS Objects/unicodeobject.c |
diffstat | 3 files changed, 19 insertions(+), 12 deletions(-): Lib/test/test_unicode.py (14), Misc/NEWS (3), Objects/unicodeobject.c (14) |
line wrap: on
line diff
--- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -771,10 +771,18 @@ class UnicodeTest( for (x, y) in utfTests: self.assertEqual(x.encode('utf-7'), y)
# Unpaired surrogates not supported[](#l1.7)
self.assertRaises(UnicodeError, unicode, '+3ADYAA-', 'utf-7')[](#l1.8)
# Unpaired surrogates are passed through[](#l1.9)
self.assertEqual(u'\uD801'.encode('utf-7'), '+2AE-')[](#l1.10)
self.assertEqual(u'\uD801x'.encode('utf-7'), '+2AE-x')[](#l1.11)
self.assertEqual(u'\uDC01'.encode('utf-7'), '+3AE-')[](#l1.12)
self.assertEqual(u'\uDC01x'.encode('utf-7'), '+3AE-x')[](#l1.13)
self.assertEqual('+2AE-'.decode('utf-7'), u'\uD801')[](#l1.14)
self.assertEqual('+2AE-x'.decode('utf-7'), u'\uD801x')[](#l1.15)
self.assertEqual('+3AE-'.decode('utf-7'), u'\uDC01')[](#l1.16)
self.assertEqual('+3AE-x'.decode('utf-7'), u'\uDC01x')[](#l1.17)
self.assertEqual(unicode('+3ADYAA-', 'utf-7', 'replace'), u'\ufffd\ufffd')[](#l1.19)
self.assertEqual(u'\uD801\U000abcde'.encode('utf-7'), '+2AHab9ze-')[](#l1.20)
self.assertEqual('+2AHab9ze-'.decode('utf-7'), u'\uD801\U000abcde')[](#l1.21)
# Direct encoded characters set_d = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?"
--- a/Misc/NEWS +++ b/Misc/NEWS @@ -9,6 +9,9 @@ What's New in Python 2.7.3? Core and Builtins ----------------- +- Issue #13333: The UTF-7 decoder now accepts lone surrogates (the encoder already accepts them).
--- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1628,21 +1628,17 @@ PyObject *PyUnicode_DecodeUTF7Stateful(c *p++ = outCh; #endif surrogate = 0;
continue;[](#l3.7) }[](#l3.8) else {[](#l3.9)
*p++ = surrogate;[](#l3.10) surrogate = 0;[](#l3.11)
errmsg = "second surrogate missing";[](#l3.12)
goto utf7Error;[](#l3.13) }[](#l3.14) }[](#l3.15)
else if (outCh >= 0xD800 && outCh <= 0xDBFF) {[](#l3.16)
if (outCh >= 0xD800 && outCh <= 0xDBFF) {[](#l3.17) /* first surrogate */[](#l3.18) surrogate = outCh;[](#l3.19) }[](#l3.20)
else if (outCh >= 0xDC00 && outCh <= 0xDFFF) {[](#l3.21)
errmsg = "unexpected second surrogate";[](#l3.22)
goto utf7Error;[](#l3.23)
}[](#l3.24) else {[](#l3.25) *p++ = outCh;[](#l3.26) }[](#l3.27)
@@ -1652,8 +1648,8 @@ PyObject *PyUnicode_DecodeUTF7Stateful(c inShift = 0; s++; if (surrogate) {
errmsg = "second surrogate missing at end of shift sequence";[](#l3.32)
goto utf7Error;[](#l3.33)
*p++ = surrogate;[](#l3.34)
surrogate = 0;[](#l3.35) }[](#l3.36) if (base64bits > 0) { /* left-over bits */[](#l3.37) if (base64bits >= 6) {[](#l3.38)