cpython: ddfcb0de564f (original) (raw)
Mercurial > cpython
changeset 73552:ddfcb0de564f 3.2
Issue #13333: The UTF-7 decoder now accepts lone surrogates (the encoder already accepts them). [#13333]
author | Antoine Pitrou <solipsis@pitrou.net> |
---|---|
date | Tue, 15 Nov 2011 01:42:21 +0100 |
parents | 16ed15ff0d7c |
children | 250091e60f28 a00bb30cf775 |
files | Lib/test/test_unicode.py Misc/NEWS Objects/unicodeobject.c |
diffstat | 3 files changed, 19 insertions(+), 12 deletions(-): Lib/test/test_unicode.py (14), Misc/NEWS (3), Objects/unicodeobject.c (14) |
line wrap: on
line diff
--- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -1091,10 +1091,18 @@ class UnicodeTest(string_tests.CommonTes for (x, y) in utfTests: self.assertEqual(x.encode('utf-7'), y)
- # Unpaired surrogates not supported
- self.assertRaises(UnicodeError, str, b'+3ADYAA-', 'utf-7')
+ # Unpaired surrogates are passed through
+ self.assertEqual('\uD801'.encode('utf-7'), b'+2AE-')
+ self.assertEqual('\uD801x'.encode('utf-7'), b'+2AE-x')
+ self.assertEqual('\uDC01'.encode('utf-7'), b'+3AE-')
+ self.assertEqual('\uDC01x'.encode('utf-7'), b'+3AE-x')
+ self.assertEqual(b'+2AE-'.decode('utf-7'), '\uD801')
+ self.assertEqual(b'+2AE-x'.decode('utf-7'), '\uD801x')
+ self.assertEqual(b'+3AE-'.decode('utf-7'), '\uDC01')
+ self.assertEqual(b'+3AE-x'.decode('utf-7'), '\uDC01x')
- self.assertEqual(str(b'+3ADYAA-', 'utf-7', 'replace'), '\ufffd\ufffd')
+ self.assertEqual('\uD801\U000abcde'.encode('utf-7'), b'+2AHab9ze-')
+ self.assertEqual(b'+2AHab9ze-'.decode('utf-7'), '\uD801\U000abcde')
# Issue #2242: crash on some Windows/MSVC versions self.assertEqual(b'+\xc1'.decode('utf-7'), '\xc1')
--- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,9 @@ What's New in Python 3.2.3? Core and Builtins ----------------- +- Issue #13333: The UTF-7 decoder now accepts lone surrogates (the encoder already accepts them).
- Issue #13342: input() used to ignore sys.stdin's and sys.stdout's unicode error handler in interactive mode (when calling into PyOS_Readline()).
--- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2282,21 +2282,17 @@ PyObject *PyUnicode_DecodeUTF7Stateful(c *p++ = outCh; #endif surrogate = 0;
+ continue;
  }
  else {
+ *p++ = surrogate;
  surrogate = 0;
- errmsg = "second surrogate missing";
- goto utf7Error;
  }
  }
- else if (outCh >= 0xD800 && outCh <= 0xDBFF) {
+ if (outCh >= 0xD800 && outCh <= 0xDBFF) {
  /* first surrogate */
  surrogate = outCh;
  }
- else if (outCh >= 0xDC00 && outCh <= 0xDFFF) {
- errmsg = "unexpected second surrogate";
- goto utf7Error;
- }
  else {
  *p++ = outCh;
  }
@@ -2306,8 +2302,8 @@ PyObject *PyUnicode_DecodeUTF7Stateful(c inShift = 0; s++; if (surrogate) {
- errmsg = "second surrogate missing at end of shift sequence";
- goto utf7Error;
+ *p++ = surrogate;
+ surrogate = 0;
  }
  if (base64bits > 0) { /* left-over bits */
  if (base64bits >= 6) {