(original) (raw)

changeset: 89836:bc06f67234d0 user: Victor Stinner victor.stinner@gmail.com date: Tue Mar 18 01:18:21 2014 +0100 files: Doc/whatsnew/3.5.rst Lib/test/test_sys.py Misc/NEWS Python/pythonrun.c description: Issue #19977: When the ``LC_TYPE`` locale is the POSIX locale (``C`` locale), :py:data:`sys.stdin` and :py:data:`sys.stdout` are now using the ``surrogateescape`` error handler, instead of the ``strict`` error handler. diff -r daa6bf71170f -r bc06f67234d0 Doc/whatsnew/3.5.rst --- a/Doc/whatsnew/3.5.rst Tue Mar 18 00:53:32 2014 +0100 +++ b/Doc/whatsnew/3.5.rst Tue Mar 18 01:18:21 2014 +0100 @@ -79,7 +79,10 @@ Implementation improvements: -* None yet. +* When the ``LC_TYPE`` locale is the POSIX locale (``C`` locale), + :py:data:`sys.stdin` and :py:data:`sys.stdout` are now using the + ``surrogateescape`` error handler, instead of the ``strict`` error handler + (:issue:`19977`). Significantly Improved Library Modules: diff -r daa6bf71170f -r bc06f67234d0 Lib/test/test_sys.py --- a/Lib/test/test_sys.py Tue Mar 18 00:53:32 2014 +0100 +++ b/Lib/test/test_sys.py Tue Mar 18 01:18:21 2014 +0100 @@ -615,6 +615,50 @@ expected = None self.check_fsencoding(fs_encoding, expected) + @unittest.skipIf(sys.platform == 'win32', + 'test specific to UNIX') + def test_c_locale_surrogateescape(self): + # Force the POSIX locale + env = os.environ.copy() + env["LC_ALL"] = "C" + code = '\n'.join(( + 'import codecs, sys', + 'def dump(name):', + ' std = getattr(sys, name)', + ' encoding = codecs.lookup(std.encoding).name', + ' print("%s: %s:%s" % (name, encoding, std.errors))', + 'dump("stdin")', + 'dump("stdout")', + 'dump("stderr")', + )) + p = subprocess.Popen([sys.executable, "-I", "-c", code], + stdout=subprocess.PIPE, env=env) + out = p.communicate()[0] + self.assertEqual(out, + b'stdin: ascii:surrogateescape\n' + b'stdout: ascii:surrogateescape\n' + b'stderr: ascii:backslashreplace\n') + + # replace the default error handler + env['PYTHONIOENCODING'] = ':strict' + p = 
subprocess.Popen([sys.executable, "-c", code], + stdout=subprocess.PIPE, env=env) + out = p.communicate()[0] + self.assertEqual(out, + b'stdin: ascii:strict\n' + b'stdout: ascii:strict\n' + b'stderr: ascii:backslashreplace\n') + + # force the encoding + env['PYTHONIOENCODING'] = 'iso8859-1' + p = subprocess.Popen([sys.executable, "-c", code], + stdout=subprocess.PIPE, env=env) + out = p.communicate()[0] + self.assertEqual(out, + b'stdin: iso8859-1:surrogateescape\n' + b'stdout: iso8859-1:surrogateescape\n' + b'stderr: iso8859-1:backslashreplace\n') + def test_implementation(self): # This test applies to all implementations equally. diff -r daa6bf71170f -r bc06f67234d0 Misc/NEWS --- a/Misc/NEWS Tue Mar 18 00:53:32 2014 +0100 +++ b/Misc/NEWS Tue Mar 18 01:18:21 2014 +0100 @@ -13,6 +13,10 @@ Library ------- +- Issue #19977: When the ``LC_TYPE`` locale is the POSIX locale (``C`` locale), + :py:data:`sys.stdin` and :py:data:`sys.stdout` are now using the + ``surrogateescape`` error handler, instead of the ``strict`` error handler. + - Issue #20574: Implement incremental decoder for cp65001 code (Windows code page 65001, Microsoft UTF-8). diff -r daa6bf71170f -r bc06f67234d0 Python/pythonrun.c --- a/Python/pythonrun.c Tue Mar 18 00:53:32 2014 +0100 +++ b/Python/pythonrun.c Tue Mar 18 01:18:21 2014 +0100 @@ -1156,6 +1156,15 @@ encoding = _Py_StandardStreamEncoding; errors = _Py_StandardStreamErrors; if (!encoding || !errors) { + if (!errors) { + /* When the LC_CTYPE locale is the POSIX locale ("C locale"), + stdin and stdout use the surrogateescape error handler by + default, instead of the strict error handler. */ + char *loc = setlocale(LC_CTYPE, NULL); + if (loc != NULL && strcmp(loc, "C") == 0) + errors = "surrogateescape"; + } + pythonioencoding = Py_GETENV("PYTHONIOENCODING"); if (pythonioencoding) { char *err; @@ -1168,7 +1177,7 @@ if (err) { *err = '\0'; err++; - if (*err && !errors) { + if (*err && !_Py_StandardStreamErrors) { errors = err; } }