(original) (raw)
changeset:   87840:4580976c07cb
branch:      3.3
parent:      87838:03a056c3b88e
user:        Victor Stinner <victor.stinner@gmail.com>
date:        Mon Dec 09 00:01:27 2013 +0100
files:       Lib/platform.py Lib/test/test_platform.py Misc/ACKS Misc/NEWS
description:
Issue #17429: platform.linux_distribution() now decodes files from the UTF-8
encoding with the surrogateescape error handler, instead of decoding from the
locale encoding in strict mode. It fixes the function on Fedora 19 which is
probably the first major distribution release with a non-ASCII name. Patch
written by Toshio Kuratomi.


diff -r 03a056c3b88e -r 4580976c07cb Lib/platform.py
--- a/Lib/platform.py	Sun Dec 08 10:56:07 2013 -0800
+++ b/Lib/platform.py	Mon Dec 09 00:01:27 2013 +0100
@@ -129,6 +129,10 @@
         # Standard Unix uses /dev/null
         DEV_NULL = '/dev/null'
 
+# Directory to search for configuration information on Unix.
+# Constant used by test_platform to test linux_distribution().
+_UNIXCONFDIR = '/etc'
+
 ### Platform specific APIs
 
 _libc_search = re.compile(b'(__libc_init)'
@@ -315,7 +319,7 @@
 
     """
     try:
-        etc = os.listdir('/etc')
+        etc = os.listdir(_UNIXCONFDIR)
     except os.error:
         # Probably not a Unix system
         return distname,version,id
@@ -331,7 +335,8 @@
         return _dist_try_harder(distname,version,id)
 
     # Read the first line
-    with open('/etc/'+file, 'r') as f:
+    with open(os.path.join(_UNIXCONFDIR, file), 'r',
+              encoding='utf-8', errors='surrogateescape') as f:
         firstline = f.readline()
     _distname, _version, _id = _parse_release_file(firstline)
 
diff -r 03a056c3b88e -r 4580976c07cb Lib/test/test_platform.py
--- a/Lib/test/test_platform.py	Sun Dec 08 10:56:07 2013 -0800
+++ b/Lib/test/test_platform.py	Mon Dec 09 00:01:27 2013 +0100
@@ -1,7 +1,10 @@
+from unittest import mock
+import contextlib
 import os
 import platform
 import subprocess
 import sys
+import tempfile
 import unittest
 import warnings
 
@@ -295,6 +298,19 @@
             returncode = ret >> 8
             self.assertEqual(returncode, len(data))
 
+    def test_linux_distribution_encoding(self):
+        # Issue #17429
+        with tempfile.TemporaryDirectory() as tempdir:
+            filename = os.path.join(tempdir, 'fedora-release')
+            with open(filename, 'w', encoding='utf-8') as f:
+                f.write('Fedora release 19 (Schr\xf6dinger\u2019s Cat)\n')
+
+            with mock.patch('platform._UNIXCONFDIR', tempdir):
+                distname, version, distid = platform.linux_distribution()
+
+            self.assertEqual(distname, 'Fedora')
+            self.assertEqual(version, '19')
+            self.assertEqual(distid, 'Schr\xf6dinger\u2019s Cat')
 
 def test_main():
     support.run_unittest(
diff -r 03a056c3b88e -r 4580976c07cb Misc/ACKS
--- a/Misc/ACKS	Sun Dec 08 10:56:07 2013 -0800
+++ b/Misc/ACKS	Mon Dec 09 00:01:27 2013 +0100
@@ -689,6 +689,7 @@
 Andrew Kuchling
 Dave Kuhlman
 Jon Kuhn
+Toshio Kuratomi
 Vladimir Kushnir
 Erno Kuusela
 Ross Lagerwall
diff -r 03a056c3b88e -r 4580976c07cb Misc/NEWS
--- a/Misc/NEWS	Sun Dec 08 10:56:07 2013 -0800
+++ b/Misc/NEWS	Mon Dec 09 00:01:27 2013 +0100
@@ -18,6 +18,12 @@
 Library
 -------
 
+- Issue #17429: platform.linux_distribution() now decodes files from the UTF-8
+  encoding with the surrogateescape error handler, instead of decoding from the
+  locale encoding in strict mode. It fixes the function on Fedora 19 which is
+  probably the first major distribution release with a non-ASCII name. Patch
+  written by Toshio Kuratomi.
+
 - Issue #19929: Call os.read with 32768 within subprocess.Popen.communicate
   rather than 4096 for efficiency. A microbenchmark shows Linux and OS X
   both using ~50% less cpu time this way.