(original) (raw)

changeset: 72834:8d837bd8148a branch: 2.7 user: Éric Araujo merwok@netwok.org date: Sun Oct 09 07:11:19 2011 +0200 files: Lib/distutils/command/check.py Lib/distutils/command/register.py Lib/distutils/tests/test_check.py Lib/distutils/tests/test_register.py Misc/ACKS Misc/NEWS description: Fix distutils’ check and register Unicode handling (#13114). The check command was fixed by Kirill Kuzminykh. The register command was using StringIO.getvalue, which uses “''.join” and thus coerces to str using the default encoding (ASCII), so I changed the code to use one extra intermediary list and correctly encode to UTF-8. diff -r 86ffa3d59c36 -r 8d837bd8148a Lib/distutils/command/check.py --- a/Lib/distutils/command/check.py Sun Oct 09 06:33:54 2011 +0200 +++ b/Lib/distutils/command/check.py Sun Oct 09 07:11:19 2011 +0200 @@ -5,6 +5,7 @@ __revision__ = "$Id$" from distutils.core import Command +from distutils.dist import PKG_INFO_ENCODING from distutils.errors import DistutilsSetupError try: @@ -108,6 +109,8 @@ def check_restructuredtext(self): """Checks if the long string fields are reST-compliant.""" data = self.distribution.get_long_description() + if not isinstance(data, unicode): + data = data.decode(PKG_INFO_ENCODING) for warning in self._check_rst_data(data): line = warning[-1].get('line') if line is None: diff -r 86ffa3d59c36 -r 8d837bd8148a Lib/distutils/command/register.py --- a/Lib/distutils/command/register.py Sun Oct 09 06:33:54 2011 +0200 +++ b/Lib/distutils/command/register.py Sun Oct 09 07:11:19 2011 +0200 @@ -10,7 +10,6 @@ import urllib2 import getpass import urlparse -import StringIO from warnings import warn from distutils.core import PyPIRCCommand @@ -260,21 +259,30 @@ boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254' sep_boundary = '\n--' + boundary end_boundary = sep_boundary + '--' - body = StringIO.StringIO() + chunks = [] for key, value in data.items(): # handle multiple entries for the same name if type(value) not in (type([]), type( 
() )): value = [value] for value in value: - body.write(sep_boundary) - body.write('\nContent-Disposition: form-data; name="%s"'%key) - body.write("\n\n") - body.write(value) + chunks.append(sep_boundary) + chunks.append('\nContent-Disposition: form-data; name="%s"'%key) + chunks.append("\n\n") + chunks.append(value) if value and value[-1] == '\r': - body.write('\n') # write an extra newline (lurve Macs) - body.write(end_boundary) - body.write("\n") - body = body.getvalue() + chunks.append('\n') # write an extra newline (lurve Macs) + chunks.append(end_boundary) + chunks.append("\n") + + # chunks may be bytes (str) or unicode objects that we need to encode + body = [] + for chunk in chunks: + if isinstance(chunk, unicode): + body.append(chunk.encode('utf-8')) + else: + body.append(chunk) + + body = ''.join(body) # build the Request headers = { diff -r 86ffa3d59c36 -r 8d837bd8148a Lib/distutils/tests/test_check.py --- a/Lib/distutils/tests/test_check.py Sun Oct 09 06:33:54 2011 +0200 +++ b/Lib/distutils/tests/test_check.py Sun Oct 09 07:11:19 2011 +0200 @@ -1,3 +1,4 @@ +# -*- encoding: utf8 -*- """Tests for distutils.command.check.""" import unittest from test.test_support import run_unittest @@ -46,6 +47,15 @@ cmd = self._run(metadata, strict=1) self.assertEqual(cmd._warnings, 0) + # now a test with Unicode entries + metadata = {'url': u'xxx', 'author': u'\u00c9ric', + 'author_email': u'xxx', u'name': 'xxx', + 'version': u'xxx', + 'description': u'Something about esszet \u00df', + 'long_description': u'More things about esszet \u00df'} + cmd = self._run(metadata) + self.assertEqual(cmd._warnings, 0) + def test_check_document(self): if not HAS_DOCUTILS: # won't test without docutils return @@ -80,8 +90,8 @@ self.assertRaises(DistutilsSetupError, self._run, metadata, **{'strict': 1, 'restructuredtext': 1}) - # and non-broken rest - metadata['long_description'] = 'title\n=====\n\ntest' + # and non-broken rest, including a non-ASCII character to test #13114 + 
metadata['long_description'] = u'title\n=====\n\ntest \u00df' cmd = self._run(metadata, strict=1, restructuredtext=1) self.assertEqual(cmd._warnings, 0) diff -r 86ffa3d59c36 -r 8d837bd8148a Lib/distutils/tests/test_register.py --- a/Lib/distutils/tests/test_register.py Sun Oct 09 06:33:54 2011 +0200 +++ b/Lib/distutils/tests/test_register.py Sun Oct 09 07:11:19 2011 +0200 @@ -1,5 +1,5 @@ +# -*- encoding: utf8 -*- """Tests for distutils.command.register.""" -# -*- encoding: utf8 -*- import sys import os import unittest @@ -246,6 +246,24 @@ finally: del register_module.raw_input + # and finally a Unicode test (bug #13114) + metadata = {'url': u'xxx', 'author': u'\u00c9ric', + 'author_email': u'xxx', u'name': 'xxx', + 'version': u'xxx', + 'description': u'Something about esszet \u00df', + 'long_description': u'More things about esszet \u00df'} + + cmd = self._get_cmd(metadata) + cmd.ensure_finalized() + cmd.strict = 1 + inputs = RawInputs('1', 'tarek', 'y') + register_module.raw_input = inputs.__call__ + # let's run the command + try: + cmd.run() + finally: + del register_module.raw_input + def test_check_metadata_deprecated(self): # makes sure make_metadata is deprecated cmd = self._get_cmd() diff -r 86ffa3d59c36 -r 8d837bd8148a Misc/ACKS --- a/Misc/ACKS Sun Oct 09 06:33:54 2011 +0200 +++ b/Misc/ACKS Sun Oct 09 07:11:19 2011 +0200 @@ -469,6 +469,7 @@ Andrew Kuchling Ralf W. Grosse-Kunstleve Vladimir Kushnir +Kirill Kuzminykh (Кирилл Кузьминых) Ross Lagerwall Cameron Laird Łukasz Langa diff -r 86ffa3d59c36 -r 8d837bd8148a Misc/NEWS --- a/Misc/NEWS Sun Oct 09 06:33:54 2011 +0200 +++ b/Misc/NEWS Sun Oct 09 07:11:19 2011 +0200 @@ -50,6 +50,9 @@ Library ------- +- Issue #13114: Fix the distutils commands check and register when the + long description is a Unicode string with non-ASCII characters. + - Issue #7367: Fix pkgutil.walk_paths to skip directories whose contents cannot be read. /merwok@netwok.org