[Python-checkins] cpython (2.7): Fix distutils’ check and register Unicode handling (#13114).

eric.araujo python-checkins at python.org
Sun Oct 9 09:00:18 CEST 2011


http://hg.python.org/cpython/rev/8d837bd8148a
changeset:   72834:8d837bd8148a
branch:      2.7
user:        Éric Araujo <merwok at netwok.org>
date:        Sun Oct 09 07:11:19 2011 +0200
summary:
  Fix distutils’ check and register Unicode handling (#13114).

The check command was fixed by Kirill Kuzminykh.

The register command was using StringIO.getvalue, which uses “''.join”
and thus coerces to str using the default encoding (ASCII), so I changed
the code to use one extra intermediary list and correctly encode to
UTF-8.

files:
  Lib/distutils/command/check.py       |   3 +
  Lib/distutils/command/register.py    |  28 ++++++++++-----
  Lib/distutils/tests/test_check.py    |  14 ++++++-
  Lib/distutils/tests/test_register.py |  20 ++++++++++-
  Misc/ACKS                            |   1 +
  Misc/NEWS                            |   3 +
  6 files changed, 56 insertions(+), 13 deletions(-)


diff --git a/Lib/distutils/command/check.py b/Lib/distutils/command/check.py
--- a/Lib/distutils/command/check.py
+++ b/Lib/distutils/command/check.py
@@ -5,6 +5,7 @@
 __revision__ = "$Id$"
 
 from distutils.core import Command
+from distutils.dist import PKG_INFO_ENCODING
 from distutils.errors import DistutilsSetupError
 
 try:
@@ -108,6 +109,8 @@
     def check_restructuredtext(self):
         """Checks if the long string fields are reST-compliant."""
         data = self.distribution.get_long_description()
+        if not isinstance(data, unicode):
+            data = data.decode(PKG_INFO_ENCODING)
         for warning in self._check_rst_data(data):
             line = warning[-1].get('line')
             if line is None:
diff --git a/Lib/distutils/command/register.py b/Lib/distutils/command/register.py
--- a/Lib/distutils/command/register.py
+++ b/Lib/distutils/command/register.py
@@ -10,7 +10,6 @@
 import urllib2
 import getpass
 import urlparse
-import StringIO
 from warnings import warn
 
 from distutils.core import PyPIRCCommand
@@ -260,21 +259,30 @@
         boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
         sep_boundary = '\n--' + boundary
         end_boundary = sep_boundary + '--'
-        body = StringIO.StringIO()
+        chunks = []
         for key, value in data.items():
             # handle multiple entries for the same name
             if type(value) not in (type([]), type( () )):
                 value = [value]
             for value in value:
-                body.write(sep_boundary)
-                body.write('\nContent-Disposition: form-data; name="%s"'%key)
-                body.write("\n\n")
-                body.write(value)
+                chunks.append(sep_boundary)
+                chunks.append('\nContent-Disposition: form-data; name="%s"'%key)
+                chunks.append("\n\n")
+                chunks.append(value)
                 if value and value[-1] == '\r':
-                    body.write('\n')  # write an extra newline (lurve Macs)
-        body.write(end_boundary)
-        body.write("\n")
-        body = body.getvalue()
+                    chunks.append('\n')  # write an extra newline (lurve Macs)
+        chunks.append(end_boundary)
+        chunks.append("\n")
+
+        # chunks may be bytes (str) or unicode objects that we need to encode
+        body = []
+        for chunk in chunks:
+            if isinstance(chunk, unicode):
+                body.append(chunk.encode('utf-8'))
+            else:
+                body.append(chunk)
+
+        body = ''.join(body)
 
         # build the Request
         headers = {
diff --git a/Lib/distutils/tests/test_check.py b/Lib/distutils/tests/test_check.py
--- a/Lib/distutils/tests/test_check.py
+++ b/Lib/distutils/tests/test_check.py
@@ -1,3 +1,4 @@
+# -*- encoding: utf8 -*-
 """Tests for distutils.command.check."""
 import unittest
 from test.test_support import run_unittest
@@ -46,6 +47,15 @@
         cmd = self._run(metadata, strict=1)
         self.assertEqual(cmd._warnings, 0)
 
+        # now a test with Unicode entries
+        metadata = {'url': u'xxx', 'author': u'\u00c9ric',
+                    'author_email': u'xxx', u'name': 'xxx',
+                    'version': u'xxx',
+                    'description': u'Something about esszet \u00df',
+                    'long_description': u'More things about esszet \u00df'}
+        cmd = self._run(metadata)
+        self.assertEqual(cmd._warnings, 0)
+
     def test_check_document(self):
         if not HAS_DOCUTILS: # won't test without docutils
             return
@@ -80,8 +90,8 @@
         self.assertRaises(DistutilsSetupError, self._run, metadata,
                           **{'strict': 1, 'restructuredtext': 1})
 
-        # and non-broken rest
-        metadata['long_description'] = 'title\n=====\n\ntest'
+        # and non-broken rest, including a non-ASCII character to test #13114
+        metadata['long_description'] = u'title\n=====\n\ntest \u00df'
         cmd = self._run(metadata, strict=1, restructuredtext=1)
         self.assertEqual(cmd._warnings, 0)
 
diff --git a/Lib/distutils/tests/test_register.py b/Lib/distutils/tests/test_register.py
--- a/Lib/distutils/tests/test_register.py
+++ b/Lib/distutils/tests/test_register.py
@@ -1,5 +1,5 @@
+# -*- encoding: utf8 -*-
 """Tests for distutils.command.register."""
-# -*- encoding: utf8 -*-
 import sys
 import os
 import unittest
@@ -246,6 +246,24 @@
         finally:
             del register_module.raw_input
 
+        # and finally a Unicode test (bug #13114)
+        metadata = {'url': u'xxx', 'author': u'\u00c9ric',
+                    'author_email': u'xxx', u'name': 'xxx',
+                    'version': u'xxx',
+                    'description': u'Something about esszet \u00df',
+                    'long_description': u'More things about esszet \u00df'}
+
+        cmd = self._get_cmd(metadata)
+        cmd.ensure_finalized()
+        cmd.strict = 1
+        inputs = RawInputs('1', 'tarek', 'y')
+        register_module.raw_input = inputs.__call__
+        # let's run the command
+        try:
+            cmd.run()
+        finally:
+            del register_module.raw_input
+
     def test_check_metadata_deprecated(self):
         # makes sure make_metadata is deprecated
         cmd = self._get_cmd()
diff --git a/Misc/ACKS b/Misc/ACKS
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -469,6 +469,7 @@
 Andrew Kuchling
 Ralf W. Grosse-Kunstleve
 Vladimir Kushnir
+Kirill Kuzminykh (Кирилл Кузьминых)
 Ross Lagerwall
 Cameron Laird
 Łukasz Langa
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -50,6 +50,9 @@
 Library
 -------
 
+- Issue #13114: Fix the distutils commands check and register when the
+  long description is a Unicode string with non-ASCII characters.
+
 - Issue #7367: Fix pkgutil.walk_paths to skip directories whose
   contents cannot be read.
 

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list