[Python-checkins] cpython (2.7): #1065986: Make pydoc handle unicode strings.
r.david.murray
python-checkins at python.org
Sun Jan 5 21:39:41 CET 2014
http://hg.python.org/cpython/rev/bf077fc97fdd
changeset: 88315:bf077fc97fdd
branch: 2.7
parent: 88286:d7ae948d9eee
user: R David Murray <rdmurray at bitdance.com>
date: Sun Jan 05 12:35:59 2014 -0500
summary:
#1065986: Make pydoc handle unicode strings.
Patch by Akira Kitada.
files:
Lib/pydoc.py | 58 ++++++++++++++----
Lib/test/test_pydoc.py | 91 ++++++++++++++++++++++++++++++
Misc/NEWS | 2 +
3 files changed, 137 insertions(+), 14 deletions(-)
diff --git a/Lib/pydoc.py b/Lib/pydoc.py
--- a/Lib/pydoc.py
+++ b/Lib/pydoc.py
@@ -81,6 +81,7 @@
def getdoc(object):
"""Get the doc string or comments for an object."""
result = inspect.getdoc(object) or inspect.getcomments(object)
+ result = _encode(result)
return result and re.sub('^ *\n', '', rstrip(result)) or ''
def splitdoc(doc):
@@ -182,6 +183,34 @@
return name, kind, cls, value
return map(fixup, inspect.classify_class_attrs(object))
+# ----------------------------------------------------- Unicode support helpers
+
+try:
+ _unicode = unicode
+except NameError:
+ # If Python is built without Unicode support, the unicode type
+ # will not exist. Fake one that nothing will match, and make
+ # the _encode function do nothing.
+ class _unicode(object):
+ pass
+ _encoding = 'ascii'
+ def _encode(text, encoding='ascii'):
+ return text
+else:
+ import locale
+ _encoding = locale.getpreferredencoding()
+
+ def _encode(text, encoding=None):
+ if isinstance(text, unicode):
+ return text.encode(encoding or _encoding, 'xmlcharrefreplace')
+ else:
+ return text
+
+def _binstr(obj):
+ # Ensure that we have an encoded (binary) string representation of obj,
+ # even if it is a unicode string.
+ return obj.encode(_encoding) if isinstance(obj, _unicode) else str(obj)
+
# ----------------------------------------------------- module manipulation
def ispackage(path):
@@ -424,12 +453,13 @@
def page(self, title, contents):
"""Format an HTML page."""
- return '''
+ return _encode('''
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html><head><title>Python: %s</title>
+<meta charset="utf-8">
</head><body bgcolor="#f0f0f8">
%s
-</body></html>''' % (title, contents)
+</body></html>''' % (title, contents), 'ascii')
def heading(self, title, fgcol, bgcol, extras=''):
"""Format a page heading."""
@@ -606,12 +636,12 @@
filelink = '(built-in)'
info = []
if hasattr(object, '__version__'):
- version = str(object.__version__)
+ version = _binstr(object.__version__)
if version[:11] == '$' + 'Revision: ' and version[-1:] == '$':
version = strip(version[11:-1])
info.append('version %s' % self.escape(version))
if hasattr(object, '__date__'):
- info.append(self.escape(str(object.__date__)))
+ info.append(self.escape(_binstr(object.__date__)))
if info:
head = head + ' (%s)' % join(info, ', ')
docloc = self.getdocloc(object)
@@ -694,11 +724,11 @@
result = result + self.bigsection(
'Data', '#ffffff', '#55aa55', join(contents, '<br>\n'))
if hasattr(object, '__author__'):
- contents = self.markup(str(object.__author__), self.preformat)
+ contents = self.markup(_binstr(object.__author__), self.preformat)
result = result + self.bigsection(
'Author', '#ffffff', '#7799ee', contents)
if hasattr(object, '__credits__'):
- contents = self.markup(str(object.__credits__), self.preformat)
+ contents = self.markup(_binstr(object.__credits__), self.preformat)
result = result + self.bigsection(
'Credits', '#ffffff', '#7799ee', contents)
@@ -1116,16 +1146,16 @@
result = result + self.section('DATA', join(contents, '\n'))
if hasattr(object, '__version__'):
- version = str(object.__version__)
+ version = _binstr(object.__version__)
if version[:11] == '$' + 'Revision: ' and version[-1:] == '$':
version = strip(version[11:-1])
result = result + self.section('VERSION', version)
if hasattr(object, '__date__'):
- result = result + self.section('DATE', str(object.__date__))
+ result = result + self.section('DATE', _binstr(object.__date__))
if hasattr(object, '__author__'):
- result = result + self.section('AUTHOR', str(object.__author__))
+ result = result + self.section('AUTHOR', _binstr(object.__author__))
if hasattr(object, '__credits__'):
- result = result + self.section('CREDITS', str(object.__credits__))
+ result = result + self.section('CREDITS', _binstr(object.__credits__))
return result
def docclass(self, object, name=None, mod=None, *ignored):
@@ -1375,7 +1405,7 @@
"""Page through text by feeding it to another program."""
pipe = os.popen(cmd, 'w')
try:
- pipe.write(text)
+ pipe.write(_encode(text))
pipe.close()
except IOError:
pass # Ignore broken pipes caused by quitting the pager program.
@@ -1385,7 +1415,7 @@
import tempfile
filename = tempfile.mktemp()
file = open(filename, 'w')
- file.write(text)
+ file.write(_encode(text))
file.close()
try:
os.system(cmd + ' "' + filename + '"')
@@ -1394,7 +1424,7 @@
def ttypager(text):
"""Page through text on a text terminal."""
- lines = split(plain(text), '\n')
+ lines = plain(_encode(plain(text), getattr(sys.stdout, 'encoding', _encoding))).split('\n')
try:
import tty
fd = sys.stdin.fileno()
@@ -1432,7 +1462,7 @@
def plainpager(text):
"""Simply print unformatted text. This is the ultimate fallback."""
- sys.stdout.write(plain(text))
+ sys.stdout.write(_encode(plain(text), getattr(sys.stdout, 'encoding', _encoding)))
def describe(thing):
"""Produce a short description of the given thing."""
diff --git a/Lib/test/test_pydoc.py b/Lib/test/test_pydoc.py
--- a/Lib/test/test_pydoc.py
+++ b/Lib/test/test_pydoc.py
@@ -10,6 +10,7 @@
import pkgutil
import unittest
import xml.etree
+import types
import test.test_support
from collections import namedtuple
from test.script_helper import assert_python_ok
@@ -428,6 +429,95 @@
self.assertIn('_asdict', helptext)
+@unittest.skipUnless(test.test_support.have_unicode,
+ "test requires unicode support")
+class TestUnicode(unittest.TestCase):
+
+ def setUp(self):
+ # Better not to use unicode escapes in literals, lest the
+ # parser choke on it if Python has been built without
+ # unicode support.
+ self.Q = types.ModuleType(
+ 'Q', 'Rational numbers: \xe2\x84\x9a'.decode('utf8'))
+ self.Q.__version__ = '\xe2\x84\x9a'.decode('utf8')
+ self.Q.__date__ = '\xe2\x84\x9a'.decode('utf8')
+ self.Q.__author__ = '\xe2\x84\x9a'.decode('utf8')
+ self.Q.__credits__ = '\xe2\x84\x9a'.decode('utf8')
+
+ self.assertIsInstance(self.Q.__doc__, unicode)
+
+ def test_render_doc(self):
+ # render_doc is robust against unicode in docstrings
+ doc = pydoc.render_doc(self.Q)
+ self.assertIsInstance(doc, str)
+
+ def test_encode(self):
+ # _encode is robust against characters outside the specified encoding
+ self.assertEqual(pydoc._encode(self.Q.__doc__, 'ascii'), 'Rational numbers: &#8474;')
+
+ def test_pipepager(self):
+ # pipepager does not choke on unicode
+ doc = pydoc.render_doc(self.Q)
+
+ saved, os.popen = os.popen, open
+ try:
+ with test.test_support.temp_cwd():
+ pydoc.pipepager(doc, 'pipe')
+ self.assertEqual(open('pipe').read(), pydoc._encode(doc))
+ finally:
+ os.popen = saved
+
+ def test_tempfilepager(self):
+ # tempfilepager does not choke on unicode
+ doc = pydoc.render_doc(self.Q)
+
+ output = {}
+ def mock_system(cmd):
+ import ast
+ output['content'] = open(ast.literal_eval(cmd.strip())).read()
+ saved, os.system = os.system, mock_system
+ try:
+ pydoc.tempfilepager(doc, '')
+ self.assertEqual(output['content'], pydoc._encode(doc))
+ finally:
+ os.system = saved
+
+ def test_plainpager(self):
+ # plainpager does not choke on unicode
+ doc = pydoc.render_doc(self.Q)
+
+ # Note: captured_stdout is too permissive when it comes to
+ # unicode, and using it here would make the test always
+ # pass.
+ with test.test_support.temp_cwd():
+ with open('output', 'w') as f:
+ saved, sys.stdout = sys.stdout, f
+ try:
+ pydoc.plainpager(doc)
+ finally:
+ sys.stdout = saved
+ self.assertIn('Rational numbers:', open('output').read())
+
+ def test_ttypager(self):
+ # ttypager does not choke on unicode
+ doc = pydoc.render_doc(self.Q)
+ # Test ttypager
+ with test.test_support.temp_cwd(), test.test_support.captured_stdin():
+ with open('output', 'w') as f:
+ saved, sys.stdout = sys.stdout, f
+ try:
+ pydoc.ttypager(doc)
+ finally:
+ sys.stdout = saved
+ self.assertIn('Rational numbers:', open('output').read())
+
+ def test_htmlpage(self):
+ # html.page does not choke on unicode
+ with test.test_support.temp_cwd():
+ with captured_stdout() as output:
+ pydoc.writedoc(self.Q)
+ self.assertEqual(output.getvalue(), 'wrote Q.html\n')
+
class TestHelper(unittest.TestCase):
def test_keywords(self):
self.assertEqual(sorted(pydoc.Helper.keywords),
@@ -456,6 +546,7 @@
test.test_support.run_unittest(PydocDocTest,
PydocImportTest,
TestDescriptions,
+ TestUnicode,
TestHelper)
finally:
reap_children()
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -30,6 +30,8 @@
Library
-------
+- Issue #1065986: pydoc can now handle unicode strings.
+
- Issue #16039: CVE-2013-1752: Change use of readline in imaplib module to
limit line length. Patch by Emil Lind.
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins
mailing list