[Python-checkins] r58052 - in sandbox/trunk/pep0: NOTES pep0/__init__.py pep0/constants.py pep0/output.py pep0/pep.py

brett.cannon python-checkins at python.org
Sat Sep 8 05:32:42 CEST 2007


Author: brett.cannon
Date: Sat Sep  8 05:32:41 2007
New Revision: 58052

Modified:
   sandbox/trunk/pep0/NOTES
   sandbox/trunk/pep0/pep0/__init__.py
   sandbox/trunk/pep0/pep0/constants.py
   sandbox/trunk/pep0/pep0/output.py
   sandbox/trunk/pep0/pep0/pep.py
Log:
Move over to using Unicode in a proper fashion.


Modified: sandbox/trunk/pep0/NOTES
==============================================================================
--- sandbox/trunk/pep0/NOTES	(original)
+++ sandbox/trunk/pep0/NOTES	Sat Sep  8 05:32:41 2007
@@ -1,11 +1,6 @@
 TODO
 -----
 
-* Explicitly read PEPs as UTF-8.
-
-* Use unicodedata to reformat author names to NFC (or NFKC, not sure which) to
-  make Unicode characters count as one when taking the len of a unicode string.
-
 * Backport email addresses from old PEP 0 to PEPs themselves.
 
 * Add support for author names formatted as "Last, First, Suffix".

Modified: sandbox/trunk/pep0/pep0/__init__.py
==============================================================================
--- sandbox/trunk/pep0/pep0/__init__.py	(original)
+++ sandbox/trunk/pep0/pep0/__init__.py	Sat Sep  8 05:32:41 2007
@@ -15,6 +15,8 @@
 """
 from __future__ import absolute_import, with_statement
 
+import codecs
+
 if __name__ == '__main__':
     from pep0.output import write_pep0
     from pep0.pep import PEP
@@ -37,7 +39,7 @@
             if (not file_path.startswith('pep-') or
                     not file_path.endswith('.txt')):
                 continue
-            with open(abs_file_path, 'r') as pep_file:
+            with codecs.open(abs_file_path, 'r', encoding='UTF-8') as pep_file:
                 peps.append(PEP(pep_file))
         else:
             peps.sort(key=attrgetter('number'))
@@ -47,4 +49,5 @@
     else:
         raise ValueError("argument must be a directory or file path")
 
-    write_pep0(peps)
+    with codecs.open('pep-0.txt', 'w', encoding='UTF-8') as pep0_file:
+        write_pep0(peps, pep0_file)

Modified: sandbox/trunk/pep0/pep0/constants.py
==============================================================================
--- sandbox/trunk/pep0/pep0/constants.py	(original)
+++ sandbox/trunk/pep0/pep0/constants.py	Sat Sep  8 05:32:41 2007
@@ -1,9 +1,9 @@
 # -*- coding: utf-8 -*-
 title_length = 44
-column_format = (' %(type)1s%(status)1s %(number)4s  %(title)-' +
-                    str(title_length) + 's %(authors)-s')
+column_format = (u' %(type)1s%(status)1s %(number)4s  %(title)-' +
+                    unicode(title_length) + u's %(authors)-s')
 
-header = """PEP: 0
+header = u"""PEP: 0
 Title: Index of Python Enhancement Proposals (PEPs)
 Version: $Revision$
 Last-Modified: $Date$
@@ -14,7 +14,7 @@
 Created: 13-Jul-2000
 """
 
-intro = """
+intro = u"""
     The PEP contains the index of all Python Enhancement Proposals,
     known as PEPs.  PEP numbers are assigned by the PEP Editor, and
     once assigned are never changed.  The SVN history[1] of the PEP
@@ -25,12 +25,12 @@
     musings on the various outstanding PEPs.
 """
 
-references = """
+references = u"""
     [1] View PEP history online
         http://svn.python.org/projects/peps/trunk/
 """
 
-footer = """
+footer = u"""
 Local Variables:
 mode: indented-text
 indent-tabs-mode: nil

Modified: sandbox/trunk/pep0/pep0/output.py
==============================================================================
--- sandbox/trunk/pep0/pep0/output.py	(original)
+++ sandbox/trunk/pep0/pep0/output.py	Sat Sep  8 05:32:41 2007
@@ -4,19 +4,20 @@
 
 from operator import attrgetter
 from sys import stdout
+from unicodedata import normalize
 import warnings
 
 
-indent = ' '
+indent = u' '
 
 def write_column_headers(output):
     """Output the column headers for the PEP indices."""
-    column_headers = {'status': '', 'type': '', 'number': 'num',
-                        'title': 'title', 'authors': 'owner'}
+    column_headers = {'status': u'', 'type': u'', 'number': u'num',
+                        'title': u'title', 'authors': u'owner'}
     print>>output, constants.column_format % column_headers
     underline_headers = {}
     for key, value in column_headers.items():
-        underline_headers[key] = len(value) * '-'
+        underline_headers[key] = unicode(len(value) * '-')
     print>>output, constants.column_format % underline_headers
 
 
@@ -101,84 +102,84 @@
 def write_pep0(peps, output=stdout):
     print>>output, constants.header
     print>>output
-    print>>output, "Introduction"
+    print>>output, u"Introduction"
     print>>output, constants.intro
     print>>output
-    print>>output, "Index by Category"
+    print>>output, u"Index by Category"
     print>>output
     write_column_headers(output)
     meta, info, accepted, open_, finished, dead = sort_peps(peps)
     print>>output
-    print>>output, " Meta-PEPs (PEPs about PEPs or Processs)"
+    print>>output, u" Meta-PEPs (PEPs about PEPs or Processs)"
     print>>output
     for pep in meta:
-        print>>output, pep
+        print>>output, unicode(pep)
     print>>output
-    print>>output, " Other Informational PEPs"
+    print>>output, u" Other Informational PEPs"
     print>>output
     for pep in info:
-        print>>output, pep
+        print>>output, unicode(pep)
     print>>output
-    print>>output, " Accepted PEPs (accepted; may not be implemented yet)"
+    print>>output, u" Accepted PEPs (accepted; may not be implemented yet)"
     print>>output
     for pep in accepted:
-        print>>output, pep
+        print>>output, unicode(pep)
     print>>output
-    print>>output, " Open PEPs (under consideration)"
+    print>>output, u" Open PEPs (under consideration)"
     print>>output
     for pep in open_:
-        print>>output, pep
+        print>>output, unicode(pep)
     print>>output
-    print>>output, " Finished PEPs (done, implemented in code repository)"
+    print>>output, u" Finished PEPs (done, implemented in code repository)"
     print>>output
     for pep in finished:
-        print>>output, pep
+        print>>output, unicode(pep)
     print>>output
-    print>>output, " Deferred, Abandoned, Withdrawn, and Rejected PEPs"
+    print>>output, u" Deferred, Abandoned, Withdrawn, and Rejected PEPs"
     print>>output
     for pep in dead:
-        print>>output, pep
+        print>>output, unicode(pep)
     print>>output
     print>>output
-    print>>output, " Numerical Index"
+    print>>output, u" Numerical Index"
     print>>output
     write_column_headers(output)
     prev_pep = 0
     for pep in peps:
         if pep.number - prev_pep > 1:
             print>>output
-        print>>output, pep
+        print>>output, unicode(pep)
         prev_pep = pep.number
     print>>output
     print>>output
-    print>>output, "Key"
+    print>>output, u"Key"
     print>>output
     for type_ in PEP.type_values:
-        print>>output, "    %s - %s PEP" % (type_[0], type_)
+        print>>output, u"    %s - %s PEP" % (type_[0], type_)
     print>>output
     for status in PEP.status_values:
-        print>>output, "    %s - %s proposal" % (status[0], status)
+        print>>output, u"    %s - %s proposal" % (status[0], status)
 
     print>>output
     print>>output
-    print>>output, "Owners"
+    print>>output, u"Owners"
     print>>output
     authors_dict = verify_email_addresses(peps)
     max_name = max(authors_dict.keys(),
-                            key=lambda x: len(x.last_first))
+                    key=lambda x: len(normalize('NFC', x.last_first)))
     max_name_len = len(max_name.last_first)
-    print>>output, "    %s  %s" % ('name'.ljust(max_name_len), 'email address')
-    print>>output, "    %s  %s" % ((len('name')*'-').ljust(max_name_len),
+    print>>output, u"    %s  %s" % ('name'.ljust(max_name_len), 'email address')
+    print>>output, u"    %s  %s" % ((len('name')*'-').ljust(max_name_len),
                                     len('email address')*'-')
     sorted_authors = sort_authors(authors_dict)
     for author in sorted_authors:
         # Use the email from authors_dict instead of the one from 'author' as
         # the author instance may have an empty email.
-        print>>output, ("    %s  %s" %
+        print>>output, (u"    %s  %s" %
                 (author.last_first.ljust(max_name_len), authors_dict[author]))
     print>>output
     print>>output
-    print>>output, "References"
+    print>>output, u"References"
     print>>output
     print>>output, constants.references
     print>>output, constants.footer

Modified: sandbox/trunk/pep0/pep0/pep.py
==============================================================================
--- sandbox/trunk/pep0/pep0/pep.py	(original)
+++ sandbox/trunk/pep0/pep0/pep.py	Sat Sep  8 05:32:41 2007
@@ -48,9 +48,9 @@
         if not self.first:
             self.last_first = self.last
         else:
-            self.last_first = ', '.join([self.last, self.first])
+            self.last_first = u', '.join([self.last, self.first])
             if self.suffix:
-                self.last_first += ', ' + self.suffix
+                self.last_first += u', ' + self.suffix
 
     def __hash__(self):
         return hash(self.first_last)
@@ -60,7 +60,7 @@
 
     @property
     def sort_by(self):
-        if ' ' not in self.last:
+        if u' ' not in self.last:
             return self.last
         name_parts = self.last.split()
         for index, part in enumerate(name_parts):
@@ -68,7 +68,7 @@
                 break
         else:
             raise ValueError("last name missing a capital letter")
-        return ' '.join(name_parts[index:])
+        return u' '.join(name_parts[index:])
 
     def _last_name(self, full_name):
         """Find the last name (or nickname) of a full name.
@@ -79,7 +79,7 @@
         comma, then drop the suffix.
 
         """
-        name_partition = full_name.partition(',')
+        name_partition = full_name.partition(u',')
         no_suffix = name_partition[0].strip()
         suffix = name_partition[2].strip()
         name_parts = no_suffix.split()
@@ -89,7 +89,7 @@
         else:
             assert part_count > 2
             if name_parts[-2].islower():
-                return ' '.join(name_parts[-2:]), suffix
+                return u' '.join(name_parts[-2:]), suffix
             else:
                 return name_parts[-1], suffix
 
@@ -128,11 +128,11 @@
                 ('Post-History', True), ('Replaces', False),
                 ('Replaced-By', False))
     # Valid values for the Type header.
-    type_values = ("Standards Track", "Informational", "Process")
+    type_values = (u"Standards Track", u"Informational", u"Process")
     # Valid values for the Status header.
     # Active PEPs can only be for Informational or Process PEPs.
-    status_values = ("Accepted", "Rejected", "Withdrawn", "Deferred", "Final",
-                     "Active", "Draft", "Replaced")
+    status_values = (u"Accepted", u"Rejected", u"Withdrawn", u"Deferred", u"Final",
+                     u"Active", u"Draft", u"Replaced")
 
     def __init__(self, pep_file):
         """Init object from an open PEP file object."""
@@ -141,7 +141,7 @@
         metadata = pep_parser.parse(pep_file)
         header_names = metadata.keys()
         header_order = iter(self.headers)
-        current_header = ''
+        current_header = u''
         required = False
         try:
             for header_name in header_names:
@@ -183,7 +183,7 @@
             raise ValueError("%r is not a valid Status value (PEP %s)" %
                                 (status, self.number))
         # Special case for Active PEPs.
-        if (status == "Active" and
+        if (status == u"Active" and
                 self.type_ not in ("Process", "Informational")):
             raise ValueError("Only Process and Informational PEPs may have an "
                                 "Active status (PEP %s)" % self.number)
@@ -198,13 +198,13 @@
         """Return a list of author names and emails."""
         # XXX Consider using email.utils.parseaddr (doesn't work with names
         # lacking an email address).
-        angled = r'(?P<author>.+?) <(?P<email>.+?)>'
-        paren = r'(?P<email>.+?) \((?P<author>.+?)\)'
-        simple = r'(?P<author>[^,]+)'
+        angled = ur'(?P<author>.+?) <(?P<email>.+?)>'
+        paren = ur'(?P<email>.+?) \((?P<author>.+?)\)'
+        simple = ur'(?P<author>[^,]+)'
         author_list = []
         for regex in (angled, paren, simple):
             # Watch out for commas separating multiple names.
-            regex += '(,\s*)?'
+            regex += u'(,\s*)?'
             for match in re.finditer(regex, data):
                 # Watch out for suffixes like 'Jr.' when they are comma-separated
                 # from the name and thus cause issues when *all* names are only
@@ -214,7 +214,7 @@
                 if not author.partition(' ')[1] and author.endswith('.'):
                     prev_author = author_list.pop()
                     author = ', '.join([prev_author, author])
-                if 'email' not in match_dict:
+                if u'email' not in match_dict:
                     email = ''
                 else:
                     email = match_dict['email']
@@ -235,14 +235,14 @@
     def status_abbr(self):
         """Return how the status should be represented in the index."""
         if self.status in ('Draft', 'Active'):
-            return ' '
+            return u' '
         else:
             return self.status[0].upper()
 
     @property
     def author_abbr(self):
         """Return the author list as a comma-separated with only last names."""
-        return ', '.join(x.last for x in self.authors)
+        return u', '.join(x.last for x in self.authors)
 
     @property
     def title_abbr(self):
@@ -250,9 +250,9 @@
         if len(self.title) <= constants.title_length:
             return self.title
         wrapped_title = textwrap.wrap(self.title, constants.title_length - 4)
-        return wrapped_title[0] + ' ...'
+        return wrapped_title[0] + u' ...'
 
-    def __str__(self):
+    def __unicode__(self):
         """Return the line entry for the PEP."""
         pep_info = {'type': self.type_abbr, 'number': str(self.number),
                 'title': self.title_abbr, 'status': self.status_abbr,


More information about the Python-checkins mailing list