[Python-checkins] r58052 - in sandbox/trunk/pep0: NOTES pep0/__init__.py pep0/constants.py pep0/output.py pep0/pep.py
brett.cannon
python-checkins at python.org
Sat Sep 8 05:32:42 CEST 2007
Author: brett.cannon
Date: Sat Sep 8 05:32:41 2007
New Revision: 58052
Modified:
sandbox/trunk/pep0/NOTES
sandbox/trunk/pep0/pep0/__init__.py
sandbox/trunk/pep0/pep0/constants.py
sandbox/trunk/pep0/pep0/output.py
sandbox/trunk/pep0/pep0/pep.py
Log:
Move over to using Unicode in a proper fashion.
Modified: sandbox/trunk/pep0/NOTES
==============================================================================
--- sandbox/trunk/pep0/NOTES (original)
+++ sandbox/trunk/pep0/NOTES Sat Sep 8 05:32:41 2007
@@ -1,11 +1,6 @@
TODO
-----
-* Explicitly read PEPs as UTF-8.
-
-* Use unicodedata to reformat author names to NFC (or NFKC, not sure which) to
- make Unicode characters count as one when taking the len of a unicode string.
-
* Backport email addresses from old PEP 0 to PEPs themselves.
* Add support for author names formatted as "Last, First, Suffix".
Modified: sandbox/trunk/pep0/pep0/__init__.py
==============================================================================
--- sandbox/trunk/pep0/pep0/__init__.py (original)
+++ sandbox/trunk/pep0/pep0/__init__.py Sat Sep 8 05:32:41 2007
@@ -15,6 +15,8 @@
"""
from __future__ import absolute_import, with_statement
+import codecs
+
if __name__ == '__main__':
from pep0.output import write_pep0
from pep0.pep import PEP
@@ -37,7 +39,7 @@
if (not file_path.startswith('pep-') or
not file_path.endswith('.txt')):
continue
- with open(abs_file_path, 'r') as pep_file:
+ with codecs.open(abs_file_path, 'r', encoding='UTF-8') as pep_file:
peps.append(PEP(pep_file))
else:
peps.sort(key=attrgetter('number'))
@@ -47,4 +49,5 @@
else:
raise ValueError("argument must be a directory or file path")
- write_pep0(peps)
+ with codecs.open('pep-0.txt', 'w', encoding='UTF-8') as pep0_file:
+ write_pep0(peps, pep0_file)
Modified: sandbox/trunk/pep0/pep0/constants.py
==============================================================================
--- sandbox/trunk/pep0/pep0/constants.py (original)
+++ sandbox/trunk/pep0/pep0/constants.py Sat Sep 8 05:32:41 2007
@@ -1,9 +1,9 @@
# -*- coding: utf-8 -*-
title_length = 44
-column_format = (' %(type)1s%(status)1s %(number)4s %(title)-' +
- str(title_length) + 's %(authors)-s')
+column_format = (u' %(type)1s%(status)1s %(number)4s %(title)-' +
+ unicode(title_length) + u's %(authors)-s')
-header = """PEP: 0
+header = u"""PEP: 0
Title: Index of Python Enhancement Proposals (PEPs)
Version: $Revision$
Last-Modified: $Date$
@@ -14,7 +14,7 @@
Created: 13-Jul-2000
"""
-intro = """
+intro = u"""
The PEP contains the index of all Python Enhancement Proposals,
known as PEPs. PEP numbers are assigned by the PEP Editor, and
once assigned are never changed. The SVN history[1] of the PEP
@@ -25,12 +25,12 @@
musings on the various outstanding PEPs.
"""
-references = """
+references = u"""
[1] View PEP history online
http://svn.python.org/projects/peps/trunk/
"""
-footer = """
+footer = u"""
Local Variables:
mode: indented-text
indent-tabs-mode: nil
Modified: sandbox/trunk/pep0/pep0/output.py
==============================================================================
--- sandbox/trunk/pep0/pep0/output.py (original)
+++ sandbox/trunk/pep0/pep0/output.py Sat Sep 8 05:32:41 2007
@@ -4,19 +4,20 @@
from operator import attrgetter
from sys import stdout
+from unicodedata import normalize
import warnings
-indent = ' '
+indent = u' '
def write_column_headers(output):
"""Output the column headers for the PEP indices."""
- column_headers = {'status': '', 'type': '', 'number': 'num',
- 'title': 'title', 'authors': 'owner'}
+ column_headers = {'status': u'', 'type': u'', 'number': u'num',
+ 'title': u'title', 'authors': u'owner'}
print>>output, constants.column_format % column_headers
underline_headers = {}
for key, value in column_headers.items():
- underline_headers[key] = len(value) * '-'
+ underline_headers[key] = unicode(len(value) * '-')
print>>output, constants.column_format % underline_headers
@@ -101,84 +102,84 @@
def write_pep0(peps, output=stdout):
print>>output, constants.header
print>>output
- print>>output, "Introduction"
+ print>>output, u"Introduction"
print>>output, constants.intro
print>>output
- print>>output, "Index by Category"
+ print>>output, u"Index by Category"
print>>output
write_column_headers(output)
meta, info, accepted, open_, finished, dead = sort_peps(peps)
print>>output
- print>>output, " Meta-PEPs (PEPs about PEPs or Processs)"
+ print>>output, u" Meta-PEPs (PEPs about PEPs or Processs)"
print>>output
for pep in meta:
- print>>output, pep
+ print>>output, unicode(pep)
print>>output
- print>>output, " Other Informational PEPs"
+ print>>output, u" Other Informational PEPs"
print>>output
for pep in info:
- print>>output, pep
+ print>>output, unicode(pep)
print>>output
- print>>output, " Accepted PEPs (accepted; may not be implemented yet)"
+ print>>output, u" Accepted PEPs (accepted; may not be implemented yet)"
print>>output
for pep in accepted:
- print>>output, pep
+ print>>output, unicode(pep)
print>>output
- print>>output, " Open PEPs (under consideration)"
+ print>>output, u" Open PEPs (under consideration)"
print>>output
for pep in open_:
- print>>output, pep
+ print>>output, unicode(pep)
print>>output
- print>>output, " Finished PEPs (done, implemented in code repository)"
+ print>>output, u" Finished PEPs (done, implemented in code repository)"
print>>output
for pep in finished:
- print>>output, pep
+ print>>output, unicode(pep)
print>>output
- print>>output, " Deferred, Abandoned, Withdrawn, and Rejected PEPs"
+ print>>output, u" Deferred, Abandoned, Withdrawn, and Rejected PEPs"
print>>output
for pep in dead:
- print>>output, pep
+ print>>output, unicode(pep)
print>>output
print>>output
- print>>output, " Numerical Index"
+ print>>output, u" Numerical Index"
print>>output
write_column_headers(output)
prev_pep = 0
for pep in peps:
if pep.number - prev_pep > 1:
print>>output
- print>>output, pep
+ print>>output, unicode(pep)
prev_pep = pep.number
print>>output
print>>output
- print>>output, "Key"
+ print>>output, u"Key"
print>>output
for type_ in PEP.type_values:
- print>>output, " %s - %s PEP" % (type_[0], type_)
+ print>>output, u" %s - %s PEP" % (type_[0], type_)
print>>output
for status in PEP.status_values:
- print>>output, " %s - %s proposal" % (status[0], status)
+ print>>output, u" %s - %s proposal" % (status[0], status)
print>>output
print>>output
- print>>output, "Owners"
+ print>>output, u"Owners"
print>>output
authors_dict = verify_email_addresses(peps)
max_name = max(authors_dict.keys(),
- key=lambda x: len(x.last_first))
+ key=lambda x: len(normalize('NFC', x.last_first)))
max_name_len = len(max_name.last_first)
- print>>output, " %s %s" % ('name'.ljust(max_name_len), 'email address')
- print>>output, " %s %s" % ((len('name')*'-').ljust(max_name_len),
+ print>>output, u" %s %s" % ('name'.ljust(max_name_len), 'email address')
+ print>>output, u" %s %s" % ((len('name')*'-').ljust(max_name_len),
len('email address')*'-')
sorted_authors = sort_authors(authors_dict)
for author in sorted_authors:
# Use the email from authors_dict instead of the one from 'author' as
# the author instance may have an empty email.
- print>>output, (" %s %s" %
+ print>>output, (u" %s %s" %
(author.last_first.ljust(max_name_len), authors_dict[author]))
print>>output
print>>output
- print>>output, "References"
+ print>>output, u"References"
print>>output
print>>output, constants.references
print>>output, constants.footer
Modified: sandbox/trunk/pep0/pep0/pep.py
==============================================================================
--- sandbox/trunk/pep0/pep0/pep.py (original)
+++ sandbox/trunk/pep0/pep0/pep.py Sat Sep 8 05:32:41 2007
@@ -48,9 +48,9 @@
if not self.first:
self.last_first = self.last
else:
- self.last_first = ', '.join([self.last, self.first])
+ self.last_first = u', '.join([self.last, self.first])
if self.suffix:
- self.last_first += ', ' + self.suffix
+ self.last_first += u', ' + self.suffix
def __hash__(self):
return hash(self.first_last)
@@ -60,7 +60,7 @@
@property
def sort_by(self):
- if ' ' not in self.last:
+ if u' ' not in self.last:
return self.last
name_parts = self.last.split()
for index, part in enumerate(name_parts):
@@ -68,7 +68,7 @@
break
else:
raise ValueError("last name missing a capital letter")
- return ' '.join(name_parts[index:])
+ return u' '.join(name_parts[index:])
def _last_name(self, full_name):
"""Find the last name (or nickname) of a full name.
@@ -79,7 +79,7 @@
comma, then drop the suffix.
"""
- name_partition = full_name.partition(',')
+ name_partition = full_name.partition(u',')
no_suffix = name_partition[0].strip()
suffix = name_partition[2].strip()
name_parts = no_suffix.split()
@@ -89,7 +89,7 @@
else:
assert part_count > 2
if name_parts[-2].islower():
- return ' '.join(name_parts[-2:]), suffix
+ return u' '.join(name_parts[-2:]), suffix
else:
return name_parts[-1], suffix
@@ -128,11 +128,11 @@
('Post-History', True), ('Replaces', False),
('Replaced-By', False))
# Valid values for the Type header.
- type_values = ("Standards Track", "Informational", "Process")
+ type_values = (u"Standards Track", u"Informational", u"Process")
# Valid values for the Status header.
# Active PEPs can only be for Informational or Process PEPs.
- status_values = ("Accepted", "Rejected", "Withdrawn", "Deferred", "Final",
- "Active", "Draft", "Replaced")
+ status_values = (u"Accepted", u"Rejected", u"Withdrawn", u"Deferred", u"Final",
+ u"Active", u"Draft", u"Replaced")
def __init__(self, pep_file):
"""Init object from an open PEP file object."""
@@ -141,7 +141,7 @@
metadata = pep_parser.parse(pep_file)
header_names = metadata.keys()
header_order = iter(self.headers)
- current_header = ''
+ current_header = u''
required = False
try:
for header_name in header_names:
@@ -183,7 +183,7 @@
raise ValueError("%r is not a valid Status value (PEP %s)" %
(status, self.number))
# Special case for Active PEPs.
- if (status == "Active" and
+ if (status == u"Active" and
self.type_ not in ("Process", "Informational")):
raise ValueError("Only Process and Informational PEPs may have an "
"Active status (PEP %s)" % self.number)
@@ -198,13 +198,13 @@
"""Return a list of author names and emails."""
# XXX Consider using email.utils.parseaddr (doesn't work with names
# lacking an email address.
- angled = r'(?P<author>.+?) <(?P<email>.+?)>'
- paren = r'(?P<email>.+?) \((?P<author>.+?)\)'
- simple = r'(?P<author>[^,]+)'
+ angled = ur'(?P<author>.+?) <(?P<email>.+?)>'
+ paren = ur'(?P<email>.+?) \((?P<author>.+?)\)'
+ simple = ur'(?P<author>[^,]+)'
author_list = []
for regex in (angled, paren, simple):
# Watch out for commas separating multiple names.
- regex += '(,\s*)?'
+ regex += u'(,\s*)?'
for match in re.finditer(regex, data):
# Watch out for suffixes like 'Jr.' when they are comma-separated
# from the name and thus cause issues when *all* names are only
@@ -214,7 +214,7 @@
if not author.partition(' ')[1] and author.endswith('.'):
prev_author = author_list.pop()
author = ', '.join([prev_author, author])
- if 'email' not in match_dict:
+ if u'email' not in match_dict:
email = ''
else:
email = match_dict['email']
@@ -235,14 +235,14 @@
def status_abbr(self):
"""Return how the status should be represented in the index."""
if self.status in ('Draft', 'Active'):
- return ' '
+ return u' '
else:
return self.status[0].upper()
@property
def author_abbr(self):
"""Return the author list as a comma-separated with only last names."""
- return ', '.join(x.last for x in self.authors)
+ return u', '.join(x.last for x in self.authors)
@property
def title_abbr(self):
@@ -250,9 +250,9 @@
if len(self.title) <= constants.title_length:
return self.title
wrapped_title = textwrap.wrap(self.title, constants.title_length - 4)
- return wrapped_title[0] + ' ...'
+ return wrapped_title[0] + u' ...'
- def __str__(self):
+ def __unicode__(self):
"""Return the line entry for the PEP."""
pep_info = {'type': self.type_abbr, 'number': str(self.number),
'title': self.title_abbr, 'status': self.status_abbr,
More information about the Python-checkins mailing list