Author: brett.cannon
Date: Tue Jun 19 06:22:32 2007
New Revision: 56034
Removed:
sandbox/trunk/pep0/pep0/parse.py
Modified:
sandbox/trunk/pep0/pep0/__init__.py
sandbox/trunk/pep0/pep0/pep.py
Log:
Move parsing of headers into the PEP class. Also shift the constructor over to
taking an open file instead of a dictionary. That allows for the removal of
pep0.parse.
As a result, the PEP class now verifies that headers appear in the proper
order and that no required headers are missing.
Modified: sandbox/trunk/pep0/pep0/__init__.py
==============================================================================
--- sandbox/trunk/pep0/pep0/__init__.py (original)
+++ sandbox/trunk/pep0/pep0/__init__.py Tue Jun 19 06:22:32 2007
@@ -13,22 +13,38 @@
3. Output the PEP (both by category and numerical index).
"""
-from __future__ import absolute_import
+from __future__ import absolute_import, with_statement
if __name__ == '__main__':
- from pep0.parse import consume_directory, consume_pep
from pep0.output import write_pep0
+ from pep0.pep import PEP
- from os.path import isdir
+ from operator import attrgetter
+ import os.path
from sys import argv, stdout
if not argv[1:]:
path = '.'
else:
path = argv[1]
- if isdir(path):
- peps = consume_directory(path)
+
+ peps = []
+ if os.path.isdir(path):
+ for file_path in os.listdir(path):
+ abs_file_path = os.path.join(path, file_path)
+ if not os.path.isfile(abs_file_path):
+ continue
+ if (not file_path.startswith('pep-') or
+ not file_path.endswith('.txt')):
+ continue
+ with open(abs_file_path, 'r') as pep_file:
+ peps.append(PEP(pep_file))
+ else:
+ peps.sort(key=attrgetter('number'))
+ elif os.path.isfile(path):
+ with open(path, 'r') as pep_file:
+ peps.append(PEP(pep_file))
else:
- peps = [consume_pep(path)]
+ raise ValueError("argument must be a directory or file path")
write_pep0(peps)
Deleted: /sandbox/trunk/pep0/pep0/parse.py
==============================================================================
--- /sandbox/trunk/pep0/pep0/parse.py Tue Jun 19 06:22:32 2007
+++ (empty file)
@@ -1,86 +0,0 @@
-"""Parse the metadata from a PEP file.
-
-Parsing consists of several steps:
-
- * Detecting and reading all lines of text relating to metadata.
- * Concatenating multi-line metadata for a single field into a single line.
- * Validate PEP number (needed for future error reporting).
- + Must be an integer.
- + Must match file name.
-
-"""
-from __future__ import with_statement
-from .pep import PEP
-
-from operator import attrgetter
-import os
-
-def consume_directory(directory):
- """Pull out metadata for every PEP in the specified directory and return
- them in a list sorted by PEP name.
-
- The PEP file name must start with 'pep-' and end with '.txt' to be
- considered.
-
- """
- peps = []
- for file_name in os.listdir(directory):
- if file_name.startswith('pep-') and file_name.endswith('.txt'):
- peps.append(consume_pep(os.path.join(directory, file_name)))
- peps.sort(key=attrgetter('number'))
- return peps
-
-def consume_pep(path):
- """Consume the specified file as a PEP to get its metadata."""
- metadata = {}
- field = None
- with open(path, 'rU') as pep_file:
- try:
- for line in pep_file:
- if line == '\n':
- # Found end of metadata.
- break
- elif line[0].isspace():
- assert field is not None
- # Whitespace indent signifies multi-line field data.
- field, data = split_metadata(line, field)
- else:
- field, data = split_metadata(line)
- prev_data = metadata.get(field)
- if prev_data:
- data = metadata[field] + data
- metadata[field] = data
- except Exception:
- raise
- # Make sure PEP field was found ...
- if not 'PEP' in metadata:
- raise ValueError("PEP at file %s lacks a PEP number" % path)
- # ... it matches the file name in some way ...
- if metadata['PEP'] not in path:
- raise ValueError("PEP number in file %s does not match number "
- "specified in its file name" % path)
- # ... and that the number is a valid integer.
- try:
- metadata['PEP'] = int(metadata['PEP'])
- except ValueError:
- raise ValueError("PEP number in file %s is not valid" % path)
- return PEP(metadata)
-
-def split_metadata(line, continue_field=None):
- """Parse the given line for PEP metadata, returning the field and data for
- the line parsed.
-
- If continue_field is specified then return that as the field parsed.
-
- """
- if continue_field:
- field = continue_field
- data = line
- else:
- try:
- field, data = line.split(':', 1)
- except ValueError:
- raise ValueError("could not find field in %r" % line)
- field = field.strip()
- data = data.strip()
- return field, data
Modified: sandbox/trunk/pep0/pep0/pep.py
==============================================================================
--- sandbox/trunk/pep0/pep0/pep.py (original)
+++ sandbox/trunk/pep0/pep0/pep.py Tue Jun 19 06:22:32 2007
@@ -28,7 +28,19 @@
A list of the authors' full names.
"""
+ # The various RFC 822 headers that are supported.
+ # The second item in the nested tuples represents if the header is
+ # required or not.
+ headers = (('PEP', True), ('Title', True), ('Version', True),
+ ('Last-Modified', True), ('Author', True),
+ ('Discussions-To', False), ('Status', True), ('Type', True),
+ ('Content-Type', False), ('Requires', False),
+ ('Created', True), ('Python-Version', False),
+ ('Post-History', True), ('Replaces', False),
+ ('Replaced-By', False))
+ # Valid values for the Type header.
type_values = ("Standards Track", "Informational", "Process")
+ # Valid values for the Status header.
# Active PEPs can only be for Informational or Process PEPs.
status_values = ("Accepted", "Rejected", "Withdrawn", "Deferred", "Final",
"Active", "Draft", "Replaced")
@@ -36,48 +48,68 @@
# XXX Uncomment to valid author names (along with code in __init__).
#valid_authors = set(x[0] for x in constants.email_addresses)
- # XXX Take in an open file.
- # XXX Parse header metadata (verify order and that all required fields
- # exist).
- def __init__(self, metadata_dict):
- """Init object based on dict containing metadata from a file.
-
- Required keys from metadata_dict are:
-
- * PEP
- Value must be an integer.
-
- * Title
- A string.
-
- * Type
- Value must match a value in self.type_values.
-
- * Status
- Value must match a value in self.status_values.
-
- * Author
- Value must have at least one author in the string as returned
- by self.parse_author.
-
- """
- # Verify keys exist.
- for required_key in ('PEP', 'Title', 'Type', 'Status', 'Author'):
- if required_key not in metadata_dict:
- raise KeyError("required key %r not in dict")
- # 'PEP'. PEP parsing should have already converted the number to an
- # integer, so just being safe here.
- self.number = int(metadata_dict['PEP'])
+
+ def __init__(self, pep_file):
+ """Init object from an open PEP file object."""
+ # Parse the headers.
+ metadata = {}
+ header_name = None
+ header_field_iter = iter(self.headers)
+ try:
+ while True:
+ header_line = pep_file.readline()
+ if header_line == '\n':
+ break
+ elif header_line.startswith(' '):
+ existing_data = metadata[header_name]
+ metadata[header_name] = existing_data + header_line.strip()
+ continue
+ else:
+ header_name, data = header_line.split(':', 1)
+ header_name = header_name.strip()
+ data = data.strip()
+ expected_header, required = header_field_iter.next()
+ try:
+ while header_name != expected_header:
+ if required:
+ err_msg = ("The PEP at %s did not handle the "
+ "%s header before needing to "
+ "handle the %s header")
+ raise ValueError(err_msg % (pep_file.name,
+ header_name,
+ expected_header))
+ else:
+ expected_header, required = header_field_iter.next()
+ metadata[header_name] = data
+ except StopIteration:
+ raise ValueError("the PEP at %s had the %s header unhandled "
+ "(something out of order?)" %
+ (pep_file.name, header_name))
+ else:
+ raise ValueError('no body to the PEP at %s' % pep_file.name)
+ except StopIteration:
+ raise ValueError("no body to the PEP at %s" % pep_file.name)
+ else:
+ try:
+ required = False
+ while not required:
+ expected_header, required = header_field_iter.next()
+ raise ValueError("the PEP at %s is missing the %s header" %
+ (pep_file.name, expected_header))
+ except StopIteration:
+ pass
+ # 'PEP'.
+ self.number = int(metadata['PEP'])
# 'Title'.
- self.title = metadata_dict['Title']
+ self.title = metadata['Title']
# 'Type'.
- type_ = metadata_dict['Type']
+ type_ = metadata['Type']
if type_ not in self.type_values:
raise ValueError('%r is not a valid Type value (PEP %s)' %
(type_, self.number))
self.type_ = type_
# 'Status'.
- status = metadata_dict['Status']
+ status = metadata['Status']
if status not in self.status_values:
raise ValueError("%r is not a valid Status value (PEP %s)" %
(status, self.number))
@@ -88,7 +120,7 @@
"Active status (PEP %s)" % self.number)
self.status = status
# 'Author'.
- authors = self._parse_author(metadata_dict['Author'])
+ authors = self._parse_author(metadata['Author'])
# XXX Uncomment (plus valid_authors) to validate author names.
#for author in authors:
# if author not in self.valid_authors:
@@ -97,6 +129,7 @@
raise ValueError("no authors found (PEP %s)" % self.number)
self.authors = authors
+
def _parse_author(self, data):
"""Return a list of author names."""
# XXX Consider using email.utils.parseaddr (doesn't work with names