r56034 - sandbox/trunk/pep0/pep0/__init__.py sandbox/trunk/pep0/pep0/parse.py sandbox/trunk/pep0/pep0/pep.py
Author: brett.cannon Date: Tue Jun 19 06:22:32 2007 New Revision: 56034 Removed: sandbox/trunk/pep0/pep0/parse.py Modified: sandbox/trunk/pep0/pep0/__init__.py sandbox/trunk/pep0/pep0/pep.py Log: Move parsing of headers into the PEP class. Also shift the constructor over to taking an open file instead of a dictionary. That allows for the removal of pep0.parse. All of this led to the verification that headers were in the proper order and that no required headers were missing. Modified: sandbox/trunk/pep0/pep0/__init__.py ============================================================================== --- sandbox/trunk/pep0/pep0/__init__.py (original) +++ sandbox/trunk/pep0/pep0/__init__.py Tue Jun 19 06:22:32 2007 @@ -13,22 +13,38 @@ 3. Output the PEP (both by category and numerical index). """ -from __future__ import absolute_import +from __future__ import absolute_import, with_statement if __name__ == '__main__': - from pep0.parse import consume_directory, consume_pep from pep0.output import write_pep0 + from pep0.pep import PEP - from os.path import isdir + from operator import attrgetter + import os.path from sys import argv, stdout if not argv[1:]: path = '.' else: path = argv[1] - if isdir(path): - peps = consume_directory(path) + + peps = [] + if os.path.isdir(path): + for file_path in os.listdir(path): + abs_file_path = os.path.join(path, file_path) + if not os.path.isfile(abs_file_path): + continue + if (not file_path.startswith('pep-') or + not file_path.endswith('.txt')): + continue + with open(abs_file_path, 'r') as pep_file: + peps.append(PEP(pep_file)) + else: + peps.sort(key=attrgetter('number')) + elif os.path.isfile(path): + with open(path, 'r') as pep_file: + peps.append(PEP(pep_file)) else: - peps = [consume_pep(path)] + raise ValueError("argument must be a directory or file path") write_pep0(peps) Deleted: /sandbox/trunk/pep0/pep0/parse.py ============================================================================== --- /sandbox/trunk/pep0/pep0/parse.py Tue Jun 19 06:22:32 2007 +++ (empty file) @@ -1,86 +0,0 @@ -"""Parse the metadata from a PEP file. - -Parsing consists of several steps: - - * Detecting and reading all lines of text relating to metadata. - * Concatenating multi-line metadata for a single field into a single line. - * Validate PEP number (needed for future error reporting). - + Must be an integer. - + Must match file name. - -""" -from __future__ import with_statement -from .pep import PEP - -from operator import attrgetter -import os - -def consume_directory(directory): - """Pull out metadata for every PEP in the specified directory and return - them in a list sorted by PEP name. - - The PEP file name must start with 'pep-' and end with '.txt' to be - considered. - - """ - peps = [] - for file_name in os.listdir(directory): - if file_name.startswith('pep-') and file_name.endswith('.txt'): - peps.append(consume_pep(os.path.join(directory, file_name))) - peps.sort(key=attrgetter('number')) - return peps - -def consume_pep(path): - """Consume the specified file as a PEP to get its metadata.""" - metadata = {} - field = None - with open(path, 'rU') as pep_file: - try: - for line in pep_file: - if line == '\n': - # Found end of metadata. - break - elif line[0].isspace(): - assert field is not None - # Whitespace indent signifies multi-line field data. - field, data = split_metadata(line, field) - else: - field, data = split_metadata(line) - prev_data = metadata.get(field) - if prev_data: - data = metadata[field] + data - metadata[field] = data - except Exception: - raise - # Make sure PEP field was found ... - if not 'PEP' in metadata: - raise ValueError("PEP at file %s lacks a PEP number" % path) - # ... it matches the file name in some way ... - if metadata['PEP'] not in path: - raise ValueError("PEP number in file %s does not match number " - "specified in its file name" % path) - # ... and that the number is a valid integer. - try: - metadata['PEP'] = int(metadata['PEP']) - except ValueError: - raise ValueError("PEP number in file %s is not valid" % path) - return PEP(metadata) - -def split_metadata(line, continue_field=None): - """Parse the given line for PEP metadata, returning the field and data for - the line parsed. - - If continue_field is specified then return that as the field parsed. - - """ - if continue_field: - field = continue_field - data = line - else: - try: - field, data = line.split(':', 1) - except ValueError: - raise ValueError("could not find field in %r" % line) - field = field.strip() - data = data.strip() - return field, data Modified: sandbox/trunk/pep0/pep0/pep.py ============================================================================== --- sandbox/trunk/pep0/pep0/pep.py (original) +++ sandbox/trunk/pep0/pep0/pep.py Tue Jun 19 06:22:32 2007 @@ -28,7 +28,19 @@ A list of the authors' full names. """ + # The various RFC 822 headers that are supported. + # The second item in the nested tuples represents if the header is + # required or not. + headers = (('PEP', True), ('Title', True), ('Version', True), + ('Last-Modified', True), ('Author', True), + ('Discussions-To', False), ('Status', True), ('Type', True), + ('Content-Type', False), ('Requires', False), + ('Created', True), ('Python-Version', False), + ('Post-History', True), ('Replaces', False), + ('Replaced-By', False)) + # Valid values for the Type header. type_values = ("Standards Track", "Informational", "Process") + # Valid values for the Status header. # Active PEPs can only be for Informational or Process PEPs. status_values = ("Accepted", "Rejected", "Withdrawn", "Deferred", "Final", "Active", "Draft", "Replaced") @@ -36,48 +48,68 @@ # XXX Uncomment to valid author names (along with code in __init__). #valid_authors = set(x[0] for x in constants.email_addresses) - # XXX Take in an open file. - # XXX Parse header metadata (verify order and that all required fields - # exist). - def __init__(self, metadata_dict): - """Init object based on dict containing metadata from a file. - - Required keys from metadata_dict are: - - * PEP - Value must be an integer. - - * Title - A string. - - * Type - Value must match a value in self.type_values. - - * Status - Value must match a value in self.status_values. - - * Author - Value must have at least one author in the string as returned - by self.parse_author. - - """ - # Verify keys exist. - for required_key in ('PEP', 'Title', 'Type', 'Status', 'Author'): - if required_key not in metadata_dict: - raise KeyError("required key %r not in dict") - # 'PEP'. PEP parsing should have already converted the number to an - # integer, so just being safe here. - self.number = int(metadata_dict['PEP']) + + def __init__(self, pep_file): + """Init object from an open PEP file object.""" + # Parse the headers. + metadata = {} + header_name = None + header_field_iter = iter(self.headers) + try: + while True: + header_line = pep_file.readline() + if header_line == '\n': + break + elif header_line.startswith(' '): + existing_data = metadata[header_name] + metadata[header_name] = existing_data + header_line.strip() + continue + else: + header_name, data = header_line.split(':', 1) + header_name = header_name.strip() + data = data.strip() + expected_header, required = header_field_iter.next() + try: + while header_name != expected_header: + if required: + err_msg = ("The PEP at %s did not handle the " + "%s header before needing to " + "handle the %s header") + raise ValueError(err_msg % (pep_file.name, + header_name, + expected_header)) + else: + expected_header, required = header_field_iter.next() + metadata[header_name] = data + except StopIteration: + raise ValueError("the PEP at %s had the %s header unhandled " + "(something out of order?)" % + (pep_file.name, header_name)) + else: + raise ValueError('no body to the PEP at %s' % pep_file.name) + except StopIteration: + raise ValueError("no body to the PEP at %s" % pep_file.name) + else: + try: + required = False + while not required: + expected_header, required = header_field_iter.next() + raise ValueError("the PEP at %s is missing the %s header" % + (pep_file.name, expected_header)) + except StopIteration: + pass + # 'PEP'. + self.number = int(metadata['PEP']) # 'Title'. - self.title = metadata_dict['Title'] + self.title = metadata['Title'] # 'Type'. - type_ = metadata_dict['Type'] + type_ = metadata['Type'] if type_ not in self.type_values: raise ValueError('%r is not a valid Type value (PEP %s)' % (type_, self.number)) self.type_ = type_ # 'Status'. - status = metadata_dict['Status'] + status = metadata['Status'] if status not in self.status_values: raise ValueError("%r is not a valid Status value (PEP %s)" % (status, self.number)) @@ -88,7 +120,7 @@ "Active status (PEP %s)" % self.number) self.status = status # 'Author'. - authors = self._parse_author(metadata_dict['Author']) + authors = self._parse_author(metadata['Author']) # XXX Uncomment (plus valid_authors) to validate author names. #for author in authors: # if author not in self.valid_authors: @@ -97,6 +129,7 @@ raise ValueError("no authors found (PEP %s)" % self.number) self.authors = authors + def _parse_author(self, data): """Return a list of author names.""" # XXX Consider using email.utils.parseaddr (doesn't work with names
participants (1)
-
brett.cannon