[Python-checkins] r54847 - in sandbox/trunk/pep0: TODO pep0/parse.py pep0/pep.py test_pep0.py

Tue Apr 17 05:51:16 CEST 2007

Author: brett.cannon
Date: Tue Apr 17 05:51:06 2007
New Revision: 54847

Modified:
   sandbox/trunk/pep0/TODO
   sandbox/trunk/pep0/pep0/parse.py
   sandbox/trunk/pep0/pep0/pep.py
   sandbox/trunk/pep0/test_pep0.py
Log:
Rework pep0.parse (including some renaming).

Also write tests for split_metadata.  This makes test_pep0 only run tests that
actually work.


Modified: sandbox/trunk/pep0/TODO
==============================================================================

--- sandbox/trunk/pep0/TODO	(original)
+++ sandbox/trunk/pep0/TODO	Tue Apr 17 05:51:06 2007
@@ -1,6 +1,9 @@
+* Update test_pep0:
+    + Group tests for pep0.parse together.
+    + Update tests to match current functionality.
+
 * Handle XXX comments:
     + __init__
-    + parse
     + pep
     + output
     + Update test_pep0 to reflect refactoring.

Modified: sandbox/trunk/pep0/pep0/parse.py
==============================================================================
--- sandbox/trunk/pep0/pep0/parse.py	(original)
+++ sandbox/trunk/pep0/pep0/parse.py	Tue Apr 17 05:51:06 2007
@@ -8,22 +8,20 @@
         + Must be an integer.
         + Must match file name.
 
-XXX
-    * Remove metadata validation and move to pep class.
-        + Still validate PEP number is an integer (it's needed for error
-          reporting from this point forward).
-        + Validate PEP number matches number in file name.
-
 """
 from __future__ import with_statement
 from .pep import PEP
 
 import os
-import re
 
-def consume_headers(directory='.'):
+def consume_directory(directory):
     """Pull out metadata for every PEP in the specified directory and return
-    them in a list sorted by PEP name."""
+    them in a list sorted by PEP name.
+
+    The PEP file name must start with 'pep-' and end with '.txt' to be
+    considered.
+
+    """
     peps = []
     for file_name in os.listdir(directory):
         if file_name.startswith('pep-') and file_name.endswith('.txt'):
@@ -33,96 +31,54 @@
 
 def consume_pep(path):
     """Consume the specified file as a PEP to get its metadata."""
-    pep_info = {}
+    metadata = {}
     with open(path, 'rU') as pep_file:
         try:
             for line in pep_file:
                 if line == '\n':
+                    # Found end of metadata.
                     break
-                elif line[1].isspace():
-                    type_ = parse_metadata(pep_info, line, type_)
+                elif line[0].isspace():
+                    # Whitespace indent signifies multi-line field data.
+                    field, data = split_metadata(line, field)
                 else:
-                    type_ = parse_metadata(pep_info, line)
+                    field, data = split_metadata(line)
+                prev_data = metadata.get(field)
+                if prev_data:
+                    data = metadata[field] + data
+                metadata[field] = data
         except Exception:
             print "*** In", pep_file
             raise
-    if not 'PEP' in pep_info:
+    # Make sure PEP field was found ...
+    if not 'PEP' in metadata:
         raise ValueError("PEP at file %s lacks a PEP number" % path)
-    if not 'Author' in pep_info:
-        raise ValueError("PEP %s is missing the Author field" %
-                         pep_info['PEP'])
-    if len(pep_info['Author']) < 1:
-        raise ValueError("PEP %s is lacking authors" % pep_info['PEP'])
-    if pep_info['Type'] not in PEP.type_values:
-        raise ValueError("%s is an invalid Type value for PEP %s" %
-                         (pep_info['Type'], pep_info['PEP']))
-    if pep_info['Status'] not in PEP.status_values:
-        raise ValueError("%s is an invalid Status value for PEP %s" %
-                         (pep_info['Status'], pep_info['PEP']))
-    return pep_info
-
-def parse_metadata(pep_info, line, previous_type=None):
-    """Parse the given line for PEP metadata, adding on to existing metadata if
-    previous_type is specified, returning the last type of metadata handled."""
-    if previous_type:
-        type_ = previous_type
+    # ... it matches the file name in some way ...
+    if metadata['PEP'] not in path:
+        raise ValueError("PEP number in file %s does not match number "
+                         "specified in its file name" % path)
+    # ... and that the number is a valid integer.
+    try:
+        metadata['PEP'] = int(metadata['PEP'])
+    except ValueError:
+        raise ValueError("PEP number in file %s is not valid" % path)
+    return metadata
+
+def split_metadata(line, continue_field=None):
+    """Parse the given line for PEP metadata, returning the field and data for
+    the line parsed.
+
+    If continue_field is specified then return that as the field parsed.
+
+    """
+    if continue_field:
+        field = continue_field
         data = line
     else:
-        type_, data = line.split(':', 1)
-    type_ = type_.strip()
+        try:
+            field, data = line.split(':', 1)
+        except ValueError:
+            raise ValueError("could not find field in %r" % line)
+    field = field.strip()
     data = data.strip()
-    handler = handlers.get(type_, handle_generic)
-    result = handler(data)
-    if previous_type:
-        previous_data = pep_info[type_]
-        if not isinstance(previous_data, list):
-            previous_data = [previous_data]
-            pep_info[type_] = previous_data
-        previous_data.extend(result)
-    else:
-        pep_info[type_] = result
-    return type_
-
-def handle_generic(data):
-    """Default handler for PEP metadata."""
-    return data
-
-def handle_pep_num(data):
-    """Return the integer for the PEP number."""
-    return int(data)
-
-def handle_author(data):
-    """Return a list of author names."""
-    angled = r'(?P<author>.+?) <.+?>'
-    paren = r'.+? \((?P<author>.+?)\)'
-    simple = r'(?P<author>[^,]+)'
-    author_list = []
-    for regex in (angled, paren, simple):
-        # Watch out for commas separating multiple names.
-        regex += '(,\s+)?'
-        for match in re.finditer(regex, data):
-            author = match.group('author')
-            # Watch out for suffixes like 'Jr.' when they are comma-separated
-            # from the name and thus cause issues when *all* names are only
-            # separated by commas.
-            author = match.group('author')
-            if not author.partition(' ')[1] and author.endswith('.'):
-                prev_author = author_list.pop()
-                author = ', '.join([prev_author, author])
-            author_list.append(author)
-        else:
-            # If authors were found then stop searching as only expect one
-            # style of author citation.
-            if author_list:
-                break
-    return author_list
-
-def handle_csv(data):
-    """Handle the Post-History."""
-    return [value.strip() for value in data.split(',') if value]
-
-handlers = {'Author': handle_author,
-            'PEP': handle_pep_num,
-            'Post-History': handle_csv,
-           }
-
+    return field, data

Modified: sandbox/trunk/pep0/pep0/pep.py
==============================================================================
--- sandbox/trunk/pep0/pep0/pep.py	(original)
+++ sandbox/trunk/pep0/pep0/pep.py	Tue Apr 17 05:51:06 2007
@@ -80,4 +80,30 @@
             finished.append(pep)
     return meta, info, accepted, open_, finished, empty, dead
 
+def handle_author(data):
+    """Return a list of author names."""
+    angled = r'(?P<author>.+?) <.+?>'
+    paren = r'.+? \((?P<author>.+?)\)'
+    simple = r'(?P<author>[^,]+)'
+    author_list = []
+    for regex in (angled, paren, simple):
+        # Watch out for commas separating multiple names.
+        regex += '(,\s+)?'
+        for match in re.finditer(regex, data):
+            author = match.group('author')
+            # Watch out for suffixes like 'Jr.' when they are comma-separated
+            # from the name and thus cause issues when *all* names are only
+            # separated by commas.
+            author = match.group('author')
+            if not author.partition(' ')[1] and author.endswith('.'):
+                prev_author = author_list.pop()
+                author = ', '.join([prev_author, author])
+            author_list.append(author)
+        else:
+            # If authors were found then stop searching as only expect one
+            # style of author citation.
+            if author_list:
+                break
+    return author_list
+
 

Modified: sandbox/trunk/pep0/test_pep0.py
==============================================================================
--- sandbox/trunk/pep0/test_pep0.py	(original)
+++ sandbox/trunk/pep0/test_pep0.py	Tue Apr 17 05:51:06 2007
@@ -1,11 +1,38 @@
 from __future__ import with_statement
-import pep0
+import pep0.parse
 
 import unittest
 from test import test_support
 from contextlib import contextmanager
 import os
 
+class ParseTests(unittest.TestCase):
+
+    """Test pep0.parse."""
+
+    def test_split_metadata_single_line(self):
+        # Test basic use case.
+        gave_field = "field"
+        gave_data = "data"
+        # Spaces in separator help test whitespace stripping.
+        for sep in (':', ' :', ': '):
+            line = sep.join([gave_field, gave_data])
+            got_field, got_data = pep0.parse.split_metadata(line)
+            self.failUnlessEqual(gave_field, got_field)
+            self.failUnlessEqual(gave_data, got_data)
+        bad_line = 'abc'
+        self.failUnlessRaises(ValueError, pep0.parse.split_metadata, bad_line)
+
+    def test_split_metadata_continuing_line(self):
+        # Make sure that if the line is considered a continuation of another
+        # one, the same field is returned.
+        gave_field ='a'
+        gave_data = 'b:c'
+        got_field, got_data = pep0.parse.split_metadata(gave_data, gave_field)
+        self.failUnlessEqual(got_field, gave_field)
+        self.failUnlessEqual(got_data, gave_data)
+
+
 class HandlerTests(unittest.TestCase):
 
     """Test the PEP field handlers for parsing data."""
@@ -263,11 +290,7 @@
 
 def test_main():
     test_support.run_unittest(
-                HandlerTests,
-                ParseMetaDataTests,
-                PEPClassTests,
-                ConsumePepTests,
-                EntryOutputTests,
+                ParseTests,
             )