[Python-checkins] r54847 - in sandbox/trunk/pep0: TODO pep0/parse.py pep0/pep.py test_pep0.py
brett.cannon
python-checkins at python.org
Tue Apr 17 05:51:16 CEST 2007
Author: brett.cannon
Date: Tue Apr 17 05:51:06 2007
New Revision: 54847
Modified:
sandbox/trunk/pep0/TODO
sandbox/trunk/pep0/pep0/parse.py
sandbox/trunk/pep0/pep0/pep.py
sandbox/trunk/pep0/test_pep0.py
Log:
Rework pep0.parse (including some renaming).
Also write tests for split_metadata, and change test_pep0's test_main so that
it only runs the tests that actually work (currently just ParseTests).
Modified: sandbox/trunk/pep0/TODO
==============================================================================
--- sandbox/trunk/pep0/TODO (original)
+++ sandbox/trunk/pep0/TODO Tue Apr 17 05:51:06 2007
@@ -1,6 +1,9 @@
+* Update test_pep0:
+ + Group tests for pep0.parse together.
+ + Update tests to match current functionality.
+
* Handle XXX comments:
+ __init__
- + parse
+ pep
+ output
+ Update test_pep0 to reflect refactoring.
Modified: sandbox/trunk/pep0/pep0/parse.py
==============================================================================
--- sandbox/trunk/pep0/pep0/parse.py (original)
+++ sandbox/trunk/pep0/pep0/parse.py Tue Apr 17 05:51:06 2007
@@ -8,22 +8,20 @@
+ Must be an integer.
+ Must match file name.
-XXX
- * Remove metadata validation and move to pep class.
- + Still validate PEP number is an integer (it's needed for error
- reporting from this point forward).
- + Validate PEP number matches number in file name.
-
"""
from __future__ import with_statement
from .pep import PEP
import os
-import re
-def consume_headers(directory='.'):
+def consume_directory(directory):
"""Pull out metadata for every PEP in the specified directory and return
- them in a list sorted by PEP name."""
+ them in a list sorted by PEP name.
+
+ The PEP file name must start with 'pep-' and end with '.txt' to be
+ considered.
+
+ """
peps = []
for file_name in os.listdir(directory):
if file_name.startswith('pep-') and file_name.endswith('.txt'):
@@ -33,96 +31,54 @@
def consume_pep(path):
"""Consume the specified file as a PEP to get its metadata."""
- pep_info = {}
+ metadata = {}
with open(path, 'rU') as pep_file:
try:
for line in pep_file:
if line == '\n':
+ # Found end of metadata.
break
- elif line[1].isspace():
- type_ = parse_metadata(pep_info, line, type_)
+ elif line[0].isspace():
+ # Whitespace indent signifies multi-line field data.
+ field, data = split_metadata(line, field)
else:
- type_ = parse_metadata(pep_info, line)
+ field, data = split_metadata(line)
+ prev_data = metadata.get(field)
+ if prev_data:
+ data = metadata[field] + data
+ metadata[field] = data
except Exception:
print "*** In", pep_file
raise
- if not 'PEP' in pep_info:
+ # Make sure PEP field was found ...
+ if not 'PEP' in metadata:
raise ValueError("PEP at file %s lacks a PEP number" % path)
- if not 'Author' in pep_info:
- raise ValueError("PEP %s is missing the Author field" %
- pep_info['PEP'])
- if len(pep_info['Author']) < 1:
- raise ValueError("PEP %s is lacking authors" % pep_info['PEP'])
- if pep_info['Type'] not in PEP.type_values:
- raise ValueError("%s is an invalid Type value for PEP %s" %
- (pep_info['Type'], pep_info['PEP']))
- if pep_info['Status'] not in PEP.status_values:
- raise ValueError("%s is an invalid Status value for PEP %s" %
- (pep_info['Status'], pep_info['PEP']))
- return pep_info
-
-def parse_metadata(pep_info, line, previous_type=None):
- """Parse the given line for PEP metadata, adding on to existing metadata if
- previous_type is specified, returning the last type of metadata handled."""
- if previous_type:
- type_ = previous_type
+ # ... it matches the file name in some way ...
+ if metadata['PEP'] not in path:
+ raise ValueError("PEP number in file %s does not match number "
+ "specified in its file name" % path)
+ # ... and that the number is a valid integer.
+ try:
+ metadata['PEP'] = int(metadata['PEP'])
+ except ValueError:
+ raise ValueError("PEP number in file %s is not valid" % path)
+ return metadata
+
+def split_metadata(line, continue_field=None):
+ """Parse the given line for PEP metadata, returning the field and data for
+ the line parsed.
+
+ If continue_field is specified then return that as the field parsed.
+
+ """
+ if continue_field:
+ field = continue_field
data = line
else:
- type_, data = line.split(':', 1)
- type_ = type_.strip()
+ try:
+ field, data = line.split(':', 1)
+ except ValueError:
+ raise ValueError("could not find field in %r" % line)
+ field = field.strip()
data = data.strip()
- handler = handlers.get(type_, handle_generic)
- result = handler(data)
- if previous_type:
- previous_data = pep_info[type_]
- if not isinstance(previous_data, list):
- previous_data = [previous_data]
- pep_info[type_] = previous_data
- previous_data.extend(result)
- else:
- pep_info[type_] = result
- return type_
-
-def handle_generic(data):
- """Default handler for PEP metadata."""
- return data
-
-def handle_pep_num(data):
- """Return the integer for the PEP number."""
- return int(data)
-
-def handle_author(data):
- """Return a list of author names."""
- angled = r'(?P<author>.+?) <.+?>'
- paren = r'.+? \((?P<author>.+?)\)'
- simple = r'(?P<author>[^,]+)'
- author_list = []
- for regex in (angled, paren, simple):
- # Watch out for commas separating multiple names.
- regex += '(,\s+)?'
- for match in re.finditer(regex, data):
- author = match.group('author')
- # Watch out for suffixes like 'Jr.' when they are comma-separated
- # from the name and thus cause issues when *all* names are only
- # separated by commas.
- author = match.group('author')
- if not author.partition(' ')[1] and author.endswith('.'):
- prev_author = author_list.pop()
- author = ', '.join([prev_author, author])
- author_list.append(author)
- else:
- # If authors were found then stop searching as only expect one
- # style of author citation.
- if author_list:
- break
- return author_list
-
-def handle_csv(data):
- """Handle the Post-History."""
- return [value.strip() for value in data.split(',') if value]
-
-handlers = {'Author': handle_author,
- 'PEP': handle_pep_num,
- 'Post-History': handle_csv,
- }
-
+ return field, data
Modified: sandbox/trunk/pep0/pep0/pep.py
==============================================================================
--- sandbox/trunk/pep0/pep0/pep.py (original)
+++ sandbox/trunk/pep0/pep0/pep.py Tue Apr 17 05:51:06 2007
@@ -80,4 +80,30 @@
finished.append(pep)
return meta, info, accepted, open_, finished, empty, dead
+def handle_author(data):
+ """Return a list of author names."""
+ angled = r'(?P<author>.+?) <.+?>'
+ paren = r'.+? \((?P<author>.+?)\)'
+ simple = r'(?P<author>[^,]+)'
+ author_list = []
+ for regex in (angled, paren, simple):
+ # Watch out for commas separating multiple names.
+ regex += '(,\s+)?'
+ for match in re.finditer(regex, data):
+ author = match.group('author')
+ # Watch out for suffixes like 'Jr.' when they are comma-separated
+ # from the name and thus cause issues when *all* names are only
+ # separated by commas.
+ author = match.group('author')
+ if not author.partition(' ')[1] and author.endswith('.'):
+ prev_author = author_list.pop()
+ author = ', '.join([prev_author, author])
+ author_list.append(author)
+ else:
+ # If authors were found then stop searching as only expect one
+ # style of author citation.
+ if author_list:
+ break
+ return author_list
+
Modified: sandbox/trunk/pep0/test_pep0.py
==============================================================================
--- sandbox/trunk/pep0/test_pep0.py (original)
+++ sandbox/trunk/pep0/test_pep0.py Tue Apr 17 05:51:06 2007
@@ -1,11 +1,38 @@
from __future__ import with_statement
-import pep0
+import pep0.parse
import unittest
from test import test_support
from contextlib import contextmanager
import os
+class ParseTests(unittest.TestCase):
+
+ """Test pep0.parse ."""
+
+ def test_split_metadata_single_line(self):
+ # Test basic use case.
+ gave_field = "field"
+ gave_data = "data"
+ # Spaces in separator help test whitespace stripping.
+ for sep in (':', ' :', ': '):
+ line = sep.join([gave_field, gave_data])
+ got_field, got_data = pep0.parse.split_metadata(line)
+ self.failUnlessEqual(gave_field, got_field)
+ self.failUnlessEqual(gave_data, got_data)
+ bad_line = 'abc'
+ self.failUnlessRaises(ValueError, pep0.parse.split_metadata, bad_line)
+
+ def test_split_metadata_continuing_line(self):
+ # Make sure that if the line is considered a continuation of another
+ # one that the same field is returned.
+ gave_field ='a'
+ gave_data = 'b:c'
+ got_field, got_data = pep0.parse.split_metadata(gave_data, gave_field)
+ self.failUnlessEqual(got_field, gave_field)
+ self.failUnlessEqual(got_data, gave_data)
+
+
class HandlerTests(unittest.TestCase):
"""Test the PEP field handlers for parsing data."""
@@ -263,11 +290,7 @@
def test_main():
test_support.run_unittest(
- HandlerTests,
- ParseMetaDataTests,
- PEPClassTests,
- ConsumePepTests,
- EntryOutputTests,
+ ParseTests,
)
More information about the Python-checkins
mailing list