joining strings question

patrick.waldo at gmail.com patrick.waldo at gmail.com
Sat Mar 1 15:14:30 CET 2008


>    def category_iterator(source):
>      source = iter(source)
>      try:
>        while True:
>          item = source.next()

This gave me a lot of inspiration.  After a couple of days of banging
my head against the wall, I finally worked out some code that attaches
headers, titles, numbers, and categories in their appropriate
combinations--basically one BIG logic puzzle.

It's not the prettiest thing in the world, but it works.  If anyone
has a better way to do it, I'm all ears.  Anyway, thank you
all for your input; it helped me think outside the box.

import re

# Flattened listing in document order: a section marker ('RULES' or
# 'NOTICES'), then header lines, title lines, number lines and PDF file
# names.  Each string is kept on one physical line so the literal stays
# valid Python.
data = [
    'RULES',
    'Approval and Promulgation of Air Quality Implementation Plans:',
    'Illinois; Revisions to Emission Reduction Market System, ',
    '11042 [E8-3800]',
    'E8-3800.pdf',
    'Ohio; Oxides of Nitrogen Budget Trading Program; Correction, ',
    '11192 [Z8-2506]',
    'Z8-2506.pdf',
    'NOTICES',
    'Agency Information Collection Activities; Proposals, Submissions, and Approvals, ',
    '11108-11110 [E8-3934]',
    'E8-3934.pdf',
    'Data Availability for Lead National Ambient Air Quality Standard Review, ',
    '11110-11111 [E8-3935]',
    'E8-3935.pdf',
    'Environmental Impacts Statements; Notice of  Availability, ',
    '11112 [E8-3917]',
    'E8-3917.pdf',
]

# Every pattern is applied with .match(), so it is anchored at the start
# of the line but not at the end.
NOTICES = re.compile(r'NOTICES')    # section marker
RULES = re.compile(r'RULES')        # section marker
TITLE = re.compile(r'[A-Z][a-z].*')  # capital then lowercase: header or title
NUM = re.compile(r'\d.*')           # leading digit: number line
PDF = re.compile(r'.*\.pdf')        # contains ".pdf": file name, skipped


def _entry_size(items, start):
    """Return how many lines the entry starting at ``items[start]`` spans.

    3 means header + title + number, 2 means title + number, and 0 means
    the lines do not form an entry.  Raises IndexError when the lookahead
    runs past the end of ``items``.
    """
    if not TITLE.match(items[start]):
        return 0
    if TITLE.match(items[start + 1]) and NUM.match(items[start + 2]):
        return 3
    if NUM.match(items[start + 1]):
        return 2
    return 0


def combine_entries(items):
    """Join every title with its header and number, tagged by section.

    Walks ``items`` once.  A line matching RULES or NOTICES starts a new
    section and clears the current header.  A header/title/number triple
    sets the header that later title/number pairs in the same section
    reuse.  PDF lines are skipped.

    For each entry the joined text (header + title + number, concatenated
    with no separator) is printed as a one-element list followed by the
    section name and an empty line, exactly as the original script did.

    Scanning stops early when a line fits no pattern (after printing the
    offending line and "strange1"/"strange2") or when an entry is cut off
    by the end of the list.

    Returns a list of ``(joined_text, section)`` tuples in input order.
    """
    records = []
    module = ''  # current section; stays '' until a marker is seen
    header = ''  # header shared by the titles that follow it
    pos = 0
    while pos < len(items):
        item = items[pos]
        is_marker = bool(NOTICES.match(item) or RULES.match(item))
        if is_marker:
            module = item
            header = ''
            first = pos + 1  # the entry begins right after the marker
        elif PDF.match(item):
            pos += 1
            continue
        else:
            first = pos

        try:
            size = _entry_size(items, first)
        except IndexError:
            # The list ended in the middle of an entry; drop the fragment.
            break

        if size == 0:
            if is_marker:
                label = "strange1"
            else:
                label = "strange2"
            print("%s %s" % (item, label))
            break

        if size == 3:
            header = items[first]
        # The last two lines of the entry are always title and number.
        title_and_number = items[first + size - 2:first + size]
        text = ''.join([header] + title_and_number)

        print("%s %s" % ([text], module))
        print("")
        records.append((text, module))

        # Advance by position instead of remembering consumed strings, so
        # a title that repeats in a later section is not skipped.
        pos = first + size
    return records


records = combine_entries(data)



More information about the Python-list mailing list