Parse each line by character location
bearophileHUGS at lycos.com
bearophileHUGS at lycos.com
Tue Nov 4 15:35:41 EST 2008
George Sakkis:
> Here's a general solution for fixed size records:
> >>> def slicer(*sizes):
> ...     slices = len(sizes) * [None]
> ...     start = 0
> ...     for i,size in enumerate(sizes):
> ...         stop = start+size
> ...         slices[i] = slice(start,stop)
> ...         start = stop
> ...     return lambda string: [string[s].strip() for s in slices]
> ...
> >>> order_slicer = slicer(10,1,10,4)
> >>> order_slicer('______2345H0000300000_NC_'.replace('_',' '))
> ['2345', 'H', '0000300000', 'NC']
Nice. Here's a slightly modified version:
from collections import namedtuple
def slicer(names, sizes):
    """Build a parser for fixed-width text records.

    names -- field names for the resulting namedtuple; passed unchanged
        to namedtuple(), so anything it accepts works (a sequence of
        strings, or a single string of space/comma separated names).
    sizes -- iterable of field widths, in characters, in record order.

    Returns a function taking one string.  The string is cut at the
    consecutive column ranges defined by sizes, surrounding whitespace
    is stripped from every piece, and the pieces are returned as a
    "Sliced" namedtuple.

    Raises ValueError if the number of names differs from the number of
    sizes.

    >>> sl = slicer(["code", "p1", "progressive", "label"], (10, 1, 10, 4))
    >>> txt = "______2345H0000300000_NC_"
    >>> sl(txt.replace('_', ' '))
    Sliced(code='2345', p1='H', progressive='0000300000', label='NC')
    """
    # Turn the widths into consecutive, non-overlapping slices:
    # each field starts where the previous one stopped.
    slices = []
    start = 0
    for size in sizes:
        stop = start + size
        slices.append(slice(start, stop))
        start = stop

    Sliced = namedtuple("Sliced", names)

    # Fail here, at construction time, rather than with an obscure
    # argument-count TypeError on the first record that gets parsed.
    if len(Sliced._fields) != len(slices):
        raise ValueError("got %d field names but %d field sizes"
                         % (len(Sliced._fields), len(slices)))

    def parse(txt):
        """Split txt into whitespace-stripped fields; return a Sliced."""
        # Slicing past the end of a short record just yields '' fields.
        return Sliced(*(txt[s].strip() for s in slices))

    return parse
if __name__ == "__main__":
    # When run as a script, execute the doctests embedded in this
    # module's docstrings.
    import doctest
    doctest.testmod()
    # A parenthesised single argument prints the same text under the
    # Python 2 print statement and the Python 3 print() function.
    print("Doctests done.\n")
Bye,
bearophile
More information about the Python-list
mailing list