python grep
Peter Otten
__peter__ at web.de
Fri Apr 9 14:55:13 EDT 2010
Mag Gam wrote:
> I am in the process of reading a zipped file which is about 6gb.
>
> I would like to know if there is a command similar to grep in python
> because I would like to emulate, -A -B option of GNU grep.
>
> Let's say I have this,
>
> 083828.441,AA
> 093828.441,AA
> 094028.441,AA
> 094058.441,CC
> 094828.441,AA
> 103828.441,AA
> 123828.441,AA
>
>
> if I do grep -A2 -B2 "CC"
>
> I get 2 lines before and 2 lines after "CC"
>
> Is there an easy way to do this in python?
from itertools import islice, groupby
from collections import deque
def grep(instream, ismatch, before, after):
    """Yield matching items plus surrounding context, like ``grep -B -A``.

    Parameters:
        instream: any iterable of items (lines, rows, tuples, ...).
        ismatch:  callable taking one item; a truthy result marks a match.
                  The result is coerced to ``bool``, so predicates such as
                  ``re.compile(...).search`` are safe to pass directly.
        before:   maximum number of non-matching items to emit ahead of a
                  run of matches.
        after:    maximum number of non-matching items to emit following a
                  run of matches.

    Yields:
        ``(state, item)`` pairs where ``state`` is ``"before"``, ``"match"``
        or ``"after"``. Each item is yielded at most once; when two runs of
        matches are close together, the items between them are reported as
        "after" first and only the remainder can become "before".

    Only ``before`` non-matching items are buffered at any time, so the
    input is processed as a stream.
    """
    seen_match = False
    # Tail of the most recent non-matching run, kept in case a match follows.
    context = ()

    # Coercing the key to bool guarantees that groupby() produces strictly
    # alternating match / non-match runs. Without it, distinct truthy values
    # (e.g. separate regex match objects) would split one run of matches
    # into several groups.
    for matched, run in groupby(instream, lambda item: bool(ismatch(item))):
        if matched:
            for item in context:
                yield "before", item
            context = ()
            seen_match = True
            for item in run:
                yield "match", item
        else:
            if seen_match:
                # The head of the run is trailing context of the last match.
                for item in islice(run, after):
                    yield "after", item
            # Whatever is left is drained; only the last `before` items of it
            # are retained as potential leading context.
            context = deque(run, maxlen=before)
def demo1():
    """Grep this script's own source for "item", with 2 lines of context.

    Each reported line is written as its 1-based line number, the state
    label ("before:", "match:" or "after:") and the line text.
    """
    import sys

    with open(__file__) as instream:
        numbered = enumerate(instream, 1)
        # Tuple-parameter lambdas (``lambda (i, s): ...``) are a syntax error
        # on Python 3, so the (index, line) pair is indexed instead.
        hits = grep(numbered,
                    ismatch=lambda pair: "item" in pair[1],
                    before=2, after=2)
        for state, (index, line) in hits:
            # Lines keep their own newline, so write() is used rather than
            # print to avoid adding a second one; works on Python 2 and 3.
            sys.stdout.write("%3d %-6s %s" % (index, state + ":", line))
def demo2():
    """Grep parsed CSV rows for a last column of "CC".

    Prints each selected row (1 row of leading context, 2 of trailing
    context), showing that grep() works on arbitrary items, not just lines.
    """
    import csv
    try:
        from StringIO import StringIO  # Python 2
    except ImportError:
        from io import StringIO  # Python 3

    lines = StringIO("""\
083828.441,AA
093828.441,AA
094028.441,AA
094058.441,CC
094828.441,AA
103828.441,AA
123828.441,AA
""")
    rows = csv.reader(lines)
    for _state, row in grep(rows, lambda r: r[-1] == "CC", 1, 2):
        # With a single argument, print(...) behaves the same on Python 2
        # and Python 3.
        print(row)
if __name__ == "__main__":
    # Script entry point: run the demonstrations in their original order.
    for run_demo in (demo1, demo2):
        run_demo()
Probably too slow; badly needs testing.
Peter
More information about the Python-list
mailing list