python grep

Peter Otten __peter__ at web.de
Fri Apr 9 14:55:13 EDT 2010


Mag Gam wrote:

> I am in the process of reading a zipped file which is about 6 GB.
> 
> I would like to know if there is a command similar to grep in Python,
> because I would like to emulate the -A and -B options of GNU grep.
> 
> Let's say I have this:
> 
> 083828.441,AA
> 093828.441,AA
> 094028.441,AA
> 094058.441,CC
> 094828.441,AA
> 103828.441,AA
> 123828.441,AA
> 
> 
> if I do grep -A2 -B2 "CC"
> 
> I get 2 lines before and 2 lines after "CC"
> 
> Is there an easy way to do this in python?


from itertools import islice, groupby
from collections import deque

def grep(instream, ismatch, before, after):
    """Yield matching items of *instream* together with surrounding context.

    A rough equivalent of ``grep -B before -A after``.  Consecutive items
    are split into alternating runs of matches and non-matches according
    to the truth value of ``ismatch(item)``.

    Args:
        instream: Any iterable; it is consumed lazily, one item at a time.
        ismatch: Callable taking one item.  Only the truthiness of its
            result is used, so predicates returning e.g. a count or a
            regex match object work as well as ones returning a bool.
        before: Maximum number of non-matching items directly preceding
            a run of matches to report (used as the ``maxlen`` of the
            context deque).
        after: Maximum number of non-matching items directly following
            a run of matches to report (used as the ``islice`` stop).

    Yields:
        ``(state, item)`` tuples where *state* is ``"before"``,
        ``"match"`` or ``"after"``.  No item is yielded twice: in a short
        gap between two runs of matches the first items are reported as
        ``"after"`` and only what is left can be reported as ``"before"``.
    """
    seen_match = False
    pending = ()  # trailing non-matching items that may become "before" context

    # Normalise the key to bool so that every truthy result lands in the
    # same run; otherwise groupby would start a new group whenever the
    # predicate's return value changes (1 -> 2, one match object -> another).
    runs = groupby(instream, lambda item: bool(ismatch(item)))
    for matched, run in runs:
        if matched:
            for item in pending:
                yield "before", item
            pending = ()
            seen_match = True
            for item in run:
                yield "match", item
        else:
            # "after" context only makes sense once something has matched.
            if seen_match:
                for item in islice(run, after):
                    yield "after", item
            # Whatever islice left behind is drained here; the deque keeps
            # only the last `before` items for the next run of matches.
            pending = deque(run, maxlen=before)


def demo1():
    """Print the lines of this script that contain "item", with context.

    The script's own source file is read line by line, numbered from 1,
    and passed through grep() with two lines of context on either side.
    Each selected line is printed with its line number and its state
    ("before:", "match:" or "after:").
    """
    import sys  # local import: only this demo writes to stdout directly

    def mentions_item(numbered_line):
        # Items are (line_number, line) pairs produced by enumerate();
        # written without tuple-parameter unpacking so that it also
        # parses on Python 3.
        return "item" in numbered_line[1]

    with open(__file__) as instream:
        numbered = enumerate(instream, 1)
        for state, (index, line) in grep(numbered, ismatch=mentions_item,
                                         before=2, after=2):
            # Lines read from the file keep their own newline, so write
            # the formatted text without appending another one.
            sys.stdout.write("%3d %-6s %s" % (index, state + ":", line))

def demo2():
    """Run grep() over parsed CSV rows and print the selected rows.

    The sample data from the original question is parsed with csv.reader;
    rows whose last column equals "CC" are matches, and one row of
    leading plus two rows of trailing context are printed with them.
    """
    import csv
    try:
        from StringIO import StringIO  # Python 2
    except ImportError:
        from io import StringIO  # Python 3

    lines = StringIO("""\
083828.441,AA
093828.441,AA
094028.441,AA
094058.441,CC
094828.441,AA
103828.441,AA
123828.441,AA
""")

    rows = csv.reader(lines)
    # The state label is not needed here; only the rows are printed.
    for _state, row in grep(rows, lambda r: r[-1] == "CC", 1, 2):
        print(row)

# Run both demos, in order, only when executed as a script.
if __name__ == "__main__":
    for run_demo in (demo1, demo2):
        run_demo()

Probably too slow; badly needs testing.

Peter



More information about the Python-list mailing list