[Tutor] A genomic sequence generator?

Danny Yoo dyoo@hkn.eecs.berkeley.edu
Sat, 5 Oct 2002 21:45:12 -0700 (PDT)


Hi everyone,

I've written another way of doing the genomic sequence generator that
uses... well... generators.  *grin* But I'm a bit ashamed because I know
it's more complicated than it needs to be.  I wonder how to simplify it!


Here it is:

######
"""An iterator and generator approach to the genomic sequence
generator described in:

    http://mail.python.org/pipermail/tutor/2002-October/017690.html

Danny Yoo (dyoo@hkn.eecs.berkeley.edu)
"""

from __future__ import nested_scopes
from __future__ import generators

import re


def genomic_sequence_generator(sequence):
    """Build an iterable over every expansion of a bracketed pattern.

    sequence is a string such as "AT<GC>A<TA>".  Text outside the angle
    brackets is kept literally; for each bracketed token, every element
    between the brackets is tried in turn.  The returned object is a
    chain of PairSummingIterator instances; the combinations themselves
    are only produced when that object is iterated.
    """
    # Start from a single empty string so that every token, including
    # the first one, is attached with the same pairing step.
    combined = SingleIterator("")
    for token in tokenize(sequence):
        if token.startswith('<'):
            # Drop the surrounding '<' and '>' and vary over what is left.
            choices = GroupIterator(token[1:-1])
        else:
            choices = SingleIterator(token)
        combined = PairSummingIterator(combined, choices)
    return combined


######################################################################

def tokenize(sequence):
    """Given a string like "AT<GC>A<TA>", splits it into a list in which
    every bracketed group is its own token, brackets included:
    ['AT', '<GC>', 'A', '<TA>'].

    Always returns a list (never a lazy filter object), and never
    contains empty strings.  An empty input gives an empty list.
    """
    # The capturing parentheses make re.split() keep the "<...>" pieces
    # in its result.  It also emits empty strings at the edges and
    # between adjacent groups; those carry no information and are dropped.
    return [token for token in re.split(r'(<.+?>)', sequence) if token]

def identity(x):
    """Return the argument exactly as it was passed in.

    Used as a filter() predicate, this keeps only the items that are
    themselves true.
    """
    return x


######################################################################
class GroupIterator:
    """Iterable wrapper around a group of elements.

    Each call to iter() on an instance hands back a new generator that
    walks the wrapped group one element at a time.
    """

    def __init__(self, group):
        # Only the reference is stored; the group is not copied.
        self.group = group

    def __iter__(self):
        """Return a generator over the elements of the wrapped group."""
        elements = self.group
        return sequence_generator(elements)

def sequence_generator(sequence):
    """Generator that yields the elements of sequence, one by one and
    in their original order."""
    for element in sequence:
        yield element

def SingleIterator(single_thing):
    """Wrap one value in a GroupIterator whose group holds just that
    value, so iterating it produces single_thing exactly once."""
    one_element_group = [single_thing]
    return GroupIterator(one_element_group)
######################################################################

class PairSummingIterator:
    """Iterable over every sum x + y, with x drawn from p1 and y drawn
    from p2.

    Warning: both p1 and p2 must be finite; an endless input means the
    iteration never gets through all the pairs.
    """

    def __init__(self, p1, p2):
        # Keep both operands; no pairing happens until iteration starts.
        self.p1 = p1
        self.p2 = p2

    def __iter__(self):
        """Return a generator producing the pairwise sums of p1 and p2."""
        first, second = self.p1, self.p2
        return pair_summing_generator(first, second)

def pair_summing_generator(p1, p2):
    """Generator yielding x + y for every x in p1 and every y in p2.

    Results come out grouped by x: all sums for the first x, then all
    sums for the second x, and so on.

    p1 is consumed lazily.  p2 is read completely, once, when iteration
    starts, so it may be a one-shot iterator or generator as well as a
    reusable container.  Both inputs must be finite.
    """
    # Snapshot p2 up front.  Looping over p2 directly inside the outer
    # loop would exhaust a one-shot iterator during the first x and
    # silently produce nothing for every later x.
    second_values = list(p2)
    for x in p1:
        for y in second_values:
            yield x + y

######################################################################


if __name__ == '__main__':
    # Demo: print every expansion of the sample pattern, one per line.
    for solution in genomic_sequence_generator("AT<GC>A<TA>"):
        # Parenthesised form runs on both Python 2 and Python 3; with a
        # single argument the output is the same on either.
        print(solution)
######



Hope this helps!