[Tutor] A genomic sequence generator?
Danny Yoo
dyoo@hkn.eecs.berkeley.edu
Sat, 5 Oct 2002 21:45:12 -0700 (PDT)
Hi everyone,
I've written another way of doing the genomic sequence generator that
uses... well... generators. *grin* But I'm a bit ashamed because I know
it's more complicated than it needs to be. I wonder how to simplify it!
Here it is:
######
"""An iterator and generator approach to the genomic sequence
generator described in:
http://mail.python.org/pipermail/tutor/2002-October/017690.html
Danny Yoo (dyoo@hkn.eecs.berkeley.edu)
"""
from __future__ import nested_scopes
from __future__ import generators
import re
def genomic_sequence_generator(sequence):
    """Build an iterable over every expansion of a bracketed sequence.

    For a sequence such as "AT<GC>A<TA>", each "<...>" token contributes
    one of its characters per result, while every other token is used
    verbatim.  The tokens are chained left to right with
    PairSummingIterator, starting from a single empty string.
    """
    tokens = tokenize(sequence)
    combined = SingleIterator("")
    for token in tokens:
        if token.startswith('<'):
            # Drop the first and last characters (the brackets); each
            # remaining character is one alternative at this position.
            choices = GroupIterator(token[1:-1])
        else:
            # A literal run offers exactly one alternative: itself.
            choices = SingleIterator(token)
        combined = PairSummingIterator(combined, choices)
    return combined
######################################################################
def tokenize(sequence):
    """Split a string like "AT<GC>A<TA>" into literal and group tokens.

    Bracketed groups keep their angle brackets, so the example above
    becomes ['AT', '<GC>', 'A', '<TA>'].  The empty strings that re.split
    produces next to a leading, trailing or adjacent group are dropped.

    Always returns a list: a comprehension is used instead of filter(),
    which hands back a lazy iterator on Python 3.
    """
    return [token for token in re.split(r'(<.+?>)', sequence) if token]
def identity(x):
    """Return x itself, untouched."""
    return x
######################################################################
class GroupIterator:
    """Iterable wrapper that visits each element of a group in turn.

    Every call to iter() on an instance asks the stored group for a new
    iterator, so an instance can be looped over repeatedly as long as the
    group itself is a re-iterable container such as a string or a list.
    """

    def __init__(self, group):
        # Any iterable works; a string is visited character by character.
        self.group = group

    def __iter__(self):
        # The built-in iter() already produces an iterator over the group,
        # so no hand-written pass-through generator is needed.
        return iter(self.group)
def sequence_generator(sequence):
    """Generator that yields the elements of sequence one at a time,
    in iteration order."""
    for element in sequence:
        yield element
def SingleIterator(single_thing):
    """Return a GroupIterator over a one-element list holding
    single_thing."""
    one_element_group = [single_thing]
    return GroupIterator(one_element_group)
######################################################################
class PairSummingIterator:
    """Iterable over the sums x + y, with x drawn from p1 and y from p2.

    Iterating an instance hands the two stored operands to
    pair_summing_generator, so each pass gets its own generator.

    Warning: p1 and p2 have to be finite, or else we run into big
    problems.
    """

    def __init__(self, p1, p2):
        self.p1 = p1
        self.p2 = p2

    def __iter__(self):
        first, second = self.p1, self.p2
        return pair_summing_generator(first, second)
def pair_summing_generator(p1, p2):
    """Yield x + y for every x in p1 combined with every y in p2.

    Results are ordered with p1 as the outer loop and p2 as the inner one.
    p2 is copied into a list up front so it can be replayed for each x;
    without that, a one-shot iterator passed as p2 would be exhausted
    after the first x and all later combinations silently lost.  p2 must
    therefore be finite.
    """
    inner = list(p2)
    for x in p1:
        for y in inner:
            yield x + y
######################################################################
if __name__ == '__main__':
    # Demo: print every expansion of a sample sequence, one per line.
    # The parenthesised single-argument print behaves the same as the
    # print statement on Python 2 and is also valid on Python 3.
    for solution in genomic_sequence_generator("AT<GC>A<TA>"):
        print(solution)
######
Hope this helps!