Creating combinations of sequences

Sat Nov 13 23:17:48 CET 2004

```Minho Chae wrote:
> I'm trying to create combinations of sequences.
>
> For example, if the sequence is 'acgt' and the length is 8,
> then I would like to have 4^8 results such as
> 'aaaaaaaa', 'aaaaaaac', 'aaaaaaag', 'aaaaaaat', ... 'tttttttt'

Given the DNA base characters you use for your test case, do you

Here is a solution to your problem.

def cycle(chars, length):
    """Endlessly generate each item of chars repeated 'length' times.

    Yields 'length' copies of the first item, then 'length' copies of
    the second, and so on.  After the last item it starts again from
    the first, so the generator never finishes on its own.

    chars  -- iterable of items to repeat; it is copied into a tuple so
              that it can be traversed on every pass, even when the
              caller hands in a one-shot iterator
    length -- number of consecutive copies of each item

    If chars is empty or length is not positive there is nothing to
    yield, so the generator finishes immediately instead of spinning
    forever without ever producing a value.

    >>> g = cycle("ab", 2)
    >>> [next(g) for _ in range(6)]
    ['a', 'a', 'b', 'b', 'a', 'a']
    """
    items = tuple(chars)
    if not items or length <= 0:
        return
    # range() rather than the Python 2-only xrange(); built once and
    # reused for every item.
    counter = range(length)
    while True:
        for c in items:
            for _ in counter:
                yield c

def once(chars, length):
    """Generate each item of chars repeated 'length' times, one pass only.

    Yields 'length' copies of the first item, then 'length' copies of
    the second, and so on up to the last item.  When chars is
    exhausted the generator stops.

    chars  -- iterable of items to repeat
    length -- number of consecutive copies of each item; zero or a
              negative value yields nothing

    >>> list(once("ab", 2))
    ['a', 'a', 'b', 'b']
    """
    # range() rather than the Python 2-only xrange(); built once and
    # reused for every item.
    counter = range(length)
    for c in chars:
        for _ in counter:
            yield c

def all_combinations(chars, size):
    """Generate every word of 'size' characters drawn from chars.

    Each position of a word independently takes every item of chars,
    so len(chars) ** size words are produced.  If chars is in
    lexicographic order then so is the sequence of generated words.

    chars -- iterable of strings used as the characters for each
             position
    size  -- word length; zero or a negative size yields no words at
             all (not even the empty word)

    >>> for word in all_combinations("01", 3):
    ...     print(word)
    ...
    000
    001
    010
    011
    100
    101
    110
    111
    """
    if size <= 0:
        return

    # itertools.product walks the positions like an odometer, rightmost
    # position fastest, and simply ends after the last word.  That avoids
    # relying on a StopIteration escaping from inside this generator to
    # terminate it, which is an error under PEP 479.
    from itertools import product

    for letters in product(chars, repeat=size):
        yield "".join(letters)

def test():
    """Self-test: print every 8-letter word over "acgt" and check the count.

    Prints each generated word, then the total, and asserts that the
    total equals len(text) ** n (or that nothing was generated when n
    is 0).
    """
    text = "acgt"
    n = 8
    i = -1  # stays -1 if the generator yields nothing
    for i, term in enumerate(all_combinations(text, n)):
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print(term)
    print("%d combinations found" % (i + 1))
    if n:
        assert (i+1) == len(text) ** n, (i, text, n)
    else:
        assert i == -1, i

# Run the self-test only when executed as a script, not when imported.
if __name__ == "__main__":
    test()

Andrew
dalke at dalkescientific.com

```