Test if list contains another list

bearophileHUGS at lycos.com bearophileHUGS at lycos.com
Fri Sep 26 22:39:16 CEST 2008

```I suggest Python programmers to fill the holes in the Python std lib
with some debugged & tuned implementations, their "bag of tricks", so
they don't have to re-invent and debug things all the time. This works
well with Psyco:

def issubseq(sub, items):
"""issubseq(sub, items): return true if the sequence 'sub' is a
contiguous
subsequence of the 'items' sequence.

>>> issubseq()
Traceback (most recent call last):
...
TypeError: issubseq() takes exactly 2 arguments (0 given)
>>> issubseq("abc")
Traceback (most recent call last):
...
TypeError: issubseq() takes exactly 2 arguments (1 given)
>>> issubseq(1, [1, 2, 3])
Traceback (most recent call last):
...
TypeError: 'int' object is not iterable
>>> isi = lambda s,i: int(issubseq(s,i))
>>> isi([], [])
1
>>> isi("a", "")
0
>>> isi("", "a")
1
>>> isi("", "aaa")
1
>>> isi("a", "a")
1
>>> isi("ab", "bab")
1
>>> isi("ab", "bab")
1
>>> isi("ba", "bbb")
0
>>> isi("bab", "ab")
0
>>> isi(("a", "b"), ("a","a","b"))
1
>>> isi(("a", "b"), ("a","a","c"))
0
>>> isi([1,2,1], [3,5, 1,2,4, 1,2,1, 6])
1
>>> isi([1,2,1], [3,5, 1,2,4, 1,2,3, 6])
0
>>> l = [1] * 50 + [1,2,3] + [4] * 50
>>> isi([1,2,3], l)
1
>>> l = [1] * 50 + [1,2,4] + [5] * 50
>>> isi([1,2,3], l)
0
"""
if not hasattr(sub, "__getitem__"):
sub = list(sub)

len_sub = len(sub)
if len_sub == 0:
return True

try:
if not len(items) or len(items) < len_sub:
return False
except TypeError:
pass

if sub == items:
return True

table = [0] * (len_sub + 1)

# building prefix-function
m = 0
for i in xrange(1, len_sub):
while m > 0 and sub[m] != sub[i]:
m = table[m - 1]
if sub[m] == sub[i]:
m += 1
table[i] = m

# searching
m, i = 0, 0
for x in items:
while m > 0 and sub[m] != x:
m = table[m - 1]
if sub[m] == x:
m += 1
if m == len_sub:
return True
i += 1

return False

Usage:

>>> from util import issubseq # uses Psyco
>>> from timing import timeit
>>> a = range(1000000) + range(1000) + range(100000)
>>> sub = range(1000)
>>> len(a)
1101000
>>> timeit(issubseq, sub, a)
(True, 0.0)
>>> a = range(999) * 10000 + range(1000) + range(10000)
>>> sub = range(1000)
>>> timeit(issubseq, sub, a)
(True, 0.20000000000000001)

Bye,
bearophile

```