Test if list contains another list
bearophileHUGS at lycos.com
bearophileHUGS at lycos.com
Fri Sep 26 16:39:16 EDT 2008
I suggest Python programmers to fill the holes in the Python std lib
with some debugged & tuned implementations, their "bag of tricks", so
they don't have to re-invent and debug things all the time. This works
well with Psyco:
def issubseq(sub, items):
"""issubseq(sub, items): return true if the sequence 'sub' is a
contiguous
subsequence of the 'items' sequence.
>>> issubseq()
Traceback (most recent call last):
...
TypeError: issubseq() takes exactly 2 arguments (0 given)
>>> issubseq("abc")
Traceback (most recent call last):
...
TypeError: issubseq() takes exactly 2 arguments (1 given)
>>> issubseq(1, [1, 2, 3])
Traceback (most recent call last):
...
TypeError: 'int' object is not iterable
>>> isi = lambda s,i: int(issubseq(s,i))
>>> isi([], [])
1
>>> isi("a", "")
0
>>> isi("", "a")
1
>>> isi("", "aaa")
1
>>> isi("a", "a")
1
>>> isi("ab", "bab")
1
>>> isi("ab", "bab")
1
>>> isi("ba", "bbb")
0
>>> isi("bab", "ab")
0
>>> isi(("a", "b"), ("a","a","b"))
1
>>> isi(("a", "b"), ("a","a","c"))
0
>>> isi([1,2,1], [3,5, 1,2,4, 1,2,1, 6])
1
>>> isi([1,2,1], [3,5, 1,2,4, 1,2,3, 6])
0
>>> l = [1] * 50 + [1,2,3] + [4] * 50
>>> isi([1,2,3], l)
1
>>> l = [1] * 50 + [1,2,4] + [5] * 50
>>> isi([1,2,3], l)
0
"""
if not hasattr(sub, "__getitem__"):
sub = list(sub)
len_sub = len(sub)
if len_sub == 0:
return True
try:
if not len(items) or len(items) < len_sub:
return False
except TypeError:
pass
if sub == items:
return True
table = [0] * (len_sub + 1)
# building prefix-function
m = 0
for i in xrange(1, len_sub):
while m > 0 and sub[m] != sub[i]:
m = table[m - 1]
if sub[m] == sub[i]:
m += 1
table[i] = m
# searching
m, i = 0, 0
for x in items:
while m > 0 and sub[m] != x:
m = table[m - 1]
if sub[m] == x:
m += 1
if m == len_sub:
return True
i += 1
return False
Usage:
>>> from util import issubseq # uses Psyco
>>> from timing import timeit
>>> a = range(1000000) + range(1000) + range(100000)
>>> sub = range(1000)
>>> len(a)
1101000
>>> timeit(issubseq, sub, a)
(True, 0.0)
>>> a = range(999) * 10000 + range(1000) + range(10000)
>>> sub = range(1000)
>>> timeit(issubseq, sub, a)
(True, 0.20000000000000001)
Bye,
bearophile
More information about the Python-list
mailing list