Test if list contains another list

bearophileHUGS at lycos.com bearophileHUGS at lycos.com
Fri Sep 26 22:39:16 CEST 2008


I suggest Python programmers to fill the holes in the Python std lib
with some debugged & tuned implementations, their "bag of tricks", so
they don't have to re-invent and debug things all the time. This works
well with Psyco:


def issubseq(sub, items):
    """issubseq(sub, items): return true if the sequence 'sub' is a
contiguous
    subsequence of the 'items' sequence.

    >>> issubseq()
    Traceback (most recent call last):
      ...
    TypeError: issubseq() takes exactly 2 arguments (0 given)
    >>> issubseq("abc")
    Traceback (most recent call last):
      ...
    TypeError: issubseq() takes exactly 2 arguments (1 given)
    >>> issubseq(1, [1, 2, 3])
    Traceback (most recent call last):
      ...
    TypeError: 'int' object is not iterable
    >>> isi = lambda s,i: int(issubseq(s,i))
    >>> isi([], [])
    1
    >>> isi("a", "")
    0
    >>> isi("", "a")
    1
    >>> isi("", "aaa")
    1
    >>> isi("a", "a")
    1
    >>> isi("ab", "bab")
    1
    >>> isi("ab", "bab")
    1
    >>> isi("ba", "bbb")
    0
    >>> isi("bab", "ab")
    0
    >>> isi(("a", "b"), ("a","a","b"))
    1
    >>> isi(("a", "b"), ("a","a","c"))
    0
    >>> isi([1,2,1], [3,5, 1,2,4, 1,2,1, 6])
    1
    >>> isi([1,2,1], [3,5, 1,2,4, 1,2,3, 6])
    0
    >>> l = [1] * 50 + [1,2,3] + [4] * 50
    >>> isi([1,2,3], l)
    1
    >>> l = [1] * 50 + [1,2,4] + [5] * 50
    >>> isi([1,2,3], l)
    0
    """
    if not hasattr(sub, "__getitem__"):
        sub = list(sub)

    len_sub = len(sub)
    if len_sub == 0:
        return True

    try:
        if not len(items) or len(items) < len_sub:
            return False
    except TypeError:
        pass

    if sub == items:
        return True

    table = [0] * (len_sub + 1)

    # building prefix-function
    m = 0
    for i in xrange(1, len_sub):
        while m > 0 and sub[m] != sub[i]:
            m = table[m - 1]
        if sub[m] == sub[i]:
            m += 1
        table[i] = m

    # searching
    m, i = 0, 0
    for x in items:
        while m > 0 and sub[m] != x:
            m = table[m - 1]
        if sub[m] == x:
            m += 1
            if m == len_sub:
                return True
        i += 1

    return False


Usage:

>>> from util import issubseq # uses Psyco
>>> from timing import timeit
>>> a = range(1000000) + range(1000) + range(100000)
>>> sub = range(1000)
>>> len(a)
1101000
>>> timeit(issubseq, sub, a)
(True, 0.0)
>>> a = range(999) * 10000 + range(1000) + range(10000)
>>> sub = range(1000)
>>> timeit(issubseq, sub, a)
(True, 0.20000000000000001)

Bye,
bearophile



More information about the Python-list mailing list