Faster way to do this?

Thu May 22 02:47:29 EDT 2003

----- Original Message ----- 
From: "Bengt Richter" <bokr at oz.net>
Newsgroups: comp.lang.python
To: <python-list at python.org>
Sent: Thursday, May 22, 2003 6:39 AM
Subject: Re: Faster way to do this?

This is another way to do it. The trick used is to sort the letters of the
words. This makes the lookup faster at the cost of taking more time to set
up the word dictionary.

It seems to be quite a bit faster than method1 and method2.

import sys
import time

WORD_FILE = 'american-english'

start = time.time()
words = {}
wf = open(WORD_FILE, 'r')
for line in wf:
    word = line.strip().lower()
    key = [char for char in word]
    key.sort()
    key = tuple(key)
    wordList = words.get(key, [])
    wordList.append(word)
    words[key] = wordList
print 'Read %d words in %.2fs' % (len(words), time.time() - start)

def findWords(word, words):
    result = 0
    word = [char for char in word]
    word.sort()
    wordLen = len(word)
    for key in words.keys():
        keyLen = len(key)
        if wordLen >= keyLen:
            i = 0
            for char in word:
                if char == key[i]:
                    i += 1
                    if i == keyLen:
                        result += len(words[key])
                        #for found in words[key]:
                            #result += 1
                            # print found
                        break
                elif char > key[i]:
                    break
    return result

if __name__ == '__main__':
    start = time.time()
    number = findWords(sys.argv[1].lower(), words)
    print "found %d matches in %.2f seconds" % (number, (time.time()-start))

John Wilson
The Wilson Partnership
http://www.wilson.co.uk