[Python-checkins] r69231 - in sandbox/trunk/dbm_sqlite/alt: dbsqlite.py time_sqlite.py

raymond.hettinger python-checkins at python.org
Tue Feb 3 02:00:17 CET 2009


Author: raymond.hettinger
Date: Tue Feb  3 02:00:17 2009
New Revision: 69231

Log:
Faster __contains__ test.

Modified:
   sandbox/trunk/dbm_sqlite/alt/dbsqlite.py
   sandbox/trunk/dbm_sqlite/alt/time_sqlite.py

Modified: sandbox/trunk/dbm_sqlite/alt/dbsqlite.py
==============================================================================
--- sandbox/trunk/dbm_sqlite/alt/dbsqlite.py	(original)
+++ sandbox/trunk/dbm_sqlite/alt/dbsqlite.py	Tue Feb  3 02:00:17 2009
@@ -53,8 +53,8 @@
         return iter(self.keys())
 
     def __contains__(self, key):
-        GET_ITEM = 'SELECT value FROM shelf WHERE key = ?'
-        return self.conn.execute(GET_ITEM, (key,)).fetchone() is not None
+        HAS_ITEM = 'SELECT 1 FROM shelf WHERE key = ?'
+        return self.conn.execute(HAS_ITEM, (key,)).fetchone() is not None
 
     def __getitem__(self, key):
         GET_ITEM = 'SELECT value FROM shelf WHERE key = ?'
@@ -63,7 +63,7 @@
             raise KeyError(key)
         return item[0]
 
-    def __setitem__(self, key, value):       
+    def __setitem__(self, key, value):
         ADD_ITEM = 'REPLACE INTO shelf (key, value) VALUES (?,?)'
         self.conn.execute(ADD_ITEM, (key, value))
         #self.conn.commit()
@@ -71,7 +71,7 @@
     def __delitem__(self, key):
         if key not in self:
             raise KeyError(key)
-        DEL_ITEM = 'DELETE FROM shelf WHERE key = ?'       
+        DEL_ITEM = 'DELETE FROM shelf WHERE key = ?'
         self.conn.execute(DEL_ITEM, (key,))
         #self.conn.commit()
 
@@ -84,8 +84,8 @@
         if kwds:
             self.update(kwds)
 
-    def clear(self):        
-        CLEAR_ALL = 'DELETE FROM shelf;  VACUUM;'        
+    def clear(self):
+        CLEAR_ALL = 'DELETE FROM shelf;  VACUUM;'
         self.conn.executescript(CLEAR_ALL)
         self.conn.commit()
 
@@ -96,27 +96,27 @@
             self.conn = None
 
     def __del__(self):
-        self.close()    
+        self.close()
 
 class ListRepr:
 
     def __repr__(self):
-        return repr(list(self))    
+        return repr(list(self))
 
 class SQLhashKeysView(collections.KeysView, ListRepr):
-    
+
     def __iter__(self):
         GET_KEYS = 'SELECT key FROM shelf ORDER BY ROWID'
         return map(itemgetter(0), self._mapping.conn.cursor().execute(GET_KEYS))
 
 class SQLhashValuesView(collections.ValuesView, ListRepr):
-    
+
     def __iter__(self):
         GET_VALUES = 'SELECT value FROM shelf ORDER BY ROWID'
         return map(itemgetter(0), self._mapping.conn.cursor().execute(GET_VALUES))
 
 class SQLhashItemsView(collections.ValuesView, ListRepr):
-    
+
     def __iter__(self):
         GET_ITEMS = 'SELECT key, value FROM shelf ORDER BY ROWID'
         return iter(self._mapping.conn.cursor().execute(GET_ITEMS))
@@ -131,7 +131,7 @@
     for d in SQLhash(), SQLhash('example'):
         print(list(d), "start")
         d['abc'] = 'lmno'
-        print(d['abc'])    
+        print(d['abc'])
         d['abc'] = 'rsvp'
         d['xyz'] = 'pdq'
         print(d.items())
@@ -140,7 +140,7 @@
         print(list(d), 'list')
         d.update(p='x', q='y', r='z')
         print(d.items())
-        
+
         del d['abc']
         try:
             print(d['abc'])
@@ -148,7 +148,7 @@
             pass
         else:
             raise Exception('oh noooo!')
-        
+
         try:
             del d['abc']
         except KeyError:

Modified: sandbox/trunk/dbm_sqlite/alt/time_sqlite.py
==============================================================================
--- sandbox/trunk/dbm_sqlite/alt/time_sqlite.py	(original)
+++ sandbox/trunk/dbm_sqlite/alt/time_sqlite.py	Tue Feb  3 02:00:17 2009
@@ -22,31 +22,34 @@
 import sqlite3
 import os
 from time import clock
+from itertools import chain
 
-MAKE_shelf = ('''
+MAKE_SHELF = ('''
     DROP INDEX IF EXISTS keyndx; DROP TABLE IF EXISTS shelf;
     CREATE TABLE IF NOT EXISTS shelf (key TEXT NOT NULL, value TEXT NOT NULL);
 ''', 'UNINDEXED')
-MAKE_shelf_PRIMARY = ('''
+
+MAKE_SHELF_PRIMARY = ('''
     DROP INDEX IF EXISTS keyndx; DROP TABLE IF EXISTS shelf;
     CREATE TABLE IF NOT EXISTS shelf (key PRIMARY KEY, value TEXT NOT NULL);
 ''', 'PRIMARY')
-MAKE_shelf_UNIQUE = ('''
+
+MAKE_SHELF_UNIQUE = ('''
     DROP INDEX IF EXISTS keyndx; DROP TABLE IF EXISTS shelf;
     CREATE TABLE IF NOT EXISTS shelf (key TEXT NOT NULL, value TEXT NOT NULL);
     CREATE UNIQUE INDEX IF NOT EXISTS keyndx ON shelf (key);
 ''', 'UNIQ')
 
 
-MAKE_shelf = ('''
+MAKE_SHELF = ('''
     DROP INDEX IF EXISTS keyndx; DROP TABLE IF EXISTS shelf;
     CREATE TABLE IF NOT EXISTS shelf (key BLOB NOT NULL, value BLOB NOT NULL);
 ''', 'UNINDEXED')
-MAKE_shelf_PRIMARY = ('''
+MAKE_SHELF_PRIMARY = ('''
     DROP INDEX IF EXISTS keyndx; DROP TABLE IF EXISTS shelf;
     CREATE TABLE IF NOT EXISTS shelf (key BLOB PRIMARY KEY, value BLOB NOT NULL);
 ''', 'PRIMARY')
-MAKE_shelf_UNIQUE = ('''
+MAKE_SHELF_UNIQUE = ('''
     DROP INDEX IF EXISTS keyndx; DROP TABLE IF EXISTS shelf;
     CREATE TABLE IF NOT EXISTS shelf (key BLOB NOT NULL, value BLOB NOT NULL);
     CREATE UNIQUE INDEX IF NOT EXISTS keyndx ON shelf (key);
@@ -62,6 +65,12 @@
     'SELECT key, value FROM shelf',
 ]
 
+FINDKEY = [
+    'SELECT key FROM shelf WHERE key = ?',
+    'SELECT value FROM shelf WHERE key = ?',
+    'SELECT 1 FROM shelf WHERE key = ?',
+]
+
 def populate(n=100, m=5):
     items = []
     for i in range(n):
@@ -71,7 +80,7 @@
     if len(set(k for k,v in items)) != n:
         return populate(n, m)
     return items
-        
+
 def setup(BUILDER, items=None):
     filename = 'tmpshl'
     try:
@@ -99,32 +108,49 @@
             conn.execute('REPLACE INTO shelf (key, value) VALUES (?, ?)', addlist.pop())
             conn.commit()
 
-def timeit(conn, stmt, n=20):
-    conn.execute(stmt)          # precompile
-    start = clock()   
-    for i in range(n):
-        conn.execute(stmt).fetchall()
+def timeit(conn, stmt, args=(), n=20):
+    if args:
+        conn.execute(stmt, args)    # precompile
+    else:
+        conn.execute(stmt)          # precompile
+    start = clock()
+    if args:
+        for i in range(n):
+            conn.execute(stmt, args).fetchone()
+    else:
+        for i in range(n):
+            conn.execute(stmt).fetchall()
     return '%.2f' % (clock() - start)
-    
+
 n, m = 2000, 6000
 fragment, vacuum = True, True
 seed('xyzpdqbingo')
 items = populate(n)
-dellist = [(randrange(n),) for i in range(m)]
-addlist = populate(m)
-for stmt in SELECTORS:
-    print(stmt)    
-    for builder, name in [MAKE_shelf, MAKE_shelf_PRIMARY, MAKE_shelf_UNIQUE]:
-        conn = setup(builder, items)
-        if fragment:
-            fragmentit(conn, addlist, dellist)
-        if vacuum:
-            conn.execute('VACUUM')
-        print(sorted(timeit(conn, stmt, n=100) for i in range(6)), name)
-    print()
+if 0:
+    dellist = [(randrange(n),) for i in range(m)]
+    addlist = populate(m)
+    for stmt in SELECTORS:
+        print(stmt)
+        for builder, name in [MAKE_SHELF, MAKE_SHELF_PRIMARY, MAKE_SHELF_UNIQUE]:
+            conn = setup(builder, items)
+            if fragment:
+                fragmentit(conn, addlist, dellist)
+            if vacuum:
+                conn.execute('VACUUM')
+            print(sorted(timeit(conn, stmt, (), n=100) for i in range(6)), name)
+        print()
+else:
+    pairs = sample(items, 3)
+    conn = setup(MAKE_SHELF_PRIMARY[0], items)
+    for stmt in FINDKEY:
+        print(stmt)
+        for key in chain.from_iterable(pairs):
+            print(sorted(timeit(conn, stmt, (key,), n=10000) for i in range(6)), key)
+        print()
+
+
 
 
-    
 ''' Results:
 
 No difference between 'select *', 'select k,v', and 'select * by rowid'
@@ -135,9 +161,12 @@
 The effect of "primary key" and a unique index is the same
 BLOB and TEXT types have the same timing
 
+Search for a single key:
+    Missing key search 8% faster than a found key
+    "SELECT 1" is 3% faster than "SELECT key" which is 3% faster than "SELECT value"
 
 
-Results for unfragmented:  n, m = 2000, 6000
+-----  Unfragmented:  n, m = 2000, 6000
 
 SELECT * FROM shelf
 ['0.38', '0.38', '0.38', '0.39', '0.39', '0.47'] UNINDEXED
@@ -235,4 +264,31 @@
 ['1.29', '1.29', '1.30', '1.30', '1.31', '1.38'] PRIMARY
 ['1.28', '1.30', '1.30', '1.30', '1.33', '1.40'] UNIQ
 
+
+----- Search for a single key (alternating between found and not found) ----
+
+SELECT key FROM shelf WHERE key = ?
+['0.59', '0.59', '0.59', '0.59', '0.60', '0.64'] b'YzLgm'
+['0.55', '0.56', '0.56', '0.56', '0.56', '0.58'] b'rsgVk'
+['0.59', '0.59', '0.59', '0.59', '0.59', '0.59'] b'Echad'
+['0.55', '0.56', '0.56', '0.56', '0.56', '0.56'] b'uMwIL'
+['0.59', '0.59', '0.59', '0.59', '0.59', '0.60'] b'jgmOI'
+['0.55', '0.55', '0.56', '0.56', '0.56', '0.56'] b'oNJuV'
+
+SELECT value FROM shelf WHERE key = ?
+['0.61', '0.62', '0.62', '0.62', '0.62', '0.62'] b'YzLgm'
+['0.56', '0.57', '0.57', '0.57', '0.57', '0.57'] b'rsgVk'
+['0.61', '0.61', '0.61', '0.62', '0.62', '0.62'] b'Echad'
+['0.57', '0.57', '0.57', '0.57', '0.57', '0.58'] b'uMwIL'
+['0.61', '0.61', '0.61', '0.62', '0.62', '0.62'] b'jgmOI'
+['0.56', '0.56', '0.57', '0.57', '0.57', '0.57'] b'oNJuV'
+
+SELECT 1 FROM shelf WHERE key = ?
+['0.57', '0.57', '0.58', '0.58', '0.58', '0.58'] b'YzLgm'
+['0.55', '0.55', '0.55', '0.55', '0.55', '0.56'] b'rsgVk'
+['0.57', '0.57', '0.57', '0.58', '0.58', '0.58'] b'Echad'
+['0.55', '0.55', '0.55', '0.56', '0.56', '0.56'] b'uMwIL'
+['0.57', '0.57', '0.57', '0.58', '0.58', '0.58'] b'jgmOI'
+['0.55', '0.55', '0.55', '0.56', '0.56', '0.57'] b'oNJuV'
+
 '''


More information about the Python-checkins mailing list