[Spambayes] dbm on windows, hopefully for the last time

Neale Pickett neale at woozle.org
Tue Dec 3 17:19:52 2002


What do you all think of this: new option "dbm_type" which can be
"best", "db3hash", "dbhash", "gdbm", or "dumbdbm".  If it's "best", then
the best available dbm implementation will be used.  Note that "best" on
Windows excludes "dbhash".

So now you get the best one your platform supports by default.  Or you
can name a particular dbm if you like that better.

This will remove the "anydbm" module, but add a tiny "dbmstorage"
module.

Please let me know what you think.  I'll check it in if I don't get any
"no, don't do that" comments.


Here's the diff:

Index: Options.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Options.py,v
retrieving revision 1.78
diff -u -r1.78 Options.py
--- Options.py	26 Nov 2002 00:43:51 -0000	1.78
+++ Options.py	3 Dec 2002 17:13:20 -0000
@@ -372,6 +372,10 @@
 
 [globals]
 verbose: False
+# What DBM storage type should we use?  Must be one of best, db3hash,
+# dbhash, gdbm, or dumbdbm.  Windows folk should steer clear of dbhash.
+# The default, "best", picks the best DBM type available on your platform.
+dbm_type: best
 """
 
 int_cracker = ('getint', None)
@@ -460,6 +464,7 @@
                 'html_ui_launch_browser': boolean_cracker,
                 },
     'globals': {'verbose': boolean_cracker,
+                'dbm_type': string_cracker,
                 },
 }
 
Index: anydbm.py
===================================================================
RCS file: anydbm.py
diff -N anydbm.py
--- anydbm.py	2 Dec 2002 20:23:39 -0000	1.3
+++ /dev/null	1 Jan 1970 00:00:00 -0000
@@ -1,57 +0,0 @@
-#! /usr/bin/env python
-"""Generic interface to all dbm clones.
-
-This is just like anydbm from the Python distribution, except that this
-one leaves out the "dbm" type on Windows, since reliable reports have it
-that this module is antiquated and most dreadful.
-
-"""
-
-import sys
-
-try:
-    class error(Exception):
-        pass
-except (NameError, TypeError):
-    error = "anydbm.error"
-
-if sys.platform in ["win32"]:
-    # dbm on windows is awful.
-    _names = ["bsddb3", "gdbm", "dumbdbm"]
-else:
-    _names = ["dbhash", "gdbm", "dbm", "dumbdbm"]
-_errors = [error]
-_defaultmod = None
-
-for _name in _names:
-    try:
-        _mod = __import__(_name)
-    except ImportError:
-        continue
-    if not _defaultmod:
-        _defaultmod = _mod
-    _errors.append(_mod.error)
-
-if not _defaultmod:
-    raise ImportError, "no dbm clone found; tried %s" % _names
-
-error = tuple(_errors)
-
-def open(file, flag = 'r', mode = 0666):
-    # guess the type of an existing database
-    from whichdb import whichdb
-    result=whichdb(file)
-    if result is None:
-        # db doesn't exist
-        if 'c' in flag or 'n' in flag:
-            # file doesn't exist and the new
-            # flag was used so use default type
-            mod = _defaultmod
-        else:
-            raise error, "need 'c' or 'n' flag to open new db"
-    elif result == "":
-        # db type cannot be determined
-        raise error, "db type could not be determined"
-    else:
-        mod = __import__(result)
-    return mod.open(file, flag, mode)
Index: dbmstorage.py
===================================================================
RCS file: dbmstorage.py
diff -N dbmstorage.py
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ dbmstorage.py	3 Dec 2002 17:13:20 -0000
@@ -0,0 +1,57 @@
+"""Wrapper to open an appropriate dbm storage type."""
+
+import sys
+
+from Options import options
+
+class error(Exception):
+    pass
+
+def open_db3hash(*args):
+    """Open a bsddb3 hash."""
+    import bsddb3
+    return bsddb3.hashopen(*args)
+
+def open_dbhash(*args):
+    """Open a bsddb hash.  Don't use this on Windows."""
+    import bsddb
+    return bsddb.hashopen(*args)
+
+def open_gdbm(*args):
+    """Open a gdbm database."""
+    import gdbm
+    return gdbm.open(*args)
+
+def open_dumbdbm(*args):
+    """Open a dumbdbm database."""
+    import dumbdbm
+    return dumbdbm.open(*args)
+
+def open_best(*args):
+    """Open the first dbm type whose module imports; no dbhash on Windows."""
+    if sys.platform == "win32":
+        funcs = [open_db3hash, open_gdbm, open_dumbdbm]
+    else:
+        funcs = [open_db3hash, open_dbhash, open_gdbm, open_dumbdbm]
+    for f in funcs:
+        try:
+            return f(*args)
+        except ImportError:
+            pass
+    raise error("No dbm modules available!")
+
+open_funcs = {
+    "best": open_best,
+    "db3hash": open_db3hash,
+    "dbhash": open_dbhash,
+    "gdbm": open_gdbm,
+    "dumbdbm": open_dumbdbm,
+    }
+
+def open(*args):
+    """Open a database with the opener named by options.dbm_type."""
+    dbm_type = options.dbm_type.lower()
+    f = open_funcs.get(dbm_type)
+    if not f:
+        raise error("Unknown dbm type in options file")
+    return f(*args)
Index: storage.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/storage.py,v
retrieving revision 1.5
diff -u -r1.5 storage.py
--- storage.py	2 Dec 2002 06:02:03 -0000	1.5
+++ storage.py	3 Dec 2002 17:13:20 -0000
@@ -51,6 +51,7 @@
 import cPickle as pickle
 import errno
 import shelve
+import dbmstorage
 
 PICKLE_TYPE = 1
 NO_UPDATEPROBS = False   # Probabilities will not be autoupdated with training
@@ -130,7 +131,8 @@
         if options.verbose:
             print 'Loading state from',self.db_name,'database'
 
-        self.db = shelve.DbfilenameShelf(self.db_name, self.mode)
+        self.dbm = dbmstorage.open(self.db_name, self.mode)
+        self.db = shelve.Shelf(self.dbm)
 
         if self.db.has_key(self.statekey):
             t = self.db[self.statekey]







More information about the Spambayes mailing list