[Spambayes-checkins] spambayes/Outlook2000 manager.py,1.100,1.101

Tony Meyer anadelonbrin at users.sourceforge.net
Mon Dec 20 04:37:47 CET 2004


Update of /cvsroot/spambayes/spambayes/Outlook2000
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv1418/Outlook2000

Modified Files:
	manager.py 
Log Message:
Allow Outlook users to select their storage method in exactly the same way as other
 users do.  This isn't exposed via the GUI, so few will notice, and just about everyone
 will continue on with the default (still bsddb at the moment).

Also add a wrapper for the ZODB (FileStorage) storage class.  I've been using this
 for a while now, and it seems to work fine.

Remove some checks that no longer need to be done (that the messageinfo db has the
 same length as the tokens db).  The lengths no longer match now that we also store info
 about classified (but untrained) messages, and the check only existed to track down a
 (since solved) problem, anyway.

Index: manager.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/manager.py,v
retrieving revision 1.100
retrieving revision 1.101
diff -C2 -d -r1.100 -r1.101
*** manager.py	3 Dec 2004 21:43:19 -0000	1.100
--- manager.py	20 Dec 2004 03:37:38 -0000	1.101
***************
*** 81,100 ****
          this_filename = os.path.abspath(sys.argv[0])
  
  # See if we can use the new bsddb module. (The old one is unreliable
  # on Windows, so we don't use that)
! try:
!     import bsddb3 as bsddb
!     # bsddb3 is definitely not broken
!     use_db = True
! except ImportError:
!     # Not using the 3rd party bsddb3, so try the one in the std library
      try:
          import bsddb
-         use_db = hasattr(bsddb, "db") # This name is not in the old one.
      except ImportError:
!         # No DB library at all!
!         assert not hasattr(sys, "frozen"), \
!                "Don't build binary versions without bsddb!"
!         use_db = False
  
  # This is a little bit of a hack <wink>.  We are generally in a child
--- 81,102 ----
          this_filename = os.path.abspath(sys.argv[0])
  
+ # Ensure that a bsddb module is available if we are frozen.
  # See if we can use the new bsddb module. (The old one is unreliable
  # on Windows, so we don't use that)
! if hasattr(sys, "frozen"):
!     try:
!         import bsddb3
!     except ImportError:
!         bsddb3 = None
      try:
          import bsddb
      except ImportError:
!         bsddb = None
!     else:
!         # This name is not in the old (bad) one.
!         if not hasattr(bsddb, "db"):
!             bsddb = None
!     assert bsddb or bsddb3, \
!            "Don't build binary versions without bsddb!"
  
  # This is a little bit of a hack <wink>.  We are generally in a child
***************
*** 181,186 ****
      db_extension = None # for pychecker - overwritten by subclass
      def __init__(self, bayes_base_name, mdb_base_name):
!         self.bayes_filename = bayes_base_name + self.db_extension
!         self.mdb_filename = mdb_base_name + self.db_extension
      def new_bayes(self):
          # Just delete the file and do an "open"
--- 183,190 ----
      db_extension = None # for pychecker - overwritten by subclass
      def __init__(self, bayes_base_name, mdb_base_name):
!         self.bayes_filename = bayes_base_name.encode(filesystem_encoding) + \
!                               self.db_extension
!         self.mdb_filename = mdb_base_name.encode(filesystem_encoding) + \
!                             self.db_extension
      def new_bayes(self):
          # Just delete the file and do an "open"
***************
*** 197,201 ****
          bayes.close()
      def open_mdb(self):
!         return bayes_message.open_storage(self.mdb_filename, self.klass)
      def store_mdb(self, mdb):
          mdb.store()
--- 201,209 ----
          bayes.close()
      def open_mdb(self):
!         # MessageInfo storage types may lag behind, so use pickle if the
!         # matching type isn't available.
!         if self.klass in bayes_message._storage_types.keys():
!             return bayes_message.open_storage(self.mdb_filename, self.klass)
!         return bayes_message.open_storage(self.mdb_filename, "pickle")
      def store_mdb(self, mdb):
          mdb.store()
***************
*** 214,222 ****
      db_extension = ".db"
      klass = "dbm"
-     def __init__(self, bayes_base_name, mdb_base_name):
-         self.bayes_filename = bayes_base_name.encode(filesystem_encoding) + \
-                               self.db_extension
-         self.mdb_filename = mdb_base_name.encode(filesystem_encoding) + \
-                             self.db_extension
      def new_mdb(self):
          try:
--- 222,225 ----
***************
*** 228,231 ****
--- 231,238 ----
          return True # True means only changed records get actually written
  
+ class ZODBStorageManager(DBStorageManager):
+     db_extension = ".fs"
+     klass = "zodb"
+ 
  # Encapsulates our entire classification database
  # This allows a couple of different "databases" to be open at once
***************
*** 256,262 ****
          self.logger.LogDebug(0, "Bayes database initialized with "
                     "%d spam and %d good messages" % (bayes.nspam, bayes.nham))
!         if len(message_db) != bayes.nham + bayes.nspam:
!             print "*** - message database has %d messages - bayes has %d - something is screwey" % \
!                     (len(message_db), bayes.nham + bayes.nspam)
          self.bayes = bayes
          self.message_db = message_db
--- 263,272 ----
          self.logger.LogDebug(0, "Bayes database initialized with "
                     "%d spam and %d good messages" % (bayes.nspam, bayes.nham))
!         # Once, we checked that the message database was the same length
!         # as the training database here.  However, we now store information
!         # about messages that are classified but not trained in the message
!         # database, so the lengths will not be equal (unless all messages
!         # are trained).  That step doesn't really gain us anything, anyway,
!         # since it no longer would tell us useful information, so remove it.
          self.bayes = bayes
          self.message_db = message_db
***************
*** 288,296 ****
          start = time.clock()
          bayes = self.bayes
-         # Try and work out where this count sometimes goes wrong.
-         if bayes.nspam + bayes.nham != len(self.message_db):
-             print "WARNING: Bayes database has %d messages, " \
-                   "but training database has %d" % \
-                   (bayes.nspam + bayes.nham, len(self.message_db))
  
          if self.logger.verbose:
--- 298,301 ----
***************
*** 328,332 ****
  
  def GetStorageManagerClass():
!     return [PickleStorageManager, DBStorageManager][use_db]
  
  # Our main "bayes manager"
--- 333,353 ----
  
  def GetStorageManagerClass():
!     # We used to enforce this so that all binary users used bsddb, and
!     # unless they modified the source, so would all source users.  We
!     # would like more flexibility now, so we match what the rest of the
!     # applications do - this isn't exposed via the GUI, so Outlook users
!     # still get bsddb by default, and have to fiddle with a text file
!     # to change that.
!     use_db = bayes_options["Storage", "persistent_use_database"]
!     available = {"pickle" : PickleStorageManager,
!                  "dbm"    : DBStorageManager,
!                  "zodb"   : ZODBStorageManager,
!                  }
!     if use_db not in available:
!         # User is trying to use something fancy which isn't available.
!         # Fall back on bsddb.
!         print use_db, "storage type not available.  Using bsddb."
!         use_db = "dbm"
!     return available[use_db]
  
  # Our main "bayes manager"



More information about the Spambayes-checkins mailing list