[Spambayes-checkins] spambayes/spambayes UserInterface.py, 1.48, 1.49 message.py, 1.62, 1.63

Tony Meyer anadelonbrin at users.sourceforge.net
Tue Dec 21 22:37:10 CET 2004


Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv23974/spambayes

Modified Files:
	UserInterface.py message.py 
Log Message:
Store a stats start date in the message database, so that we can 'reset' the statistics
 without removing the whole message database.  If the database grows too large now,
 we can also age out data from it if we want to.

Allow message.Message objects to have their id specified on creation.  Also allow specification
 of the message_info_db, which means one object can be shared rather than each message
 creating its own.  Only the test suite uses this at the moment, but other code
 should, too.

Update a few comments in UserInterface.py and use keyword args when calling Stats.GetStats().

Index: UserInterface.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/UserInterface.py,v
retrieving revision 1.48
retrieving revision 1.49
diff -C2 -d -r1.48 -r1.49
*** UserInterface.py	10 Aug 2004 14:20:24 -0000	1.48
--- UserInterface.py	21 Dec 2004 21:37:06 -0000	1.49
***************
*** 503,506 ****
--- 503,512 ----
          # the database later.   This is a temporary implementation -
          # it should keep a Corpus of trained messages.
+         # XXX Temporary, heh.  One of the problems with this is that
+         # XXX these files get opened in whatever happens to be the cwd.
+         # XXX I don't think anyone uses these anyway, but we should fix
+         # XXX this for 1.1.  I think that creating a new message in the
+         # XXX Ham/Spam corpus would work, and not interfere with anything.
+         # XXX We could later search for them, too, which would be a bonus.
          if isSpam:
              f = open("_pop3proxyspam.mbox", "a")
***************
*** 512,515 ****
--- 518,524 ----
          self.flush()
          for message in messages:
+             # XXX Here, we should really use the message.Message class,
+             # XXX so that the messageinfo database is updated (and so
+             # XXX the stats are correct, and so on).
              tokens = tokenizer.tokenize(message)
              self.classifier.learn(tokens, isSpam)
***************
*** 913,919 ****
          # rather than regenerating it every time.  If people complain
          # about it being too slow, then do this!
          s = Stats.Stats()
          self._writePreamble("Statistics")
!         stats = s.GetStats()
          stats = self._buildBox("Statistics", None, "<br/><br/>".join(stats))
          self.write(stats)
--- 922,930 ----
          # rather than regenerating it every time.  If people complain
          # about it being too slow, then do this!
+         # XXX The Stats object should be generated once, when we start up,
+         # XXX and then just called, here.
          s = Stats.Stats()
          self._writePreamble("Statistics")
!         stats = s.GetStats(use_html=True)
          stats = self._buildBox("Statistics", None, "<br/><br/>".join(stats))
          self.write(stats)

Index: message.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/message.py,v
retrieving revision 1.62
retrieving revision 1.63
diff -C2 -d -r1.62 -r1.63
*** message.py	17 Dec 2004 01:23:36 -0000	1.62
--- message.py	21 Dec 2004 21:37:06 -0000	1.63
***************
*** 83,86 ****
--- 83,87 ----
  import sys
  import types
+ import time
  import math
  import re
***************
*** 111,114 ****
--- 112,117 ----
  CRLF_RE = re.compile(r'\r\n|\r|\n')
  
+ STATS_START_KEY = "Statistics start date"
+ 
  class MessageInfoBase(object):
      def __init__(self, db_name):
***************
*** 118,121 ****
--- 121,134 ----
          return len(self.db)
  
+     def get_statistics_start_date(self):
+         if STATS_START_KEY in self.db:
+             return self.db[STATS_START_KEY]
+         else:
+             return None
+ 
+     def set_statistics_start_date(self, date):
+         self.db[STATS_START_KEY] = date
+         self.store()
+ 
      def load_msg(self, msg):
          if self.db is not None:
***************
*** 150,154 ****
                          return
                      else:
!                         print >> sys.stderr, "Unknown message info type"
                          sys.exit(1)
                  for att, val in attributes:
--- 163,168 ----
                          return
                      else:
!                         print >> sys.stderr, "Unknown message info type", \
!                               attributes
                          sys.exit(1)
                  for att, val in attributes:
***************
*** 157,161 ****
      def store_msg(self, msg):
          if self.db is not None:
!             attributes = []
              for att in msg.stored_attributes:
                  attributes.append((att, getattr(msg, att)))
--- 171,175 ----
      def store_msg(self, msg):
          if self.db is not None:
!             attributes = [("date_modified", time.time())]
              for att in msg.stored_attributes:
                  attributes.append((att, getattr(msg, att)))
***************
*** 264,273 ****
      '''An email.Message.Message extended for SpamBayes'''
  
!     def __init__(self):
          email.Message.Message.__init__(self)
  
          # persistent state
!         nm, typ = database_type()
!         self.message_info_db = open_storage(nm, typ)
          self.stored_attributes = ['c', 't',]
          self.getDBKey = self.getId
--- 278,291 ----
      '''An email.Message.Message extended for SpamBayes'''
  
!     def __init__(self, id=None, message_info_db=None):
          email.Message.Message.__init__(self)
  
          # persistent state
!         # (non-persistent state includes all of email.Message.Message state)
!         if message_info_db is not None:
!             self.message_info_db = message_info_db
!         else:
!             nm, typ = database_type()
!             self.message_info_db = open_storage(nm, typ)
          self.stored_attributes = ['c', 't',]
          self.getDBKey = self.getId
***************
*** 276,280 ****
          self.t = None
  
!         # non-persistent state includes all of email.Message.Message state
  
      # This function (and it's hackishness) can be avoided by using
--- 294,299 ----
          self.t = None
  
!         if id is not None:
!             self.setId(id)
  
      # This function (and it's hackishness) can be avoided by using
***************
*** 315,318 ****
--- 334,340 ----
              raise TypeError, "Id must be a string"
  
+         if id == STATS_START_KEY:
+             raise ValueError, "MsgId must not be" + STATS_START_KEY
+ 
          self.id = id
          self.message_info_db.load_msg(self)



More information about the Spambayes-checkins mailing list