[Spambayes-checkins] spambayes/Outlook2000 addin.py,1.33,1.34 msgstore.py,1.30,1.31

Mark Hammond mhammond@users.sourceforge.net
Thu Nov 14 07:04:48 2002


Update of /cvsroot/spambayes/spambayes/Outlook2000
In directory usw-pr-cvs1:/tmp/cvs-serv14133

Modified Files:
	addin.py msgstore.py 
Log Message:
Process all missed messages at startup.  "missed" is defined as both unread,
and missing our "Spam" field.  This should be quite fast (unless, of 
course, it finds your huge folder is all unread and unscored!)


Index: addin.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/addin.py,v
retrieving revision 1.33
retrieving revision 1.34
diff -C2 -d -r1.33 -r1.34
*** addin.py	14 Nov 2002 02:52:50 -0000	1.33
--- addin.py	14 Nov 2002 07:04:45 -0000	1.34
***************
*** 124,127 ****
--- 124,153 ----
  # Whew - we seem to have all the COM support we need - let's rock!
  
+ # Function to filter a message - note it is a msgstore msg, not an
+ # outlook one
+ def ProcessMessage(msgstore_message, manager):
+     if msgstore_message.GetField(manager.config.field_score_name) is not None:
+         # Already seen this message - user probably moving it back
+         # after incorrect classification.
+         # If enabled, re-train as Ham
+         # otherwise just ignore.
+         if manager.config.training.train_recovered_spam:
+             subject = msgstore_message.GetSubject()
+             import train
+             print "Training on message '%s' - " % subject,
+             if train.train_message(msgstore_message, False, manager, rescore = True):
+                 print "trained as good"
+             else:
+                 print "already was trained as good"
+             assert train.been_trained_as_ham(msgstore_message, manager)
+         return
+     if manager.config.filter.enabled:
+         import filter
+         disposition = filter.filter_message(msgstore_message, manager)
+         print "Message '%s' had a Spam classification of '%s'" \
+               % (msgstore_message.GetSubject(), disposition)
+     else:
+         print "Spam filtering is disabled - ignoring new message"
+ 
  # Button/Menu and other UI event handler classes
  class ButtonEvent:
***************
*** 157,182 ****
          #     PR_TRANSPORT_MESSAGE_HEADERS
          msgstore_message = self.manager.message_store.GetMessage(item)
!         if msgstore_message.GetField(self.manager.config.field_score_name) is not None:
!             # Already seem this message - user probably moving it back
!             # after incorrect classification.
!             # If enabled, re-train as Ham
!             # otherwise just ignore.
!             if self.manager.config.training.train_recovered_spam:
!                 subject = item.Subject.encode("mbcs", "replace")
!                 import train
!                 print "Training on message '%s' - " % subject,
!                 if train.train_message(msgstore_message, False, self.manager, rescore = True):
!                     print "trained as good"
!                 else:
!                     print "already was trained as good"
!                 assert train.been_trained_as_ham(msgstore_message, self.manager)
!             return
!         if self.manager.config.filter.enabled:
!             import filter
!             disposition = filter.filter_message(msgstore_message, self.manager)
!             print "Message '%s' had a Spam classification of '%s'" \
!                   % (item.Subject.encode("ascii", "replace"), disposition)
!         else:
!             print "Spam filtering is disabled - ignoring new message"
  
  # Event fired when item moved into the Spam folder.
--- 183,187 ----
          #     PR_TRANSPORT_MESSAGE_HEADERS
          msgstore_message = self.manager.message_store.GetMessage(item)
!         ProcessMessage(msgstore_message, self.manager)
  
  # Event fired when item moved into the Spam folder.
***************
*** 458,462 ****
              self.explorer_events.OnFolderSwitch()
  
!             # The main tool-bar dropdown with all out entries.
              # Add a pop-up menu to the toolbar
              popup = toolbar.Controls.Add(
--- 463,467 ----
              self.explorer_events.OnFolderSwitch()
  
!             # The main tool-bar dropdown with all our entries.
              # Add a pop-up menu to the toolbar
              popup = toolbar.Controls.Add(
***************
*** 482,485 ****
--- 487,497 ----
  
          self.FiltersChanged()
+         if self.manager.config.filter.enabled:
+             try:
+                 self.ProcessMissedMessages()
+             except:
+                 print "Error processing missed messages!"
+                 import traceback
+                 traceback.print_exc()
  
      def _AddPopup(self, parent, target, target_args, **item_attrs):
***************
*** 491,494 ****
--- 503,524 ----
              setattr(item, attr, val)
          self.buttons.append(item)
+ 
+     def ProcessMissedMessages(self):
+         # This could possibly spawn threads if it was too slow!
+         from time import clock
+         config = self.manager.config.filter
+         manager = self.manager
+         field_name = manager.config.field_score_name
+         for folder in manager.message_store.GetFolderGenerator(
+                                     config.watch_folder_ids,
+                                     config.watch_include_sub):
+             num = 0  # messages processed in this folder
+             start = clock()
+             for message in folder.GetNewUnscoredMessageGenerator(field_name):
+                 ProcessMessage(message, manager)
+                 num += 1
+             # See if perf hurts anyone too much (elapsed seconds -> ms).
+             print "Processing %d missed spam in folder '%s' took %gms" \
+                   % (num, folder.name, (clock()-start)*1000)
  
      def FiltersChanged(self):

Index: msgstore.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/msgstore.py,v
retrieving revision 1.30
retrieving revision 1.31
diff -C2 -d -r1.30 -r1.31
*** msgstore.py	14 Nov 2002 03:59:21 -0000	1.30
--- msgstore.py	14 Nov 2002 07:04:45 -0000	1.31
***************
*** 58,61 ****
--- 58,64 ----
          # should get their own methods
          raise NotImplementedError
+     def GetSubject(self):
+         # Get the subject - a method, as it may require a trip to the store!
+         raise NotImplementedError
      def GetField(self, name):
          # Abstractly get a user field name/id to a field value.
***************
*** 292,295 ****
--- 295,326 ----
                                        item_id, row[1][1], row[2][1])
  
+     def GetNewUnscoredMessageGenerator(self, scoreFieldName):
+         folder = self.msgstore._OpenEntry(self.id)
+         table = folder.GetContentsTable(0)
+         # Resolve the caller-supplied score field name to a property tag
+         resolve_props = ( (mapi.PS_PUBLIC_STRINGS, scoreFieldName), )
+         resolve_ids = folder.GetIDsFromNames(resolve_props, 0)
+         field_id = PROP_TAG( PT_I4, PROP_ID(resolve_ids[0]))
+         # Setup the properties we want to read.
+         prop_ids = PR_ENTRYID, PR_SEARCH_KEY, PR_CONTENT_UNREAD
+         table.SetColumns(prop_ids, 0)
+         # Set up the restriction: unread AND score field does not exist
+         prop_restriction = (mapi.RES_PROPERTY,   # a property restriction
+                                (mapi.RELOP_EQ,      # check for equality
+                                 PR_CONTENT_UNREAD,   # of the unread flag
+                                 (PR_CONTENT_UNREAD, True))  # NOTE(review): documented as a folder property - confirm
+                             )
+         exist_restriction = mapi.RES_EXIST, (field_id,)
+         not_exist_restriction = mapi.RES_NOT, (exist_restriction,)
+         restriction = (mapi.RES_AND, (prop_restriction, not_exist_restriction))
+         table.Restrict(restriction, 0)
+         while 1:
+             rows = table.QueryRows(70, 0)  # fetch in batches of up to 70 rows
+             if len(rows) == 0:
+                 break
+             for row in rows:
+                 item_id = self.id[0], row[0][1] # assume in same store as folder!
+                 yield MAPIMsgStoreMsg(self.msgstore, self,
+                                       item_id, row[1][1], row[2][1])
  
  class MAPIMsgStoreMsg(MsgStoreMsg):
***************
*** 299,302 ****
--- 330,334 ----
          self.mapi_object = None
          self.id = entryid
+         self.subject = None
          # Search key is the only reliable thing after a move/copy operation
          # only problem is that it can potentially be changed - however, the
***************
*** 313,317 ****
          else:
              urs = "unread"
!         return "<%s, (%s) id=%s/%s>" % (self.__class__.__name__,
                                       urs,
                                       mapi.HexFromBin(self.id[0]),
--- 345,350 ----
          else:
              urs = "unread"
!         return "<%s, '%s' (%s) id=%s/%s>" % (self.__class__.__name__,
!                                      self.GetSubject(),
                                       urs,
                                       mapi.HexFromBin(self.id[0]),
***************
*** 329,332 ****
--- 362,370 ----
      def GetID(self):
          return mapi.HexFromBin(self.id[0]), mapi.HexFromBin(self.id[1])
+ 
+     def GetSubject(self):
+         if self.subject is None:  # not fetched yet - ask the store once
+             self.subject = self.GetField(PR_SUBJECT_A,)
+         return self.subject  # cached for subsequent calls
  
      def GetOutlookItem(self):





More information about the Spambayes-checkins mailing list