[Spambayes-checkins] spambayes/Outlook2000 config.py, 1.16, 1.17 addin.py, 1.78, 1.79

Mark Hammond mhammond at users.sourceforge.net
Sun Jul 27 17:49:34 EDT 2003


Update of /cvsroot/spambayes/spambayes/Outlook2000
In directory sc8-pr-cvs1:/tmp/cvs-serv17738

Modified Files:
	config.py addin.py 
Log Message:
Add a new experimental 'timer'.  This should prevent the addin from ever
missing messages, and should also prevent SpamBayes from upsetting the
builtin filters.

NOTE: You need a new win32all to enable the timer (but no need to upgrade
if you don't want to use it).


Index: config.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/config.py,v
retrieving revision 1.16
retrieving revision 1.17
diff -C2 -d -r1.16 -r1.17
*** config.py	20 Jul 2003 14:25:42 -0000	1.16
--- config.py	27 Jul 2003 23:49:32 -0000	1.17
***************
*** 108,111 ****
--- 108,136 ----
          INTEGER, RESTORE),
      ),
+     # Experimental options may change, may get removed, and *will* get moved
+     # should they be kept.
+     "Experimental" : (
+         ("timer_start_delay", "The interval, (in ms) before the timer starts.", 0,
+             """Once a new item is received in the inbox, SpamBayes will begin
+             processing messages after the given delay.  If a new message arrives
+             during this period, the timer will be reset and delay will start again.""",
+             INTEGER, RESTORE),
+         ("timer_interval", "The interval between subsequent timer checks (in ms)", 1000,
+             """Once the a new message timer found a new message, how long should
+             SpamBayes wait before checking for another new message, assuming no
+             other new messages arrive.  Should a new message arrive during this
+             process, the timer will reset, meaning that timer_start_delay will
+             elapse before the process begins again.""",
+             INTEGER, RESTORE),
+         ("timer_only_receive_folders",
+             "Should the timer only be used for 'Inbox' type folders", True,
+             """The point of using a timer is to prevent the SpamBayes filter
+             getting in the way the builtin Outlook rules.  Therefore, is it 
+             generally only necessary to use a timer for folders that have new
+             items being delivered directly to them.  Folders that are not inbox
+             style folders generally are not subject to builtin filtering, so
+             generally have no problems filtering messages in 'real time'.""",
+             BOOLEAN, RESTORE),
+     ),
      "Training" : (
      (FolderIDOption,

Index: addin.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/addin.py,v
retrieving revision 1.78
retrieving revision 1.79
diff -C2 -d -r1.78 -r1.79
*** addin.py	26 Jul 2003 00:57:35 -0000	1.78
--- addin.py	27 Jul 2003 23:49:32 -0000	1.79
***************
*** 47,50 ****
--- 47,52 ----
  import win32gui, win32con, win32clipboard # for button images!
  
+ import timer, thread
+ 
  toolbar_name = "SpamBayes"
  
***************
*** 176,180 ****
      # I considered checking if the "save spam score" option is enabled - but
      # even when enabled, this sometimes fails (IMAP, hotmail)
!     # Best we ca do not is to assume if it is read, we have seen it.
      return msgstore_message.GetReadState()
  
--- 178,182 ----
      # I considered checking if the "save spam score" option is enabled - but
      # even when enabled, this sometimes fails (IMAP, hotmail)
!     # Best we can do now is to assume if it is read, we have seen it.
      return msgstore_message.GetReadState()
  
***************
*** 227,256 ****
  class _BaseItemsEvent:
      def Init(self, target, application, manager):
          self.application = application
          self.manager = manager
          self.target = target
      def Close(self):
          self.application = self.manager = self.target = None
  
- class FolderItemsEvent(_BaseItemsEvent):
      def OnItemAdd(self, item):
-         # Note:  There's no distinction made here between msgs that have
-         # been received, and, e.g., msgs that were sent and moved from the
-         # Sent Items folder.  It would be good not to train on the latter,
-         # since it's simply not received email.  An article on the web said
-         # the distinction can't be made with 100% certainty, but that a good
-         # heuristic is to believe that a msg has been received iff at least
-         # one of these properties has a sensible value:
-         #     PR_RECEIVED_BY_EMAIL_ADDRESS
-         #     PR_RECEIVED_BY_NAME
-         #     PR_RECEIVED_BY_ENTRYID
-         #     PR_TRANSPORT_MESSAGE_HEADERS
          # Callback from Outlook - locale may have changed.
          locale.setlocale(locale.LC_NUMERIC, "C") # see locale comments above
          self.manager.LogDebug(2, "OnItemAdd event for folder", self,
                                "with item", item)
!         msgstore_message = self.manager.message_store.GetMessage(item)
!         if msgstore_message is not None:
!             ProcessMessage(msgstore_message, self.manager)
  
  # Event fired when item moved into the Spam folder.
--- 229,356 ----
  class _BaseItemsEvent:
      def Init(self, target, application, manager):
+         self.owner_thread_ident = thread.get_ident() # check we arent multi-threaded
          self.application = application
          self.manager = manager
          self.target = target
+         self.use_timer = False
+     def ReInit(self):
+         pass
      def Close(self):
          self.application = self.manager = self.target = None
+         self.close() # the events
+ 
+ class HamFolderItemsEvent(_BaseItemsEvent):
+     def Init(self, *args):
+         _BaseItemsEvent.Init(self, *args)
+         
+         start_delay = self.manager.config.experimental.timer_start_delay
+         interval = self.manager.config.experimental.timer_interval
+         use_timer = start_delay and interval
+         if use_timer:
+             # The user wants to use a timer - see if we should only enable
+             # the timer for known 'inbox' folders, or for all watched folders.
+             is_inbox = self.target.IsReceiveFolder()
+             if not is_inbox and not self.manager.config.experimental.timer_only_receive_folders:
+                 use_timer = False
+ 
+         if use_timer and not hasattr(timer, "__version__"):
+             # No binaries will see this.
+             print "*" * 50
+             print "SORRY: You have tried to enable the timer, but you have a"
+             print "leaky version of the 'timer' module.  These leaks prevent"
+             print "Outlook from shutting down.  Please update win32all to post 154"
+             print "The timer is NOT enabled..."
+             print "*" * 50
+             use_timer = False
+ 
+         # Good chance someone will assume timer is seconds, not ms.
+         if use_timer and (start_delay < 500 or interval < 500):
+             print "*" * 50
+             print "The timer is configured to fire way too often " \
+                   "(delay=%s milliseconds, interval=%s milliseconds)" \
+                   % (start_delay, interval)
+             print "This is very high, and is likely to starve Outlook and the "
+             print "SpamBayes addin.  Please adjust your configuration"
+             print "The timer is NOT enabled..."
+             print "*" * 50
+             use_timer = False
+ 
+         self.use_timer = use_timer
+         self.timer_id = None
+ 
+     def ReInit(self):
+         # We may have swapped between timer and non-timer.
+         if self.use_timer:
+             self._KillTimer()
+         self.Init(self, self.target, self.application, self.manager)
+ 
+     def Close(self, *args):
+         self._KillTimer()
+         _BaseItemsEvent.Close(self, *args)
+     def _DoStartTimer(self, delay):
+         assert thread.get_ident() == self.owner_thread_ident
+         assert self.timer_id is None, "Shouldn't start a timer when already have one"
+         # And start a new timer.
+         assert delay, "No delay means no timer!"
+         self.timer_id = timer.set_timer(delay, self._TimerFunc)
+         self.manager.LogDebug(1, "New message timer started - id=%d, delay=%d" % (self.timer_id, delay))
+ 
+     def _StartTimer(self):
+         # First kill any existing timer
+         self._KillTimer()
+         # And start a new timer.
+         delay = self.manager.config.experimental.timer_start_delay
+         field_name = self.manager.config.general.field_score_name
+         self.timer_generator = self.target.GetNewUnscoredMessageGenerator(field_name)
+         self._DoStartTimer(delay)
+ 
+     def _KillTimer(self):
+         assert thread.get_ident() == self.owner_thread_ident
+         if self.timer_id is not None:
+             self.manager.LogDebug(2, "The timer with id=%d was stopped" % self.timer_id)
+             timer.kill_timer(self.timer_id)
+             self.timer_id = None
+ 
+     def _TimerFunc(self, event, time):
+         # Kill the timer first
+         assert thread.get_ident() == self.owner_thread_ident
+         self._KillTimer()
+         assert self.timer_generator, "Can't have a timer with no generator"
+         # Callback from Outlook - locale may have changed.
+         locale.setlocale(locale.LC_NUMERIC, "C") # see locale comments above
+         # Find a single to item process
+         # If we did manage to process one, start a new timer.
+         # If we didn't, we are done and can wait until some external
+         # event triggers a new timer.
+         try:
+             item = self.timer_generator.next()
+         except StopIteration:
+             # No items left in our generator
+             self.timer_generator = None
+             self.manager.LogDebug(1, "The new message timer found no new items, so is stopping")
+         else:
+             # We have an item to process - do it.
+             try:
+                 ProcessMessage(item, self.manager)
+             finally:
+                 # And setup the timer for the next check.
+                 delay = self.manager.config.experimental.timer_interval
+                 self._DoStartTimer(delay)
  
      def OnItemAdd(self, item):
          # Callback from Outlook - locale may have changed.
          locale.setlocale(locale.LC_NUMERIC, "C") # see locale comments above
          self.manager.LogDebug(2, "OnItemAdd event for folder", self,
                                "with item", item)
!         # Due to the way our "missed message" indicator works, we do
!         # a quick check here for "UnRead".  If UnRead, we assume it is very
!         # new and use our timer.  If not unread, we know our missed message
!         # generator would miss it, so we process it synchronously.
!         if not self.use_timer or not item.UnRead:
!             msgstore_message = self.manager.message_store.GetMessage(item)
!             if msgstore_message is not None:
!                 ProcessMessage(msgstore_message, self.manager)
!         else:
!             self._StartTimer()
  
  # Event fired when item moved into the Spam folder.
***************
*** 916,927 ****
                                      config.watch_folder_ids,
                                      config.watch_include_sub):
!             num = 0
!             start = clock()
!             for message in folder.GetNewUnscoredMessageGenerator(field_name):
!                 ProcessMessage(message, manager)
!                 num += 1
!             # See if perf hurts anyone too much.
!             print "Processing %d missed spam in folder '%s' took %gms" \
!                   % (num, folder.name, (clock()-start)*1000)
  
      def FiltersChanged(self):
--- 1016,1033 ----
                                      config.watch_folder_ids,
                                      config.watch_include_sub):
!             event_hook = self._GetHookForFolder(folder)
!             if event_hook.use_timer:
!                 print "Processing missed spam in folder '%s' by starting a timer" \
!                       % (folder.name,)
!                 event_hook._StartTimer()
!             else:
!                 num = 0
!                 start = clock()
!                 for message in folder.GetNewUnscoredMessageGenerator(field_name):
!                     ProcessMessage(message, manager)
!                     num += 1
!                 # See if perf hurts anyone too much.
!                 print "Processing %d missed spam in folder '%s' took %gms" \
!                       % (num, folder.name, (clock()-start)*1000)
  
      def FiltersChanged(self):
***************
*** 939,943 ****
              self._HookFolderEvents(config.watch_folder_ids,
                                     config.watch_include_sub,
!                                    FolderItemsEvent)
              )
          # For spam manually moved
--- 1045,1049 ----
              self._HookFolderEvents(config.watch_folder_ids,
                                     config.watch_include_sub,
!                                    HamFolderItemsEvent)
              )
          # For spam manually moved
***************
*** 950,956 ****
          for k in self.folder_hooks.keys():
              if not new_hooks.has_key(k):
!                 self.folder_hooks[k]._obj_.close()
          self.folder_hooks = new_hooks
  
      def _HookFolderEvents(self, folder_ids, include_sub, HandlerClass):
          new_hooks = {}
--- 1056,1067 ----
          for k in self.folder_hooks.keys():
              if not new_hooks.has_key(k):
!                 self.folder_hooks[k].Close()
          self.folder_hooks = new_hooks
  
+     def _GetHookForFolder(self, folder):
+         ret = self.folder_hooks[folder.id]
+         assert ret.target == folder
+         return ret
+ 
      def _HookFolderEvents(self, folder_ids, include_sub, HandlerClass):
          new_hooks = {}
***************
*** 960,964 ****
              if existing is None or existing.__class__ != HandlerClass:
                  folder = msgstore_folder.GetOutlookItem()
!                 name = folder.Name.encode("mbcs", "replace")
                  try:
                      new_hook = DispatchWithEvents(folder.Items, HandlerClass)
--- 1071,1075 ----
              if existing is None or existing.__class__ != HandlerClass:
                  folder = msgstore_folder.GetOutlookItem()
!                 name = msgstore_folder.name
                  try:
                      new_hook = DispatchWithEvents(folder.Items, HandlerClass)
***************
*** 967,971 ****
                      new_hook = None
                  if new_hook is not None:
!                     new_hook.Init(folder, self.application, self.manager)
                      new_hooks[msgstore_folder.id] = new_hook
                      self.manager.EnsureOutlookFieldsForFolder(msgstore_folder.GetID())
--- 1078,1082 ----
                      new_hook = None
                  if new_hook is not None:
!                     new_hook.Init(msgstore_folder, self.application, self.manager)
                      new_hooks[msgstore_folder.id] = new_hook
                      self.manager.EnsureOutlookFieldsForFolder(msgstore_folder.GetID())
***************
*** 973,981 ****
              else:
                  new_hooks[msgstore_folder.id] = existing
          return new_hooks
  
      def OnDisconnection(self, mode, custom):
          print "SpamBayes - Disconnecting from Outlook"
!         self.folder_hooks = None
          self.application = None
          self.explorers_events = None
--- 1084,1096 ----
              else:
                  new_hooks[msgstore_folder.id] = existing
+                 exiting.ReInit()
          return new_hooks
  
      def OnDisconnection(self, mode, custom):
          print "SpamBayes - Disconnecting from Outlook"
!         if self.folder_hooks:
!             for hook in self.folder_hooks.values():
!                 hook.Close()
!             self.folder_hooks = None
          self.application = None
          self.explorers_events = None





More information about the Spambayes-checkins mailing list