[Spambayes-checkins] spambayes/spambayes oe_mailbox.py,1.3,1.4

Tony Meyer anadelonbrin at users.sourceforge.net
Sun Dec 21 21:28:28 EST 2003


Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs1:/tmp/cvs-serv4421/spambayes

Modified Files:
	oe_mailbox.py 
Log Message:
Move OE specific stuff out from UserInterface.py to oe_mailbox.py

Index: oe_mailbox.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/oe_mailbox.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** oe_mailbox.py	16 Dec 2003 05:06:34 -0000	1.3
--- oe_mailbox.py	22 Dec 2003 02:28:26 -0000	1.4
***************
*** 1,2 ****
--- 1,4 ----
+ from __future__ import generators
+ 
  # This module is part of the spambayes project, which is Copyright 2002-3
  # The Python Software Foundation and is covered by the Python Software
***************
*** 7,14 ****
--- 9,25 ----
  
  __author__ = "Romain Guy"
+ __credits__ = "All the SpamBayes folk"
  
  import binascii
  import os
  import struct
+ import msgs
+ import StringIO
+ import sys
+ 
+ if sys.platform == "win32":
+     import win32api
+     import win32con
+     from win32com.shell import shell, shellcon
  
  ###########################################################################
***************
*** 416,419 ****
--- 427,579 ----
          return self.dbxText
  
+ # This started its SpamBayes life as a private method of the UserInterface
+ # class, but is really a general purpose (Outlook Express) function.
+ def convertToMbox(content):
+     """Return content in mbox format, converting from OE dbx if needed.
+ 
+     content is a mailbox buffer (a string).  If it parses as a dbx
+     message store, each indexed message is emitted after a synthetic
+     mbox "From " line; otherwise content is returned unchanged.
+     """
+     # Imported locally: the module-level imports do not provide these.
+     from time import gmtime, strftime
+     dbxStream = StringIO.StringIO(content)
+     try:
+         header = dbxFileHeader(dbxStream)
+         if not (header.isValid() and header.isMessages()):
+             return content
+ 
+         # Constructed as in the original code; the result is unused here.
+         dbxFileInfo(dbxStream,
+                     header.getEntry(dbxFileHeader.FH_FILE_INFO_LENGTH))
+         entries = header.getEntry(dbxFileHeader.FH_ENTRIES)
+         address = header.getEntry(dbxFileHeader.FH_TREE_ROOT_NODE_PTR)
+         if not (address and entries):
+             return content
+ 
+         tree = dbxTree(dbxStream, address, entries)
+         msg_key = dbxMessageInfo.MI_MESSAGE_ADDRESS
+         parts = []
+         for i in range(entries):
+             messageInfo = dbxMessageInfo(dbxStream, tree.getValue(i))
+             if not messageInfo.isIndexed(msg_key):
+                 continue
+             message = dbxMessage(dbxStream,
+                                  messageInfo.getValueAsLong(msg_key))
+             # Fake a "From " line to conform to mbox; it would be better
+             # to extract this from the message, as it skews the tokens.
+             parts.append("From spambayes@spambayes.org %s\n%s"
+                          % (strftime("%a %b %d %H:%M:%S MET %Y", gmtime()),
+                             message.getText()))
+         return "".join(parts)
+     finally:
+         dbxStream.close()
+ 
+ def OEStoreRoot():
+     """Return the path to the Outlook Express Store Root, or None.
+ 
+     Scans every identity of every user under HKEY_USERS for the OE 5.0
+     "Store Root" registry value.  Tested with OE 5.0 on Windows XP.
+     Raises NotImplementedError on platforms other than win32."""
+     if sys.platform != "win32":
+         # AFAIK, there is only a Win32 OE, and a Mac OE.
+         # The Mac OE should be easy enough, but I don't know
+         # where the dbx files are stored (I presume they are in the
+         # same format).
+         raise NotImplementedError
+     oe_key = "Software\\Microsoft\\Outlook Express\\5.0"
+     default_root = "%UserProfile%\\Local Settings\\Application Data"
+     reg = win32api.RegOpenKeyEx(win32con.HKEY_USERS, "")
+     user_index = 0
+     while True:
+         # Loop through all the users
+         try:
+             user_name = "%s\\Identities" % \
+                         (win32api.RegEnumKey(reg, user_index),)
+         except win32api.error:
+             # No more users: no Store Root was found anywhere.
+             return None
+         user_index += 1
+         try:
+             user_key = win32api.RegOpenKeyEx(win32con.HKEY_USERS, user_name)
+         except win32api.error:
+             # This user has no Identities key
+             continue
+ 
+         identity_index = 0
+         while True:
+             # Loop through all the identities
+             try:
+                 identity_name = win32api.RegEnumKey(user_key,
+                                                     identity_index)
+             except win32api.error:
+                 break
+             identity_index += 1
+             subkey_name = "%s\\%s\\%s" % (user_name, identity_name, oe_key)
+             try:
+                 subkey = win32api.RegOpenKeyEx(win32con.HKEY_USERS,
+                                                subkey_name, 0,
+                                                win32con.KEY_READ)
+             except win32api.error:
+                 # This identity has no OE 5.0 settings
+                 continue
+             try:
+                 raw = win32api.RegQueryValueEx(subkey, "Store Root")
+             except win32api.error:
+                 # No Store Root here; the user's other identities may have one.
+                 continue
+             # The value may hold an unexpanded %UserProfile% default prefix.
+             local_app_data = shell.SHGetFolderPath \
+                              (0, shellcon.CSIDL_LOCAL_APPDATA, 0, 0)
+             return raw[0].replace(default_root, local_app_data)
+ 
+ ## For use by the test tools.
+ class OEMsg(msgs.Msg):
+     """Message given its text (guts) and tag; Msg.__init__ is not called."""
+     def __init__(self, guts, id):
+         self.guts, self.tag = guts, id
+ 
+ # The iterator yields a stream of Msg objects, taken from a list of
+ # dbx files.
+ class OEMsgStream(msgs.MsgStream):
+     """Yield OEMsg objects for the messages in a list of dbx files."""
+     def __init__(self, tag, dbxes, keep=None):
+         msgs.MsgStream.__init__(self, tag, dbxes, keep)
+ 
+     def _messages(self, dbx):
+         # Return the messages of one dbx file as a list of strings.
+         # dbx files are binary, and convertToMbox wants the contents,
+         # not the file object.
+         f = open(dbx, "rb")
+         try:
+             folder = convertToMbox(f.read())
+         finally:
+             f.close()
+         return folder.split("\nFrom ") # XXX Is this right?
+ 
+     def produce(self):
+         # When keep is set, only that many messages per file are used,
+         # chosen by a shuffle seeded so repeated calls agree.
+         import random
+         for dbx in self.directories:
+             all = self._messages(dbx)
+             if self.keep is not None:
+                 # msgs.SEED: assumed to be the seed msgs.MsgStream uses.
+                 random.seed(hash(max(all)) ^ msgs.SEED)
+                 random.shuffle(all)
+                 del all[self.keep:]
+                 all.sort()  # for consistency with MsgStream
+             for count in range(len(all)):
+                 yield OEMsg(all[count], "%s::%s" % (dbx, count))
+ 
+ class OEHamStream(msgs.HamStream):  # NOTE(review): base is not OEMsgStream
+     def __init__(self, tag, dbxes, train=0):  # dbxes: presumably dbx paths
+         msgs.HamStream.__init__(self, tag, dbxes, train)  # forwards as-is
+ 
+ class OESpamStream(msgs.SpamStream):  # NOTE(review): base is not OEMsgStream
+     def __init__(self, tag, dbxes, train=0):  # dbxes: presumably dbx paths
+         msgs.SpamStream.__init__(self, tag, dbxes, train)  # forwards as-is
+ 
+ 
  ###########################################################################
  ## TEST DRIVER
***************
*** 442,446 ****
          sys.exit()
  
!     MAILBOX_DIR = args[0]
  
      files = [os.path.join(MAILBOX_DIR, file) for file in \
--- 602,606 ----
          sys.exit()
  
!     MAILBOX_DIR = args[0]  
  
      files = [os.path.join(MAILBOX_DIR, file) for file in \
***************
*** 468,485 ****
                      tree = dbxTree(dbx, address, entries)
  
!                     for i in range(entries):
!                         address = tree.getValue(i)
!                         messageInfo = dbxMessageInfo(dbx, address)
  
!                         if messageInfo.isIndexed(dbxMessageInfo.MI_MESSAGE_ADDRESS):
!                             messageAddress = messageInfo.getValueAsLong(dbxMessageInfo.MI_MESSAGE_ADDRESS)
!                             message        = dbxMessage(dbx, messageAddress)
  
!                             if print_message:
!                                 print
!                                 print "Message :", messageInfo.getString(dbxMessageInfo.MI_SUBJECT)
!                                 print "=" * (len(messageInfo.getString(dbxMessageInfo.MI_SUBJECT)) + 9)
!                                 print
!                                 print message.getText()
  
          except Exception, (strerror):
--- 628,645 ----
                      tree = dbxTree(dbx, address, entries)
  
!                 for i in range(entries):
!                     address = tree.getValue(i)
!                     messageInfo = dbxMessageInfo(dbx, address)
  
!                     if messageInfo.isIndexed(dbxMessageInfo.MI_MESSAGE_ADDRESS):
!                         messageAddress = messageInfo.getValueAsLong(dbxMessageInfo.MI_MESSAGE_ADDRESS)
!                         message        = dbxMessage(dbx, messageAddress)
  
!                         if print_message:
!                             print
!                             print "Message :", messageInfo.getString(dbxMessageInfo.MI_SUBJECT)
!                             print "=" * (len(messageInfo.getString(dbxMessageInfo.MI_SUBJECT)) + 9)
!                             print
!                             print message.getText()
  
          except Exception, (strerror):





More information about the Spambayes-checkins mailing list