[Spambayes-checkins] spambayes mboxutils.py,1.3,1.4

Skip Montanaro montanaro@users.sourceforge.net
Wed Nov 6 01:57:42 2002


Update of /cvsroot/spambayes/spambayes
In directory usw-pr-cvs1:/tmp/cvs-serv12413

Modified Files:
	mboxutils.py 
Log Message:
Add get_message() factory function, taken from
tokenizer.Tokenizer.get_message().  Remove the _factory() helper and use
get_message() in its place.



Index: mboxutils.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/mboxutils.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** mboxutils.py	27 Oct 2002 21:35:00 -0000	1.3
--- mboxutils.py	6 Nov 2002 01:57:39 -0000	1.4
***************
*** 24,27 ****
--- 24,28 ----
  import email
  import mailbox
+ import email.Message
  
  class DirOfTxtFileMailbox:
***************
*** 44,54 ****
              f.close()
  
- def _factory(fp):
-     # Helper for getmbox
-     try:
-         return email.message_from_file(fp)
-     except email.Errors.MessageParseError:
-         return ''
- 
  def _cat(seqs):
      for seq in seqs:
--- 45,48 ----
***************
*** 74,78 ****
          for name in names:
              filename = os.path.join(mhpath, name)
!             mbox = mailbox.MHMailbox(filename, _factory)
              mboxes.append(mbox)
          if len(mboxes) == 1:
--- 68,72 ----
          for name in names:
              filename = os.path.join(mhpath, name)
!             mbox = mailbox.MHMailbox(filename, get_message)
              mboxes.append(mbox)
          if len(mboxes) == 1:
***************
*** 85,95 ****
          # if the pathname contains /Mail/, else a DirOfTxtFileMailbox.
          if os.path.exists(os.path.join(name, 'cur')):
!             mbox = mailbox.Maildir(name, _factory)
          elif name.find("/Mail/") >= 0:
!             mbox = mailbox.MHMailbox(name, _factory)
          else:
!             mbox = DirOfTxtFileMailbox(name, _factory)
      else:
          fp = open(name, "rb")
!         mbox = mailbox.PortableUnixMailbox(fp, _factory)
      return iter(mbox)
--- 79,120 ----
          # if the pathname contains /Mail/, else a DirOfTxtFileMailbox.
          if os.path.exists(os.path.join(name, 'cur')):
!             mbox = mailbox.Maildir(name, get_message)
          elif name.find("/Mail/") >= 0:
!             mbox = mailbox.MHMailbox(name, get_message)
          else:
!             mbox = DirOfTxtFileMailbox(name, get_message)
      else:
          fp = open(name, "rb")
!         mbox = mailbox.PortableUnixMailbox(fp, get_message)
      return iter(mbox)
+ 
+ def get_message(obj):
+     """Return an email Message object.
+ 
+     The argument may be a Message object already, in which case it's
+     returned as-is.
+ 
+     If the argument is a string or file-like object (supports read()),
+     the email package is used to create a Message object from it.  This
+     can fail if the message is malformed.  In that case, the headers
+     (everything through the first blank line) are thrown out, and the
+     rest of the text is wrapped in a bare email.Message.Message.
+     """
+ 
+     if isinstance(obj, email.Message.Message):
+         return obj
+     # Create an email Message object.
+     if hasattr(obj, "read"):
+         obj = obj.read()
+     try:
+         msg = email.message_from_string(obj)
+     except email.Errors.MessageParseError:
+         # Wrap the raw text in a bare Message object.  Since the
+         # headers are most likely damaged, we can't use the email
+         # package to parse them, so just get rid of them first.
+         i = obj.find('\n\n')
+         if i >= 0:
+             obj = obj[i+2:]     # strip headers
+         msg = email.Message.Message()
+         msg.set_payload(obj)
+     return msg