[Spambayes-checkins] spambayes mboxutils.py,1.3,1.4
Skip Montanaro
montanaro@users.sourceforge.net
Wed Nov 6 01:57:42 2002
Update of /cvsroot/spambayes/spambayes
In directory usw-pr-cvs1:/tmp/cvs-serv12413
Modified Files:
mboxutils.py
Log Message:
Add a get_message() factory function, taken from
tokenizer.Tokenizer.get_message(), and use it in place of _factory().
Index: mboxutils.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/mboxutils.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** mboxutils.py 27 Oct 2002 21:35:00 -0000 1.3
--- mboxutils.py 6 Nov 2002 01:57:39 -0000 1.4
***************
*** 24,27 ****
--- 24,28 ----
import email
import mailbox
+ import email.Message
class DirOfTxtFileMailbox:
***************
*** 44,54 ****
f.close()
- def _factory(fp):
- # Helper for getmbox
- try:
- return email.message_from_file(fp)
- except email.Errors.MessageParseError:
- return ''
-
def _cat(seqs):
for seq in seqs:
--- 45,48 ----
***************
*** 74,78 ****
for name in names:
filename = os.path.join(mhpath, name)
! mbox = mailbox.MHMailbox(filename, _factory)
mboxes.append(mbox)
if len(mboxes) == 1:
--- 68,72 ----
for name in names:
filename = os.path.join(mhpath, name)
! mbox = mailbox.MHMailbox(filename, get_message)
mboxes.append(mbox)
if len(mboxes) == 1:
***************
*** 85,95 ****
# if the pathname contains /Mail/, else a DirOfTxtFileMailbox.
if os.path.exists(os.path.join(name, 'cur')):
! mbox = mailbox.Maildir(name, _factory)
elif name.find("/Mail/") >= 0:
! mbox = mailbox.MHMailbox(name, _factory)
else:
! mbox = DirOfTxtFileMailbox(name, _factory)
else:
fp = open(name, "rb")
! mbox = mailbox.PortableUnixMailbox(fp, _factory)
return iter(mbox)
--- 79,120 ----
# if the pathname contains /Mail/, else a DirOfTxtFileMailbox.
if os.path.exists(os.path.join(name, 'cur')):
! mbox = mailbox.Maildir(name, get_message)
elif name.find("/Mail/") >= 0:
! mbox = mailbox.MHMailbox(name, get_message)
else:
! mbox = DirOfTxtFileMailbox(name, get_message)
else:
fp = open(name, "rb")
! mbox = mailbox.PortableUnixMailbox(fp, get_message)
return iter(mbox)
+
+ def get_message(obj):
+ """Return an email Message object.
+
+ The argument may be a Message object already, in which case it's
+ returned as-is.
+
+ If the argument is a string or file-like object (supports read()),
+ the email package is used to create a Message object from it. This
+ can fail if the message is malformed. In that case, the headers
+ (everything through the first blank line) are thrown out, and the
+ rest of the text is wrapped in a bare email.Message.Message.
+ """
+
+ if isinstance(obj, email.Message.Message):
+ return obj
+ # Create an email Message object.
+ if hasattr(obj, "read"):
+ obj = obj.read()
+ try:
+ msg = email.message_from_string(obj)
+ except email.Errors.MessageParseError:
+ # Wrap the raw text in a bare Message object. Since the
+ # headers are most likely damaged, we can't use the email
+ # package to parse them, so just get rid of them first.
+ i = obj.find('\n\n')
+ if i >= 0:
+ obj = obj[i+2:] # strip headers
+ msg = email.Message.Message()
+ msg.set_payload(obj)
+ return msg