maildir->mbox conversion script review

matej matej.cepl at gmail.com
Wed Jan 10 00:57:50 CET 2007


Hi,

I am writing a script to convert a couple of thousand emails (in a couple
of hundred folders), and before I get to the hard part -- maintaining the
structure of folders and subfolders, and maintaining a record of the status
of each message -- I would like to be sure that I have at least the
maildir->mbox conversion right. Could anybody comment on the code shown
below, please?
Thanks a lot

Matěj

---------------------------------------------------------------------------------------------------------------------
#!/usr/bin/env python
"""mdir2mbx: yet another maildir -> mbox converter

  mdir2mbx [maildirName] [mboxName]

  TODO:
      * convert all (or as many as possible) status flags from KMail
        to Thunderbird.
      * testing, testing, testing
"""
# Module metadata.  The "$Revision$" and "$Date$" strings look like RCS/CVS
# keyword expansions filled in by the version control system -- confirm.
__version__ = "$Revision: 1.2 $"
__author__ = "Matej Cepl <mcepl at redhat.com>"
__copyright__ = "(C) 2007 Matej Cepl. MIT/X11."
__date__ = "$Date: 2007/01/08 23:56:29 $"
# NOTE(review): this name has three leading underscores, unlike the other
# metadata names above; likely a typo for __contributors__ -- confirm before
# renaming, since other code may read the name as spelled here.
___contributors__ = []

import email, email.Errors, email.Header, email.Generator, mailbox
import codecs, sys, cStringIO

class Mailbox(mailbox.UnixMailbox):
   def __init__(self,filename):

mailbox.UnixMailbox.__init__(self,filename,email.message_from_file)
      self.boxname=filename
      self.content = ""

   def add(self,msg):
      fp = cStringIO.StringIO()
      g = email.Generator.Generator(fp, mangle_from_=True,
maxheaderlen=65)
      g.flatten(msg,unixfrom=True)
      self.content += "%s\n\n" % fp.getvalue()

   def write(self):
      outfile=file(self.boxname,"wb")
      outfile.write("%s\n" % self.content)
      outfile.close()

class MyMaildir(mailbox.Maildir):
   def __init__(self,dirname):
      mailbox.Maildir.__init__(self,dirname,email.message_from_file)
      self.dirname = dirname
      self.decfunc = email.Header.decode_header
      self.msg = ""

   def __translateHeader(self,headerName):
      header = email.Header.decode_header(self.msg[headerName])
      string = header[0][0]
      encoding = header[0][1]
      if not(encoding):
         encoding = "ascii"
      outstr = string.decode(encoding,'ignore')
      return outstr

   def listHeaders(self):
      for self.msg in self:
         hdrfrom = self.__translateHeader("From")
         #hdrto = self.__translateHeader("To")
         hdrdate = self.__translateHeader("Date")
         hdrsubject = self.__translateHeader("Subject")
         print "%s;%s;%s" % (hdrfrom,hdrdate,hdrsubject)
         #header =
email.Header.decode_header(self.msg["Message-Id"])[0][0]
         #print "%s;%s" % (self.dirname,header)

   def writeMBox(self,filename):
      mbox = Mailbox(filename)
      for self.msg in self:
         mbox.add(self.msg)
      mbox.write()

if __name__=="__main__":
   # Two arguments are required: the source maildir and the target mbox.
   # Without this check a missing argument surfaces as a bare IndexError.
   if len(sys.argv) < 3:
      sys.stderr.write("usage: mdir2mbx maildirName mboxName\n")
      sys.exit(2)
   obj = MyMaildir(sys.argv[1])
   obj.writeMBox(sys.argv[2])




More information about the Python-list mailing list