[Spambayes] Problems with unheader.py

Greg Ward gward@python.net
Tue, 24 Sep 2002 21:50:02 -0400


On 24 September 2002, I said:
> #1 is easy to fix -- just remove the ability to read from stdin.  Anyone
> care?  It'll make my patch to add Maildir simpler.

Here's my patch to add Maildir support to unheader.py, and remove
read-from-stdin support at the same time.  I'll probably check it in
once I'm convinced it works, unless someone objects.

----------------------------------------------------------------------
--- unheader.py	24 Sep 2002 17:59:58 -0000	1.4
+++ unheader.py	25 Sep 2002 01:47:53 -0000
@@ -2,9 +2,12 @@
 
 import re
 import sys
+import os
+import glob
 import mailbox
 import email.Parser
 import email.Message
+import email.Generator
 import getopt
 
 def unheader(msg, pat):
@@ -55,14 +58,32 @@
             msg.set_payload("\n".join(newbody))
     unheader(msg, "X-Spam-")
 
+def process_message(msg, dosa, pats):
+    if pats is not None:
+        unheader(msg, pats)
+    if dosa:
+        deSA(msg)
+
 def process_mailbox(f, dosa=1, pats=None):
     for msg in mailbox.PortableUnixMailbox(f, Parser().parse):
-        if pats is not None:
-            unheader(msg, pats)
-        if dosa:
-            deSA(msg)
+        process_message(msg, dosa, pats)
         print msg
 
+def process_maildir(d, dosa=1, pats=None):
+    parser = Parser()
+    for fn in glob.glob(os.path.join(d, "cur", "*")):
+        print ("reading from %s..." % fn),
+        file = open(fn)
+        msg = parser.parse(file)
+        process_message(msg, dosa, pats)
+
+        tmpfn = os.path.join(d, "tmp", os.path.basename(fn))
+        tmpfile = open(tmpfn, "w")
+        print "writing to %s" % tmpfn
+        email.Generator.Generator(tmpfile, maxheaderlen=0)(msg, unixfrom=0)
+
+        #os.rename(tmpfn, fn)
+
 def usage():
     print >> sys.stderr, "usage: unheader.py [ -p pat ... ] [ -s ]"
     print >> sys.stderr, "-p pat gives a regex pattern used to eliminate unwanted headers"
@@ -72,8 +93,9 @@
 def main(args):
     headerpats = []
     dosa = 1
+    ismbox = 1
     try:
-        opts, args = getopt.getopt(args, "p:sh")
+        opts, args = getopt.getopt(args, "p:shd")
     except getopt.GetoptError:
         usage()
         sys.exit(1)
@@ -86,15 +108,19 @@
                 headerpats.append(arg)
             elif opt == "-s":
                 dosa = 0
+            elif opt == "-d":
+                ismbox = 0
         pats = headerpats and "|".join(headerpats) or None
-        if not args:
-            f = sys.stdin
-        elif len(args) == 1:
-            f = file(args[0])
-        else:
+
+        if len(args) != 1:
             usage()
             sys.exit(1)
-        process_mailbox(f, dosa, pats)
+
+        if ismbox:
+            f = file(args[0])
+            process_mailbox(f, dosa, pats)
+        else:
+            process_maildir(args[0], dosa, pats)
 
 if __name__ == "__main__":
     main(sys.argv[1:])

------------------------------------------------------------------------

-- 
Greg Ward <gward@python.net>                         http://www.gerg.ca/
Support bacteria -- it's the only culture some people have!