[Spambayes] Problems with unheader.py
Greg Ward
gward@python.net
Tue, 24 Sep 2002 21:50:02 -0400
On 24 September 2002, I said:
> #1 is easy to fix -- just remove the ability to read from stdin. Anyone
> care? It'll make my patch to add Maildir simpler.
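Here's my patch to add Maildir support to unheader.py (selected with a
new -d option), and remove read-from-stdin support at the same time.
Note that the final os.rename() in process_maildir() is still commented
out, so for now the cleaned messages are only written to the Maildir's
tmp/ directory and the originals in cur/ are left untouched. I'll
probably check it in once I'm convinced it works, unless someone objects.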
----------------------------------------------------------------------
--- unheader.py 24 Sep 2002 17:59:58 -0000 1.4
+++ unheader.py 25 Sep 2002 01:47:53 -0000
@@ -2,9 +2,12 @@
import re
import sys
+import os
+import glob
import mailbox
import email.Parser
import email.Message
+import email.Generator
import getopt
def unheader(msg, pat):
@@ -55,14 +58,32 @@
msg.set_payload("\n".join(newbody))
unheader(msg, "X-Spam-")
+def process_message(msg, dosa, pats):
+ if pats is not None:
+ unheader(msg, pats)
+ if dosa:
+ deSA(msg)
+
def process_mailbox(f, dosa=1, pats=None):
for msg in mailbox.PortableUnixMailbox(f, Parser().parse):
- if pats is not None:
- unheader(msg, pats)
- if dosa:
- deSA(msg)
+ process_message(msg, dosa, pats)
print msg
+def process_maildir(d, dosa=1, pats=None):
+ parser = Parser()
+ for fn in glob.glob(os.path.join(d, "cur", "*")):
+ print ("reading from %s..." % fn),
+ file = open(fn)
+ msg = parser.parse(file)
+ process_message(msg, dosa, pats)
+
+ tmpfn = os.path.join(d, "tmp", os.path.basename(fn))
+ tmpfile = open(tmpfn, "w")
+ print "writing to %s" % tmpfn
+ email.Generator.Generator(tmpfile, maxheaderlen=0)(msg, unixfrom=0)
+
+ #os.rename(tmpfn, fn)
+
def usage():
print >> sys.stderr, "usage: unheader.py [ -p pat ... ] [ -s ]"
print >> sys.stderr, "-p pat gives a regex pattern used to eliminate unwanted headers"
@@ -72,8 +93,9 @@
def main(args):
headerpats = []
dosa = 1
+ ismbox = 1
try:
- opts, args = getopt.getopt(args, "p:sh")
+ opts, args = getopt.getopt(args, "p:shd")
except getopt.GetoptError:
usage()
sys.exit(1)
@@ -86,15 +108,19 @@
headerpats.append(arg)
elif opt == "-s":
dosa = 0
+ elif opt == "-d":
+ ismbox = 0
pats = headerpats and "|".join(headerpats) or None
- if not args:
- f = sys.stdin
- elif len(args) == 1:
- f = file(args[0])
- else:
+
+ if len(args) != 1:
usage()
sys.exit(1)
- process_mailbox(f, dosa, pats)
+
+ if ismbox:
+ f = file(args[0])
+ process_mailbox(f, dosa, pats)
+ else:
+ process_maildir(args[0], dosa, pats)
if __name__ == "__main__":
main(sys.argv[1:])
------------------------------------------------------------------------
--
Greg Ward <gward@python.net> http://www.gerg.ca/
Support bacteria -- it's the only culture some people have!