[Spambayes-checkins]
spambayes hammiefilter.py,NONE,1.1 README.txt,1.42,1.43
hammie.py,1.38,1.39 mboxutils.py,1.6,1.7
Neale Pickett
npickett@users.sourceforge.net
Sun Nov 17 03:42:39 2002
Update of /cvsroot/spambayes/spambayes
In directory usw-pr-cvs1:/tmp/cvs-serv12012
Modified Files:
README.txt hammie.py mboxutils.py
Added Files:
hammiefilter.py
Log Message:
* WordInfo optimization in hammie.py. If you didn't catch it on the
mail list, this is going to make your dbm file smaller, and
unusable by older hammies.
* hammie.py can now take messages on stdin, but it's ugly. If you
want to do this, you should look at hammiefilter.py
* hammiefilter.py is like hammie jr--it takes a single message on
stdin and either scores it or trains on it.
* Modified README to talk about hammiecli.py and new hammiefilter.py
--- NEW FILE: hammiefilter.py ---
#!/usr/bin/env python
## A hammie front-end to make the simple stuff simple.
##
##
## The intent is to call this from procmail and its ilk like so:
##
## :0 fw
## | hammiefilter.py
##
## Then, you can set up your MUA to pipe ham and spam to it, one at a
## time, by calling it with either the -g or -s options, respectively.
##
## Author: Neale Pickett <neale@woozle.org>
##
"""Usage: %(program)s [option]
Where [option] is one of:
-h
show usage and exit
-n
create a new database
-g
train on stdin as a good (ham) message
-s
train on stdin as a bad (spam) message
If neither -g nor -s is given, stdin will be scored: the same message,
with a new header containing the score, will be sent to stdout.
"""
import sys
import getopt
import hammie
from Options import options
# See Options.py for explanations of these properties
# DBNAME and USEDB are handed to hammie.createbayes() by hammie_open().
DBNAME = options.persistent_storage_file
USEDB = options.persistent_use_database
# Script name as invoked; substituted for %(program)s in the usage text.
program = sys.argv[0]
def usage(code, msg=''):
    """Write the usage text to stderr and exit with status `code`.

    When `msg` is non-empty it is printed first, followed by a blank
    line.  The usage text is the module docstring interpolated with the
    module globals (which is where %(program)s comes from).
    """
    err = sys.stderr
    if msg:
        print >> err, msg
        print >> err
    help_text = __doc__ % globals()
    print >> err, help_text
    sys.exit(code)
def hammie_open(mode):
    """Return a hammie.Hammie wrapping the configured database.

    `mode` is passed straight through to hammie.createbayes() together
    with DBNAME and USEDB; callers in this file use 'n', 'r' and 'w'
    (presumably dbm-style open flags -- confirm against hammie.py).
    """
    bayes = hammie.createbayes(DBNAME, USEDB, mode)
    wrapper = hammie.Hammie(bayes)
    return wrapper
def newdb():
hammie_open('n')
print "Created new database in", DBNAME
def filter():
h = hammie_open('r')
msg = sys.stdin.read()
print h.filter(msg)
def train_ham():
    """Train on the message read from stdin as ham (a good message).

    Opens the database in 'w' mode, trains on the whole of stdin, then
    asks the Hammie object to update its probabilities.
    """
    trainer = hammie_open('w')
    text = sys.stdin.read()
    trainer.train_ham(text)
    trainer.update_probabilities()
def train_spam():
    """Train on the message read from stdin as spam (a bad message).

    Opens the database in 'w' mode, trains on the whole of stdin, then
    asks the Hammie object to update its probabilities.
    """
    trainer = hammie_open('w')
    text = sys.stdin.read()
    trainer.train_spam(text)
    trainer.update_probabilities()
def main():
    """Parse the command line and run the selected action.

    With no option the message on stdin is scored (filter).  -g and -s
    select ham and spam training, -n creates a new database, and -h
    prints the usage text and exits with status 0.  If several action
    options are given, the last one wins.

    An unrecognised option prints the getopt error followed by the
    usage text and exits with status 2, rather than dying with a
    traceback.
    """
    action = filter
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hngs')
    except getopt.error:
        # sys.exc_info() avoids version-specific "except X, e" syntax.
        usage(2, str(sys.exc_info()[1]))
    for opt, arg in opts:
        if opt == '-h':
            usage(0)
        elif opt == '-g':
            action = train_ham
        elif opt == '-s':
            action = train_spam
        elif opt == "-n":
            action = newdb
    action()
# Run main() only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Index: README.txt
===================================================================
RCS file: /cvsroot/spambayes/spambayes/README.txt,v
retrieving revision 1.42
retrieving revision 1.43
diff -C2 -d -r1.42 -r1.43
*** README.txt 13 Nov 2002 18:13:43 -0000 1.42
--- README.txt 17 Nov 2002 03:42:36 -0000 1.43
***************
*** 68,71 ****
--- 68,78 ----
XML-RPC.
+ hammiecli.py
+ A client for hammiesrv.
+
+ hammiefilter.py
+ A simpler hammie front-end that doesn't print anything. Useful for
+ procmail filtering and scoring from your MUA.
+
pop3proxy.py
A spam-classifying POP3 proxy. It adds a spam-judgement header to
Index: hammie.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/hammie.py,v
retrieving revision 1.38
retrieving revision 1.39
diff -C2 -d -r1.38 -r1.39
*** hammie.py 14 Nov 2002 22:00:15 -0000 1.38
--- hammie.py 17 Nov 2002 03:42:37 -0000 1.39
***************
*** 11,18 ****
-g PATH
mbox or directory of known good messages (non-spam) to train on.
! Can be specified more than once.
-s PATH
mbox or directory of known spam messages to train on.
! Can be specified more than once.
-u PATH
mbox of unknown messages. A ham/spam decision is reported for each.
--- 11,18 ----
-g PATH
mbox or directory of known good messages (non-spam) to train on.
! Can be specified more than once, or use - for stdin.
-s PATH
mbox or directory of known spam messages to train on.
! Can be specified more than once, or use - for stdin.
-u PATH
mbox of unknown messages. A ham/spam decision is reported for each.
***************
*** 41,44 ****
--- 41,45 ----
import sys
import os
+ import types
import getopt
import mailbox
***************
*** 110,120 ****
def __getitem__(self, key):
! if self.hash.has_key(key):
! return pickle.loads(self.hash[key])
else:
! raise KeyError(key)
def __setitem__(self, key, val):
! v = pickle.dumps(val, 1)
self.hash[key] = v
--- 111,131 ----
def __getitem__(self, key):
! v = self.hash[key]
! if v[0] == 'W':
! val = pickle.loads(v[1:])
! # We could be sneaky, like pickle.Unpickler.load_inst,
! # but I think that's overly confusing.
! obj = classifier.WordInfo(0)
! obj.__setstate__(val)
! return obj
else:
! return pickle.loads(v)
def __setitem__(self, key, val):
! if isinstance(val, classifier.WordInfo):
! val = val.__getstate__()
! v = 'W' + pickle.dumps(val, 1)
! else:
! v = pickle.dumps(val, 1)
self.hash[key] = v
Index: mboxutils.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/mboxutils.py,v
retrieving revision 1.6
retrieving revision 1.7
diff -C2 -d -r1.6 -r1.7
*** mboxutils.py 12 Nov 2002 23:16:04 -0000 1.6
--- mboxutils.py 17 Nov 2002 03:42:37 -0000 1.7
***************
*** 1,2 ****
--- 1,3 ----
+ #! /usr/bin/env python
"""Utilities for dealing with various types of mailboxes.
***************
*** 21,24 ****
--- 22,26 ----
import os
+ import sys
import glob
import email
***************
*** 53,56 ****
--- 55,61 ----
def getmbox(name):
"""Return an mbox iterator given a file/directory/folder name."""
+
+ if name == "-":
+ return [get_message(sys.stdin)]
if name.startswith("+"):
More information about the Spambayes-checkins
mailing list