[Spambayes-checkins] spambayes hammiefilter.py,1.9,1.10
Neale Pickett
npickett at users.sourceforge.net
Tue Jan 21 06:51:01 EST 2003
Update of /cvsroot/spambayes/spambayes
In directory sc8-pr-cvs1:/tmp/cvs-serv6690
Modified Files:
hammiefilter.py
Log Message:
* hammiefilter now has a -t option for a combined filter/train step
* Options has new hammie_train_on_filter and hammie_trained_header options
* hammie.py:Hammie.filter has a new train kwarg to support filter/train in
one step.
Index: hammiefilter.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/hammiefilter.py,v
retrieving revision 1.9
retrieving revision 1.10
diff -C2 -d -r1.9 -r1.10
*** hammiefilter.py 21 Jan 2003 00:17:57 -0000 1.9
--- hammiefilter.py 21 Jan 2003 14:50:25 -0000 1.10
***************
*** 15,19 ****
##
! """Usage: %(program)s [OPTION]
A hammie front-end to make the simple stuff simple. The intent is to call
--- 15,19 ----
##
! """Usage: %(program)s [OPTION]...
A hammie front-end to make the simple stuff simple. The intent is to call
***************
*** 26,51 ****
calling it with either the -g or -s options, respectively.
! Where [OPTION] is one of:
-h
show usage and exit
- -n
- create a new database
- -g
- train on stdin as a good (ham) message
- -s
- train on stdin as a bad (spam) message
-d DBFILE
use database in DBFILE
-D PICKLEFILE
use pickle (instead of database) in PICKLEFILE
-G
! untrain ham on stdin -- only use if you've already trained this
! message!
-S
! untrain spam on stdin -- only use if you've already trained this
! message!
- If neither -g nor -s is given, stdin will be scored: the same message,
- with a new header containing the score, will be send to stdout.
"""
--- 26,54 ----
calling it with either the -g or -s options, respectively.
! [OPTION] is one of:
-h
show usage and exit
-d DBFILE
use database in DBFILE
-D PICKLEFILE
use pickle (instead of database) in PICKLEFILE
+ -n
+ create a new database
+ -g
+ train as a good (ham) message
+ -s
+ train as a bad (spam) message
+ -t
+ filter and train based on the result (you must make sure to
+ untrain all mistakes later)
-G
! untrain ham (only use if you've already trained this message)
-S
! untrain spam (only use if you've already trained this message)
!
! All processing options operate on stdin. If no processing options are
! given, stdin will be scored: the same message, with a new header
! containing the score, will be send to stdout.
"""
***************
*** 53,69 ****
import sys
import getopt
! from spambayes import hammie, Options
# See Options.py for explanations of these properties
program = sys.argv[0]
- # Options
- options = Options.options
- options.mergefiles(['/etc/hammierc',
- os.path.expanduser('~/.hammierc')])
- DBNAME = options.hammiefilter_persistent_storage_file
- DBNAME = os.path.expanduser(DBNAME)
- USEDB = options.hammiefilter_persistent_use_database
-
def usage(code, msg=''):
"""Print usage message and sys.exit(code)."""
--- 56,64 ----
import sys
import getopt
! from spambayes import hammie, Options, mboxutils
# See Options.py for explanations of these properties
program = sys.argv[0]
def usage(code, msg=''):
"""Print usage message and sys.exit(code)."""
***************
*** 77,82 ****
def __init__(self):
options = Options.options
! self.dbname = DBNAME
! self.usedb = USEDB
def newdb(self):
--- 72,80 ----
def __init__(self):
options = Options.options
! options.mergefiles(['/etc/hammierc',
! os.path.expanduser('~/.hammierc')])
! self.dbname = options.hammiefilter_persistent_storage_file
! self.dbname = os.path.expanduser(self.dbname)
! self.usedb = options.hammiefilter_persistent_use_database
def newdb(self):
***************
*** 85,144 ****
print "Created new database in", self.dbname
! def filter(self):
h = hammie.open(self.dbname, self.usedb, 'r')
- msg = sys.stdin.read()
print h.filter(msg)
! def train_ham(self):
h = hammie.open(self.dbname, self.usedb, 'c')
- msg = sys.stdin.read()
h.train_ham(msg)
h.store()
! def train_spam(self):
h = hammie.open(self.dbname, self.usedb, 'c')
- msg = sys.stdin.read()
h.train_spam(msg)
h.store()
! def untrain_ham(self):
h = hammie.open(self.dbname, self.usedb, 'c')
- msg = sys.stdin.read()
h.untrain_ham(msg)
h.store()
! def untrain_spam(self):
h = hammie.open(self.dbname, self.usedb, 'c')
- msg = sys.stdin.read()
h.untrain_spam(msg)
h.store()
def main():
- global DBNAME, USEDB
-
h = HammieFilter()
! action = h.filter
! opts, args = getopt.getopt(sys.argv[1:], 'hngsGSd:D:', ['help'])
for opt, arg in opts:
if opt in ('-h', '--help'):
usage(0)
elif opt == '-d':
! USEDB = True
! DBNAME = arg
elif opt == '-D':
! USEDB = False
! DBNAME = arg
elif opt == '-g':
! action = h.train_ham
elif opt == '-s':
! action = h.train_spam
elif opt == '-G':
! action = h.untrain_ham
elif opt == '-S':
! action = h.untrain_spam
elif opt == "-n":
! action = h.newdb
- action()
if __name__ == "__main__":
--- 83,148 ----
print "Created new database in", self.dbname
! def filter(self, msg):
h = hammie.open(self.dbname, self.usedb, 'r')
print h.filter(msg)
! def filter_train(self, msg):
! h = hammie.open(self.dbname, self.usedb, 'c')
! print h.filter(msg, train=True)
!
! def train_ham(self, msg):
h = hammie.open(self.dbname, self.usedb, 'c')
h.train_ham(msg)
h.store()
! def train_spam(self, msg):
h = hammie.open(self.dbname, self.usedb, 'c')
h.train_spam(msg)
h.store()
! def untrain_ham(self, msg):
h = hammie.open(self.dbname, self.usedb, 'c')
h.untrain_ham(msg)
h.store()
! def untrain_spam(self, msg):
h = hammie.open(self.dbname, self.usedb, 'c')
h.untrain_spam(msg)
h.store()
def main():
h = HammieFilter()
! actions = []
! opts, args = getopt.getopt(sys.argv[1:], 'hd:D:ngstGS', ['help'])
for opt, arg in opts:
if opt in ('-h', '--help'):
usage(0)
elif opt == '-d':
! h.usedb = True
! h.dbname = arg
elif opt == '-D':
! h.usedb = False
! h.dbname = arg
elif opt == '-g':
! actions.append(h.train_ham)
elif opt == '-s':
! actions.append(h.train_spam)
! elif opt == '-t':
! actions.append(h.filter_train)
elif opt == '-G':
! actions.append(h.untrain_ham)
elif opt == '-S':
! actions.append(h.untrain_spam)
elif opt == "-n":
! h.newdb()
! sys.exit(0)
!
! if actions == []:
! actions = [h.filter]
!
! msg = mboxutils.get_message(sys.stdin)
! for action in actions:
! action(msg)
if __name__ == "__main__":
More information about the Spambayes-checkins
mailing list