[Spambayes-checkins] spambayes/contrib tte.py,1.3,1.4
Skip Montanaro
montanaro at users.sourceforge.net
Fri Feb 13 14:43:39 EST 2004
Update of /cvsroot/spambayes/spambayes/contrib
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv23880
Modified Files:
tte.py
Log Message:
Set a user-settable upper limit on the number of rounds to prevent infinite
loops.
Index: tte.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/contrib/tte.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** tte.py 13 Feb 2004 15:09:33 -0000 1.3
--- tte.py 13 Feb 2004 19:43:36 -0000 1.4
***************
*** 5,9 ****
everything scores properly.
! usage %(prog)s [ -h ] -g file -s file [ -d file | -p file ] [ -m N ]
-h - print this documentation and exit.
--- 5,9 ----
everything scores properly.
! usage %(prog)s [ -h ] -g file -s file [ -d file | -p file ] [ -m N ] [ -r N ]
-h - print this documentation and exit.
***************
*** 19,22 ****
--- 19,25 ----
-m N - train on at most N messages (nham == N/2 and nspam == N/2)
+ -r N - run at most N rounds (default %(MAXROUNDS)s), even if not
+ all messages score correctly
+
See Gary Robinson's blog:
***************
*** 38,41 ****
--- 41,46 ----
prog = os.path.basename(sys.argv[0])
+ MAXROUNDS = 10
+
def usage(msg=None):
if msg is not None:
***************
*** 43,52 ****
print >> sys.stderr, __doc__.strip() % globals()
! def train(store, ham, spam, maxmsgs):
smisses = hmisses = round = 0
ham_cutoff = Options.options["Categorization", "ham_cutoff"]
spam_cutoff = Options.options["Categorization", "spam_cutoff"]
! while hmisses or smisses or round == 0:
hambone = mboxutils.getmbox(ham)
spamcan = mboxutils.getmbox(spam)
--- 48,57 ----
print >> sys.stderr, __doc__.strip() % globals()
! def train(store, ham, spam, maxmsgs, maxrounds):
smisses = hmisses = round = 0
ham_cutoff = Options.options["Categorization", "ham_cutoff"]
spam_cutoff = Options.options["Categorization", "spam_cutoff"]
! while round < maxrounds and (hmisses or smisses or round == 0):
hambone = mboxutils.getmbox(ham)
spamcan = mboxutils.getmbox(spam)
***************
*** 98,105 ****
def main(args):
try:
! opts, args = getopt.getopt(args, "hg:s:d:p:o:m:",
["help", "good=", "spam=",
"database=", "pickle=",
! "option=", "max="])
except getopt.GetoptError, msg:
usage(msg)
--- 103,110 ----
def main(args):
try:
! opts, args = getopt.getopt(args, "hg:s:d:p:o:m:r:",
["help", "good=", "spam=",
"database=", "pickle=",
! "option=", "max=", "maxrounds="])
except getopt.GetoptError, msg:
usage(msg)
***************
*** 108,111 ****
--- 113,117 ----
ham = spam = dbname = usedb = None
maxmsgs = 0
+ maxrounds = MAXROUNDS
for opt, arg in opts:
if opt in ("-h", "--help"):
***************
*** 118,121 ****
--- 124,129 ----
elif opt in ("-m", "--max"):
maxmsgs = int(arg)
+ elif opt in ("-r", "--maxrounds"):
+ maxrounds = int(arg)
elif opt in ('-o', '--option'):
Options.options.set_from_cmdline(arg, sys.stderr)
***************
*** 134,138 ****
store = storage.open_storage(dbname, usedb)
! train(store, ham, spam, maxmsgs)
store.store()
--- 142,146 ----
store = storage.open_storage(dbname, usedb)
! train(store, ham, spam, maxmsgs, maxrounds)
store.store()
More information about the Spambayes-checkins
mailing list