[Spambayes-checkins] spambayes/contrib tte.py,1.3,1.4

Skip Montanaro montanaro at users.sourceforge.net
Fri Feb 13 14:43:39 EST 2004


Update of /cvsroot/spambayes/spambayes/contrib
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv23880

Modified Files:
	tte.py 
Log Message:
Add a user-settable upper limit (-r N, default 10) on the number of
training rounds to prevent infinite loops.


Index: tte.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/contrib/tte.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** tte.py	13 Feb 2004 15:09:33 -0000	1.3
--- tte.py	13 Feb 2004 19:43:36 -0000	1.4
***************
*** 5,9 ****
  everything scores properly.
  
! usage %(prog)s [ -h ] -g file -s file [ -d file | -p file ] [ -m N ]
  
  -h      - print this documentation and exit.
--- 5,9 ----
  everything scores properly.
  
! usage %(prog)s [ -h ] -g file -s file [ -d file | -p file ] [ -m N ] [ -r N ]
  
  -h      - print this documentation and exit.
***************
*** 19,22 ****
--- 19,25 ----
  -m N    - train on at most N messages (nham == N/2 and nspam == N/2)
  
+ -r N    - run at most N rounds (default %(MAXROUNDS)s), even if not
+           all messages score correctly
+ 
  See Gary Robinson's blog:
  
***************
*** 38,41 ****
--- 41,46 ----
  prog = os.path.basename(sys.argv[0])
  
+ MAXROUNDS = 10
+ 
  def usage(msg=None):
      if msg is not None:
***************
*** 43,52 ****
      print >> sys.stderr, __doc__.strip() % globals()
  
! def train(store, ham, spam, maxmsgs):
      smisses = hmisses = round = 0
      ham_cutoff = Options.options["Categorization", "ham_cutoff"]
      spam_cutoff = Options.options["Categorization", "spam_cutoff"]
  
!     while hmisses or smisses or round == 0:
          hambone = mboxutils.getmbox(ham)
          spamcan = mboxutils.getmbox(spam)
--- 48,57 ----
      print >> sys.stderr, __doc__.strip() % globals()
  
! def train(store, ham, spam, maxmsgs, maxrounds):
      smisses = hmisses = round = 0
      ham_cutoff = Options.options["Categorization", "ham_cutoff"]
      spam_cutoff = Options.options["Categorization", "spam_cutoff"]
  
!     while round < maxrounds and (hmisses or smisses or round == 0):
          hambone = mboxutils.getmbox(ham)
          spamcan = mboxutils.getmbox(spam)
***************
*** 98,105 ****
  def main(args):
      try:
!         opts, args = getopt.getopt(args, "hg:s:d:p:o:m:",
                                     ["help", "good=", "spam=",
                                      "database=", "pickle=",
!                                     "option=", "max="])
      except getopt.GetoptError, msg:
          usage(msg)
--- 103,110 ----
  def main(args):
      try:
!         opts, args = getopt.getopt(args, "hg:s:d:p:o:m:r:",
                                     ["help", "good=", "spam=",
                                      "database=", "pickle=",
!                                     "option=", "max=", "maxrounds="])
      except getopt.GetoptError, msg:
          usage(msg)
***************
*** 108,111 ****
--- 113,117 ----
      ham = spam = dbname = usedb = None
      maxmsgs = 0
+     maxrounds = MAXROUNDS
      for opt, arg in opts:
          if opt in ("-h", "--help"):
***************
*** 118,121 ****
--- 124,129 ----
          elif opt in ("-m", "--max"):
              maxmsgs = int(arg)
+         elif opt in ("-r", "--maxrounds"):
+             maxrounds = int(arg)
          elif opt in ('-o', '--option'):
              Options.options.set_from_cmdline(arg, sys.stderr)
***************
*** 134,138 ****
      store = storage.open_storage(dbname, usedb)
  
!     train(store, ham, spam, maxmsgs)
  
      store.store()
--- 142,146 ----
      store = storage.open_storage(dbname, usedb)
  
!     train(store, ham, spam, maxmsgs, maxrounds)
  
      store.store()




More information about the Spambayes-checkins mailing list