[Spambayes-checkins] spambayes weaktest.py,1.1,1.2

Rob W.W. Hooft hooft@users.sourceforge.net
Sun Nov 10 12:02:36 2002


Update of /cvsroot/spambayes/spambayes
In directory usw-pr-cvs1:/tmp/cvs-serv22741

Modified Files:
	weaktest.py 
Log Message:
add flexcost; sanitize spacing

Index: weaktest.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/weaktest.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** weaktest.py	9 Nov 2002 21:48:52 -0000	1.1
--- weaktest.py	10 Nov 2002 12:02:33 -0000	1.2
***************
*** 59,63 ****
      nspam = len(spamfns)
      
!     allfns={}
      for fn in spamfns+hamfns:
          allfns[fn] = None
--- 59,63 ----
      nspam = len(spamfns)
      
!     allfns = {}
      for fn in spamfns+hamfns:
          allfns[fn] = None
***************
*** 65,74 ****
      d = hammie.Hammie(hammie.createbayes('weaktest.db', False))
  
!     n=0
!     unsure=0
!     hamtrain=0
!     spamtrain=0
!     fp=0
!     fn=0
      for dir,name, is_spam in allfns.iterkeys():
          n += 1
--- 65,80 ----
      d = hammie.Hammie(hammie.createbayes('weaktest.db', False))
  
!     n = 0
!     unsure = 0
!     hamtrain = 0
!     spamtrain = 0
!     fp = 0
!     fn = 0
!     flexcost = 0
!     FPW = options.best_cutoff_fp_weight
!     FNW = options.best_cutoff_fn_weight
!     UNW = options.best_cutoff_unsure_weight
!     SPC = options.spam_cutoff
!     HC = options.ham_cutoff
      for dir,name, is_spam in allfns.iterkeys():
          n += 1
***************
*** 82,87 ****
          if debug:
              print "score:%.3f"%scr,
!         if scr < hammie.SPAM_THRESHOLD and is_spam:
!             if scr < hammie.HAM_THRESHOLD:
                  fn += 1
                  if debug:
--- 88,96 ----
          if debug:
              print "score:%.3f"%scr,
!         if scr < SPC and is_spam:
!             t = FNW * (SPC - scr) / (SPC - HC)
!             #print "Spam at %.3f costs %.2f"%(scr,t)
!             flexcost += t
!             if scr < HC:
                  fn += 1
                  if debug:
***************
*** 94,104 ****
              d.train_spam(m)
              d.update_probabilities()
!         elif scr > hammie.HAM_THRESHOLD and not is_spam:
!             if scr > hammie.SPAM_THRESHOLD:
                  fp += 1
                  if debug:
                      print "fp"
                  else:
!                     print "fp: %s score:%.4f"%(os.path.join(dir,name),scr)
              else:
                  unsure += 1
--- 103,116 ----
              d.train_spam(m)
              d.update_probabilities()
!         elif scr > HC and not is_spam:
!             t = FPW * (scr - HC) / (SPC - HC)
!             #print "Ham at %.3f costs %.2f"%(scr,t)
!             flexcost += t
!             if scr > SPC:
                  fp += 1
                  if debug:
                      print "fp"
                  else:
!                     print "fp: %s score:%.4f"%(os.path.join(dir, name), scr)
              else:
                  unsure += 1
***************
*** 113,126 ****
          if n % 100 == 0:
              print "%5d trained:%dH+%dS wrds:%d fp:%d fn:%d unsure:%d"%(
!                 n,hamtrain,spamtrain,len(d.bayes.wordinfo),fp,fn,unsure)
!     print "Total messages %d (%d ham and %d spam)"%(len(allfns),nham,nspam)
      print "Total unsure (including 30 startup messages): %d (%.1f%%)"%(
!         unsure,unsure*100.0/len(allfns))
!     print "Trained on %d ham and %d spam"%(hamtrain,spamtrain)
!     print "fp: %d fn: %d"%(fp,fn)
!     FPW = options.best_cutoff_fp_weight
!     FNW = options.best_cutoff_fn_weight
!     UNW = options.best_cutoff_unsure_weight
!     print "Total cost: $%.2f"%(FPW*fp+FNW*fn+UNW*unsure)
      
  def main():
--- 125,136 ----
          if n % 100 == 0:
              print "%5d trained:%dH+%dS wrds:%d fp:%d fn:%d unsure:%d"%(
!                 n, hamtrain, spamtrain, len(d.bayes.wordinfo), fp, fn, unsure)
!     print "Total messages %d (%d ham and %d spam)"%(len(allfns), nham, nspam)
      print "Total unsure (including 30 startup messages): %d (%.1f%%)"%(
!         unsure, unsure * 100.0 / len(allfns))
!     print "Trained on %d ham and %d spam"%(hamtrain, spamtrain)
!     print "fp: %d fn: %d"%(fp, fn)
!     print "Total cost: $%.2f"%(FPW * fp + FNW * fn + UNW * unsure)
!     print "Flex cost: $%.4f"%flexcost
      
  def main():
***************
*** 128,137 ****
  
      try:
!         opts, args = getopt.getopt(sys.argv[1:], 'hn:s:',
!                                    ['ham-keep=', 'spam-keep='])
      except getopt.error, msg:
          usage(1, msg)
  
!     nsets = seed = hamkeep = spamkeep = None
      for opt, arg in opts:
          if opt == '-h':
--- 138,146 ----
  
      try:
!         opts, args = getopt.getopt(sys.argv[1:], 'hn:')
      except getopt.error, msg:
          usage(1, msg)
  
!     nsets = None
      for opt, arg in opts:
          if opt == '-h':





More information about the Spambayes-checkins mailing list