[Spambayes-checkins] spambayes weaktest.py,1.1,1.2
Rob W.W. Hooft
hooft@users.sourceforge.net
Sun Nov 10 12:02:36 2002
Update of /cvsroot/spambayes/spambayes
In directory usw-pr-cvs1:/tmp/cvs-serv22741
Modified Files:
weaktest.py
Log Message:
add flexcost; sanitize spacing
Index: weaktest.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/weaktest.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** weaktest.py 9 Nov 2002 21:48:52 -0000 1.1
--- weaktest.py 10 Nov 2002 12:02:33 -0000 1.2
***************
*** 59,63 ****
nspam = len(spamfns)
! allfns={}
for fn in spamfns+hamfns:
allfns[fn] = None
--- 59,63 ----
nspam = len(spamfns)
! allfns = {}
for fn in spamfns+hamfns:
allfns[fn] = None
***************
*** 65,74 ****
d = hammie.Hammie(hammie.createbayes('weaktest.db', False))
! n=0
! unsure=0
! hamtrain=0
! spamtrain=0
! fp=0
! fn=0
for dir,name, is_spam in allfns.iterkeys():
n += 1
--- 65,80 ----
d = hammie.Hammie(hammie.createbayes('weaktest.db', False))
! n = 0
! unsure = 0
! hamtrain = 0
! spamtrain = 0
! fp = 0
! fn = 0
! flexcost = 0
! FPW = options.best_cutoff_fp_weight
! FNW = options.best_cutoff_fn_weight
! UNW = options.best_cutoff_unsure_weight
! SPC = options.spam_cutoff
! HC = options.ham_cutoff
for dir,name, is_spam in allfns.iterkeys():
n += 1
***************
*** 82,87 ****
if debug:
print "score:%.3f"%scr,
! if scr < hammie.SPAM_THRESHOLD and is_spam:
! if scr < hammie.HAM_THRESHOLD:
fn += 1
if debug:
--- 88,96 ----
if debug:
print "score:%.3f"%scr,
! if scr < SPC and is_spam:
! t = FNW * (SPC - scr) / (SPC - HC)
! #print "Spam at %.3f costs %.2f"%(scr,t)
! flexcost += t
! if scr < HC:
fn += 1
if debug:
***************
*** 94,104 ****
d.train_spam(m)
d.update_probabilities()
! elif scr > hammie.HAM_THRESHOLD and not is_spam:
! if scr > hammie.SPAM_THRESHOLD:
fp += 1
if debug:
print "fp"
else:
! print "fp: %s score:%.4f"%(os.path.join(dir,name),scr)
else:
unsure += 1
--- 103,116 ----
d.train_spam(m)
d.update_probabilities()
! elif scr > HC and not is_spam:
! t = FPW * (scr - HC) / (SPC - HC)
! #print "Ham at %.3f costs %.2f"%(scr,t)
! flexcost += t
! if scr > SPC:
fp += 1
if debug:
print "fp"
else:
! print "fp: %s score:%.4f"%(os.path.join(dir, name), scr)
else:
unsure += 1
***************
*** 113,126 ****
if n % 100 == 0:
print "%5d trained:%dH+%dS wrds:%d fp:%d fn:%d unsure:%d"%(
! n,hamtrain,spamtrain,len(d.bayes.wordinfo),fp,fn,unsure)
! print "Total messages %d (%d ham and %d spam)"%(len(allfns),nham,nspam)
print "Total unsure (including 30 startup messages): %d (%.1f%%)"%(
! unsure,unsure*100.0/len(allfns))
! print "Trained on %d ham and %d spam"%(hamtrain,spamtrain)
! print "fp: %d fn: %d"%(fp,fn)
! FPW = options.best_cutoff_fp_weight
! FNW = options.best_cutoff_fn_weight
! UNW = options.best_cutoff_unsure_weight
! print "Total cost: $%.2f"%(FPW*fp+FNW*fn+UNW*unsure)
def main():
--- 125,136 ----
if n % 100 == 0:
print "%5d trained:%dH+%dS wrds:%d fp:%d fn:%d unsure:%d"%(
! n, hamtrain, spamtrain, len(d.bayes.wordinfo), fp, fn, unsure)
! print "Total messages %d (%d ham and %d spam)"%(len(allfns), nham, nspam)
print "Total unsure (including 30 startup messages): %d (%.1f%%)"%(
! unsure, unsure * 100.0 / len(allfns))
! print "Trained on %d ham and %d spam"%(hamtrain, spamtrain)
! print "fp: %d fn: %d"%(fp, fn)
! print "Total cost: $%.2f"%(FPW * fp + FNW * fn + UNW * unsure)
! print "Flex cost: $%.4f"%flexcost
def main():
***************
*** 128,137 ****
try:
! opts, args = getopt.getopt(sys.argv[1:], 'hn:s:',
! ['ham-keep=', 'spam-keep='])
except getopt.error, msg:
usage(1, msg)
! nsets = seed = hamkeep = spamkeep = None
for opt, arg in opts:
if opt == '-h':
--- 138,146 ----
try:
! opts, args = getopt.getopt(sys.argv[1:], 'hn:')
except getopt.error, msg:
usage(1, msg)
! nsets = None
for opt, arg in opts:
if opt == '-h':
More information about the Spambayes-checkins mailing list