[Spambayes-checkins] spambayes table.py,1.4,1.5

Anthony Baxter anthonybaxter@users.sourceforge.net
Wed Nov 6 22:12:52 2002


Update of /cvsroot/spambayes/spambayes
In directory usw-pr-cvs1:/tmp/cvs-serv15111

Modified Files:
	table.py 
Log Message:
Added a '-m' option that prints the mean of each row as a final column.

Also a little bit of cleanup: the script body now lives in a table() function run under a __main__ guard.


Index: table.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/table.py,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** table.py	26 Oct 2002 15:30:23 -0000	1.4
--- table.py	6 Nov 2002 22:12:48 -0000	1.5
***************
*** 2,6 ****
  
  """
! table.py base1 base2 ... baseN
  
  Combines output from base1.txt, base2.txt, etc., which are created by
--- 2,6 ----
  
  """
! table.py [-m] base1 base2 ... baseN
  
  Combines output from base1.txt, base2.txt, etc., which are created by
***************
*** 8,15 ****
  comparison statistics to stdout.  Each input file is represented by
  one column in the table.
- """
  
! import sys
! import re
  
  # Return
--- 8,15 ----
  comparison statistics to stdout.  Each input file is represented by
  one column in the table.
  
! Optional argument -m shows a final column with the mean value of each
! statistic.
! """
  
  # Return
***************
*** 46,56 ****
          line = get()
          if line.startswith('-> <stat> tested'):
!             # -> <stat> tested 1910 hams & 948 spams against 2741 hams & 948 spams
!             #  0      1      2    3    4 5   6
              print line,
  
          elif line.find(' items; mean ') > 0 and line.find('for all runs') > 0:
!             # -> <stat> Ham scores for all runs: 2741 items; mean 0.86; sdev 6.28
!             #                                             0          1          2
              vals = line.split(';')
              mean = float(vals[1].split()[-1])
--- 46,56 ----
          line = get()
          if line.startswith('-> <stat> tested'):
!             # <stat> tested 1910 hams & 948 spams against 2741 hams & 948 spams
!             #      1      2    3    4 5   6
              print line,
  
          elif line.find(' items; mean ') > 0 and line.find('for all runs') > 0:
!             # <stat> Ham scores for all runs: 2741 items; mean 0.86; sdev 6.28
!             #                                          0          1          2
              vals = line.split(';')
              mean = float(vals[1].split()[-1])
***************
*** 103,184 ****
          return fn
  
! fname = "filename: "
! fnam2 = "          "
! ratio = "ham:spam: "
! rat2  = "          "
! fptot = "fp total: "
! fpper = "fp %:     "
! fntot = "fn total: "
! fnper = "fn %:     "
! untot = "unsure t: "
! unper = "unsure %: "
! rcost = "real cost:"
! bcost = "best cost:"
  
! hmean = "h mean:   "
! hsdev = "h sdev:   "
! smean = "s mean:   "
! ssdev = "s sdev:   "
! meand = "mean diff:"
! kval  = "k:        "
  
! for filename in sys.argv[1:]:
!     filename = windowsfy(filename)
!     (htest, stest, fp, fn, un, fpp, fnp, unp, cost, bestcost,
!      hamdevall, spamdevall) = suck(file(filename))
!     if filename.endswith('.txt'):
!         filename = filename[:-4]
!     filename = filename[filename.rfind('/')+1:]
!     filename = filename[filename.rfind("\\")+1:]
!     if len(fname) > len(fnam2):
!         fname += "        "
!         fname = fname[0:(len(fnam2) + 8)]
!         fnam2 += " %7s" % filename
!     else:
!         fnam2 += "        "
!         fnam2 = fnam2[0:(len(fname) + 8)]
!         fname += " %7s" % filename
!     if len(ratio) > len(rat2):
!         ratio += "        "
!         ratio = ratio[0:(len(rat2) + 8)]
!         rat2  += " %7s" % ("%d:%d" % (htest, stest))
!     else:
!         rat2  += "        "
!         rat2  = rat2[0:(len(ratio) + 8)]
!         ratio += " %7s" % ("%d:%d" % (htest, stest))
!     fptot += "%8d"   % fp
!     fpper += "%8.2f" % fpp
!     fntot += "%8d"   % fn
!     fnper += "%8.2f" % fnp
!     untot += "%8d"   % un
!     unper += "%8.2f" % unp
!     rcost += "%8s"   % ("$%.2f" % cost)
!     bcost += "%8s"   % ("$%.2f" % bestcost)
!     hmean += "%8.2f" % hamdevall[0]
!     hsdev += "%8.2f" % hamdevall[1]
!     smean += "%8.2f" % spamdevall[0]
!     ssdev += "%8.2f" % spamdevall[1]
!     meand += "%8.2f" % (spamdevall[0] - hamdevall[0])
!     k = (spamdevall[0] - hamdevall[0]) / (spamdevall[1] + hamdevall[1])
!     kval  += "%8.2f" % k
  
! print fname
! if len(fnam2.strip()) > 0:
!     print fnam2
! print ratio
! if len(rat2.strip()) > 0:
!     print rat2
! print fptot
! print fpper
! print fntot
! print fnper
! print untot
! print unper
! print rcost
! print bcost
! print hmean
! print hsdev
! print smean
! print ssdev
! print meand
! print kval
--- 103,231 ----
          return fn
  
! def table():
!     import getopt, sys
  
!     showMean = 0
  
!     fname = "filename: "
!     fnam2 = "          "
!     ratio = "ham:spam: "
!     rat2  = "          "
!     fptot = "fp total: "
!     fpper = "fp %:     "
!     fntot = "fn total: "
!     fnper = "fn %:     "
!     untot = "unsure t: "
!     unper = "unsure %: "
!     rcost = "real cost:"
!     bcost = "best cost:"
  
!     hmean = "h mean:   "
!     hsdev = "h sdev:   "
!     smean = "s mean:   "
!     ssdev = "s sdev:   "
!     meand = "mean diff:"
!     kval  = "k:        "
! 
!     tfptot = tfpper = tfntot = tfnper = tuntot = tunper = trcost = tbcost = \
!     thmean = thsdev = tsmean = tssdev = tmeand = tkval =  0
! 
!     args, fileargs = getopt.getopt(sys.argv[1:], 'm')
!     for arg, val in args:
!         if arg == "-m":
!             showMean = 1
! 
!     for filename in fileargs:
!         filename = windowsfy(filename)
!         (htest, stest, fp, fn, un, fpp, fnp, unp, cost, bestcost,
!          hamdevall, spamdevall) = suck(file(filename))
!         if filename.endswith('.txt'):
!             filename = filename[:-4]
!         filename = filename[filename.rfind('/')+1:]
!         filename = filename[filename.rfind("\\")+1:]
!         if len(fname) > len(fnam2):
!             fname += "        "
!             fname = fname[0:(len(fnam2) + 8)]
!             fnam2 += " %7s" % filename
!         else:
!             fnam2 += "        "
!             fnam2 = fnam2[0:(len(fname) + 8)]
!             fname += " %7s" % filename
!         if len(ratio) > len(rat2):
!             ratio += "        "
!             ratio = ratio[0:(len(rat2) + 8)]
!             rat2  += " %7s" % ("%d:%d" % (htest, stest))
!         else:
!             rat2  += "        "
!             rat2  = rat2[0:(len(ratio) + 8)]
!             ratio += " %7s" % ("%d:%d" % (htest, stest))
!         fptot += "%8d"   % fp
!         tfptot += fp
!         fpper += "%8.2f" % fpp
!         tfpper += fpp
!         fntot += "%8d"   % fn
!         tfntot += fn
!         fnper += "%8.2f" % fnp
!         tfnper += fnp
!         untot += "%8d"   % un
!         tuntot += un
!         unper += "%8.2f" % unp
!         tunper += unp
!         rcost += "%8s"   % ("$%.2f" % cost)
!         trcost += cost
!         bcost += "%8s"   % ("$%.2f" % bestcost)
!         tbcost += bestcost
!         hmean += "%8.2f" % hamdevall[0]
!         thmean += hamdevall[0]
!         hsdev += "%8.2f" % hamdevall[1]
!         thsdev += hamdevall[1]
!         smean += "%8.2f" % spamdevall[0]
!         tsmean += spamdevall[0]
!         ssdev += "%8.2f" % spamdevall[1]
!         tssdev += spamdevall[1]
!         meand += "%8.2f" % (spamdevall[0] - hamdevall[0])
!         tmeand += (spamdevall[0] - hamdevall[0])
!         k = (spamdevall[0] - hamdevall[0]) / (spamdevall[1] + hamdevall[1])
!         kval  += "%8.2f" % k
!         tkval  += k
! 
!     nfiles = len(fileargs)
!     if nfiles and showMean:
!         fptot += "%12d"   % (tfptot/nfiles)
!         fpper += "%12.2f" % (tfpper/nfiles)
!         fntot += "%12d"   % (tfntot/nfiles)
!         fnper += "%12.2f" % (tfnper/nfiles)
!         untot += "%12d"   % (tuntot/nfiles)
!         unper += "%12.2f" % (tunper/nfiles)
!         rcost += "%12s"   % ("$%.2f" % (trcost/nfiles))
!         bcost += "%12s"   % ("$%.2f" % (tbcost/nfiles))
!         hmean += "%12.2f" % (thmean/nfiles)
!         hsdev += "%12.2f" % (thsdev/nfiles)
!         smean += "%12.2f" % (tsmean/nfiles)
!         ssdev += "%12.2f" % (tssdev/nfiles)
!         meand += "%12.2f" % (tmeand/nfiles)
!         kval  += "%12.2f" % (tkval/nfiles)
! 
!     print fname
!     if len(fnam2.strip()) > 0:
!         print fnam2
!     print ratio
!     if len(rat2.strip()) > 0:
!         print rat2
!     print fptot
!     print fpper
!     print fntot
!     print fnper
!     print untot
!     print unper
!     print rcost
!     print bcost
!     print hmean
!     print hsdev
!     print smean
!     print ssdev
!     print meand
!     print kval
! 
! if __name__ == "__main__":
!     table()