[Spambayes-checkins] spambayes simplexloop.py,NONE,1.1

Rob W.W. Hooft hooft@users.sourceforge.net
Fri Nov 15 21:35:17 2002


Update of /cvsroot/spambayes/spambayes
In directory usw-pr-cvs1:/tmp/cvs-serv10258

Added Files:
	simplexloop.py 
Log Message:
more generic simplex optimizer; accepts any command line as argument and will optimize the cost it reports in its last line of output by tuning 5 parameters

--- NEW FILE: simplexloop.py ---
#
# Optimize parameters
#
"""Usage: %(program)s  [options] -c command

Where:
    -h
        Show usage and exit.
    -c command
        The command to be run, with all its options. 
        The last line of output from the command should
        match 'YYYYYYY cost: $xxxx.xx'
        (i.e. the third word of the last line should be the value to be
         minimized, preceded by a dollar sign)
        I have used
         "python2.3 timcv.py -n 10 --spam-keep=600 --ham-keep=600 -s 12345"

This program will overwrite bayescustomize.ini!
"""

import sys

def usage(code, msg=''):
    """Write the module usage text to stderr and exit with status `code`.

    When `msg` is true it is printed first, followed by a blank line.
    The usage text is the module docstring interpolated with the module
    globals (which supplies %(program)s).
    """
    out = sys.stderr
    if msg:
        print >> out, msg
        print >> out
    print >> out, __doc__ % globals()
    sys.exit(code)

# Script name; substituted for %(program)s when usage() prints the docstring.
program = sys.argv[0]

import Options

# Starting point handed to the optimizer: the currently configured values of
# the five tuned options, in the same order mkini() writes them out.
_current = Options.options
start = (
    _current.unknown_word_prob,
    _current.minimum_prob_strength,
    _current.unknown_word_strength,
    _current.spam_cutoff,
    _current.ham_cutoff,
)
# One value per entry of `start`, passed as the second argument to
# optimize.SimplexMaximize -- presumably the initial step/uncertainty for each
# parameter; confirm against optimize.py.
err = (0.01, 0.01, 0.01, 0.005, 0.01)

def mkini(vars):
    """Write the five tuned parameters to bayescustomize.ini.

    `vars` is a sequence of five numbers, in the order unknown_word_prob,
    minimum_prob_strength, unknown_word_strength, spam_cutoff, ham_cutoff.
    Any bayescustomize.ini in the current directory is overwritten.

    Raises TypeError if `vars` does not hold exactly five numbers; in that
    case an existing bayescustomize.ini is left untouched.
    """
    # Render the text before opening the file: open(..., 'w') truncates
    # immediately, so a formatting error after the open would wipe out the
    # previous settings.
    text = """
[Classifier]
unknown_word_prob = %.6f
minimum_prob_strength = %.6f
unknown_word_strength = %.6f

[TestDriver]
spam_cutoff = %.4f
ham_cutoff = %.4f
"""%tuple(vars)
    f = open('bayescustomize.ini', 'w')
    try:
        f.write(text)
    finally:
        # Release the handle even if the write fails.
        f.close()

def score(vars):
    import os
    mkini(vars)
    status = os.system('%s > loop.out'%command)
    if status != 0:
        print >> sys.stderr, "Error status from subcommand"
        sys.exit(status)
    f = open('loop.out', 'r')
    txt = f.readlines()
    # Extract the flex cost field.
    cost = float(txt[-1].split()[2][1:])
    f.close()
    # print ''.join(txt[-4:])[:-1]
    print "x=%.4f p=%.4f s=%.4f sc=%.3f hc=%.3f %.2f"%(tuple(vars)+(cost,))
    return -cost

def main():
    import optimize
    finish=optimize.SimplexMaximize(start,err,score)
    mkini(finish)
    print "Best result left in bayescustomize.ini"

if __name__ == "__main__":
    # Command-line entry point: parse -h / -c, validate, then optimize.
    import getopt

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hc:')
    except getopt.error, msg:
        usage(1, msg)

    # `command` is a module global; score() reads it on every evaluation.
    command = None
    for opt, arg in opts:
        if opt == '-c':
            command = arg
        elif opt == '-h':
            usage(0)

    # usage() never returns, so at most one of these complaints is issued.
    if args:
        usage(1, "Positional arguments not supported")
    elif command is None:
        usage(1, "-c is required")

    main()





More information about the Spambayes-checkins mailing list