[Spambayes-checkins] spambayes pop3proxy.py,1.16,1.17
Richie Hindle
richiehindle@users.sourceforge.net
Wed Nov 20 12:45:24 2002
Update of /cvsroot/spambayes/spambayes
In directory sc8-pr-cvs1:/tmp/cvs-serv21143
Modified Files:
pop3proxy.py
Log Message:
o Multiple server support - the old ini-file settings are deprecated;
see Options.py
o Added a 'defer' choice in addition to discard/ham/spam - thanks to
Skip for the suggestion.
o The training page now groups by X-Hammie-Disposition - thanks again
to Skip.
o Added a Save Database button to the status panel.
o Added nspam and nham to the status panel.
o Fixed several Mac-related problems reported by François: timestamps
are now converted to longs.
Index: pop3proxy.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/pop3proxy.py,v
retrieving revision 1.16
retrieving revision 1.17
diff -C2 -d -r1.16 -r1.17
*** pop3proxy.py 18 Nov 2002 19:14:48 -0000 1.16
--- pop3proxy.py 20 Nov 2002 12:45:21 -0000 1.17
***************
*** 53,58 ****
Web training interface:
- o Include more stats in the Status box - it's easy to lose track of
- where you are when testing.
o Functional tests.
o Review already-trained messages, and purge them.
--- 53,56 ----
***************
*** 80,85 ****
o Possibly integrate Tim Stone's SMTP code - make it use async, make
the training code update (rather than replace!) the database.
- o Option to keep trained messages and view potential FPs and FNs to
- correct them.
o Allow use of the UI without the POP3 proxy.
o Remove any existing X-Hammie-Disposition header from incoming emails.
--- 78,81 ----
***************
*** 107,115 ****
o Classify a web page given a URL.
o Graphs. Of something. Who cares what?
o Zoe...!
"""
! import os, sys, re, operator, errno, getopt, cPickle, cStringIO, time, bisect
import socket, asyncore, asynchat, cgi, urlparse, webbrowser
import Bayes, tokenizer, mboxutils
--- 103,112 ----
o Classify a web page given a URL.
o Graphs. Of something. Who cares what?
+ o NNTP proxy.
o Zoe...!
"""
! import os, sys, re, operator, errno, getopt, string, cStringIO, time, bisect
import socket, asyncore, asynchat, cgi, urlparse, webbrowser
import Bayes, tokenizer, mboxutils
***************
*** 477,481 ****
# The message name is the time it arrived, with a uniquifier
# appended if two arrive within one clock tick of each other.
! messageName = "%10.10d" % time.time()
if messageName == state.lastBaseMessageName:
state.lastBaseMessageName = messageName
--- 474,478 ----
# The message name is the time it arrived, with a uniquifier
# appended if two arrive within one clock tick of each other.
! messageName = "%10.10d" % long(time.time())
if messageName == state.lastBaseMessageName:
state.lastBaseMessageName = messageName
***************
*** 603,612 ****
<br>\n"""
! summary = """POP3 proxy running on port <b>%(proxyPort)d</b>,
! proxying to <b>%(serverName)s:%(serverPort)d</b>.<br>
Active POP3 conversations: <b>%(activeSessions)d</b>.<br>
POP3 conversations this session: <b>%(totalSessions)d</b>.<br>
Emails classified this session: <b>%(numSpams)d</b> spam,
! <b>%(numHams)d</b> ham, <b>%(numUnsure)d</b> unsure.
"""
--- 600,614 ----
<br>\n"""
! summary = """POP3 proxy running on <b>%(proxyPortsString)s</b>,
! proxying to <b>%(serversString)s</b>.<br>
Active POP3 conversations: <b>%(activeSessions)d</b>.<br>
POP3 conversations this session: <b>%(totalSessions)d</b>.<br>
Emails classified this session: <b>%(numSpams)d</b> spam,
! <b>%(numHams)d</b> ham, <b>%(numUnsure)d</b> unsure.<br>
! Total emails trained: Spam: <b>%(nspam)d</b>
! Ham: <b>%(nham)d</b><br>
! <form action='save' method='POST'>
! <input type='submit' value='Save database'>
! </form>
"""
***************
*** 620,628 ****
using the <a href='review'>Review messages</a> page."""
! reviewHeader = """<p>These are unclassified emails, which you can use to
! train the classifier. Check the Discard / Ham / Spam
! buttton for each email, then click 'Train' below. (To
! discard the whole page, leave everything with Discard
! checked and click 'Train'.)</p>
<form action='review' method='GET'>
<input type='hidden' name='prior' value='%d'>
--- 622,630 ----
using the <a href='review'>Review messages</a> page."""
! reviewHeader = """<p>These are untrained emails, which you can use to
! train the classifier. Check the Discard / Defer / Ham /
! Spam buttton for each email, then click 'Train' below.
! (Defer leaves the message here, to be trained on
! later.)</p>
<form action='review' method='GET'>
<input type='hidden' name='prior' value='%d'>
***************
*** 639,644 ****
<form action='review' method='POST'>
<table class='messagetable' cellpadding='0' cellspacing='0'>
! <tr><td><b>Subject:</b></td><td><b>From:</b></td>
! <td><b>Discard / Ham / Spam</b></td></tr>"""
upload = """<form action='%s' method='POST'
--- 641,649 ----
<form action='review' method='POST'>
<table class='messagetable' cellpadding='0' cellspacing='0'>
! """
!
! reviewSubheader = """<tr><td><b>Messages classified as %s:</b></td>
! <td><b>From:</b></td>
! <td><b>Discard / Defer / Ham / Spam</b></td></tr>"""
upload = """<form action='%s' method='POST'
***************
*** 769,773 ****
homeLink = "<a href='home'>Home</a> > %s" % name
if showImage:
! image = "<img src='/helmet.gif' align='absmiddle'> "
else:
image = ""
--- 774,778 ----
homeLink = "<a href='home'>Home</a> > %s" % name
if showImage:
! image = "<img src='helmet.gif' align='absmiddle'> "
else:
image = ""
***************
*** 796,800 ****
def onHome(self, params):
"""Serve up the homepage."""
! body = (self.pageSection % ('Status', self.summary % state.__dict__)+
self.pageSection % ('Train on proxied messages', self.review)+
self.pageSection % ('Train on a given message', self.train)+
--- 801,807 ----
def onHome(self, params):
"""Serve up the homepage."""
! stateDict = state.__dict__
! stateDict.update(state.bayes.__dict__)
! body = (self.pageSection % ('Status', self.summary % stateDict)+
self.pageSection % ('Train on proxied messages', self.review)+
self.pageSection % ('Train on a given message', self.train)+
***************
*** 803,813 ****
self.push(body)
def onShutdown(self, params):
"""Shutdown the server, saving the pickle if requested to do so."""
if params['how'].lower().find('save') >= 0:
! if not state.useDB and state.databaseFilename:
! self.push("<b>Saving...</b>")
! self.push(' ') # Acts as a flush for small buffers.
! state.bayes.store()
self.push("<b>Shutdown</b>. Goodbye.</div></body></html>")
self.push(' ')
--- 810,828 ----
self.push(body)
+ def doSave(self):
+ """Saves the database. Worker for onSave and onShutdown."""
+ self.push("<b>Saving... ")
+ self.push(' ')
+ state.bayes.store()
+ self.push("Done</b>.")
+
+ def onSave(self, params):
+ """Command handler for "Save"."""
+ self.doSave()
+
def onShutdown(self, params):
"""Shutdown the server, saving the pickle if requested to do so."""
if params['how'].lower().find('save') >= 0:
! self.doSave()
self.push("<b>Shutdown</b>. Goodbye.</div></body></html>")
self.push(' ')
***************
*** 845,849 ****
for that message. This is the time that the message was received,
not the Date header."""
! return int(key[:10])
def getTimeRange(self, timestamp):
--- 860,864 ----
for that message. This is the time that the message was received,
not the Date header."""
! return long(key[:10])
def getTimeRange(self, timestamp):
***************
*** 879,884 ****
# Find the subset of the keys within this range.
! startKeyIndex = bisect.bisect(allKeys, "%d" % start)
! endKeyIndex = bisect.bisect(allKeys, "%d" % end)
keys = allKeys[startKeyIndex:endKeyIndex]
keys.reverse()
--- 894,899 ----
# Find the subset of the keys within this range.
! startKeyIndex = bisect.bisect(allKeys, "%d" % long(start))
! endKeyIndex = bisect.bisect(allKeys, "%d" % long(end))
keys = allKeys[startKeyIndex:endKeyIndex]
keys.reverse()
***************
*** 896,911 ****
return keys, date, prior, start, end
! def onReview(self, params):
! """Present a list of message for (re)training."""
! # This is the radio group for training/discarding.
! trainRadio = """<input type='radio' name='classify:%s'
! value='discard' checked>
! <input type='radio' name='classify:%s' value='ham'>
! <input type='radio' name='classify:%s' value='spam'>"""
# Train/discard sumbitted messages.
id = ''
numTrained = 0
for key, value in params.items():
if key.startswith('classify:'):
--- 911,947 ----
return keys, date, prior, start, end
! def appendMessages(self, lines, keyedMessages, judgement):
! """Appends the lines of a table of messages to 'lines'."""
! buttons = """<input type='radio' name='classify:%s' value='discard'>
! <input type='radio' name='classify:%s' value='defer' %s>
! <input type='radio' name='classify:%s' value='ham' %s>
! <input type='radio' name='classify:%s' value='spam' %s>"""
! stripe = 0
! for key, message in keyedMessages:
! # Parse the message and get the relevant headers.
! subject = self.trimAndQuote(message["Subject"] or "(none)", 50)
! from_ = self.trimAndQuote(message["From"] or "(none)", 40)
! # Output the table row for this message.
! defer = ham = spam = ""
! if judgement == options.header_spam_string:
! spam='checked'
! elif judgement == options.header_ham_string:
! ham='checked'
! elif judgement == options.header_unsure_string:
! defer='checked'
! radioGroup = buttons % (key, key, defer, key, ham, key, spam)
! stripeClass = ['stripe_on', 'stripe_off'][stripe]
! lines.append("""<tr class='%s'><td>%s</td><td>%s</td>
! <td align='middle'>%s</td></tr>""" % \
! (stripeClass, subject, from_, radioGroup))
! stripe = stripe ^ 1
+ def onReview(self, params):
+ """Present a list of message for (re)training."""
# Train/discard sumbitted messages.
id = ''
numTrained = 0
+ numDeferred = 0
for key, value in params.items():
if key.startswith('classify:'):
***************
*** 915,921 ****
elif value == 'ham':
targetCorpus = state.hamCorpus
! else: # Discard
targetCorpus = None
! state.unknownCorpus.removeMessage(state.unknownCorpus[id])
if targetCorpus:
try:
--- 951,963 ----
elif value == 'ham':
targetCorpus = state.hamCorpus
! elif value == 'discard':
targetCorpus = None
! try:
! state.unknownCorpus.removeMessage(state.unknownCorpus[id])
! except KeyError:
! pass # Must be a reload.
! else: # defer
! targetCorpus = None
! numDeferred += 1
if targetCorpus:
try:
***************
*** 939,946 ****
self.push("Done.</b></p>")
! # After submitting a page, display the prior page or the next one.
! # Derive the day of the submitted page from the ID of the last
! # processed message.
! if id:
start = self.keyToTimestamp(id)
_, _, prior, _, next = self.buildReviewKeys(start)
--- 981,992 ----
self.push("Done.</b></p>")
! # If any messages were deferred, show the same page again.
! if numDeferred > 0:
! start = self.keyToTimestamp(id)
!
! # Else after submitting a whole page, display the prior page or the
! # next one. Derive the day of the submitted page from the ID of the
! # last processed message.
! elif id:
start = self.keyToTimestamp(id)
_, _, prior, _, next = self.buildReviewKeys(start)
***************
*** 960,965 ****
start = 0
! # Present the list of messages in reverse order of appearance.
keys, date, prior, this, next = self.buildReviewKeys(start)
if keys:
priorState = nextState = ""
--- 1006,1024 ----
start = 0
! # Build the lists of messages: spams, hams and unsure.
keys, date, prior, this, next = self.buildReviewKeys(start)
+ keyedMessages = {options.header_spam_string: [],
+ options.header_ham_string: [],
+ options.header_unsure_string: []}
+ for key in keys:
+ # Parse the message and get the judgement header.
+ cachedMessage = state.unknownCorpus[key]
+ message = mboxutils.get_message(cachedMessage.getSubstance())
+ judgement = message[options.hammie_header_name] or \
+ options.header_unsure_string
+ keyedMessages[judgement].append((key, message))
+
+ # Present the list of messages in their groups in reverse order of
+ # appearance.
if keys:
priorState = nextState = ""
***************
*** 969,996 ****
nextState = 'disabled'
lines = [self.reviewHeader % (prior, next, priorState, nextState)]
! stripe = 0
! for key in keys:
! # Parse the message and get the relevant headers.
! cachedMessage = state.unknownCorpus[key]
! message = mboxutils.get_message(cachedMessage.getSubstance())
! subject = self.trimAndQuote(message["Subject"] or "(none)", 50)
! from_ = self.trimAndQuote(message["From"] or "(none)", 40)
- # Output the table row for this message.
- key = cachedMessage.key()
- radioGroup = trainRadio % (key, key, key)
- stripeClass = ['stripe_on', 'stripe_off'][stripe]
- lines.append("""<tr class='%s'><td>%s</td><td>%s</td>
- <td align='middle'>%s</td></tr>""" % \
- (stripeClass, subject, from_, radioGroup))
- stripe = stripe ^ 1
lines.append("""<tr><td></td><td></td><td align='middle'> <br>
<input type='submit' value='Train'></td></tr>""")
lines.append("</table></form>")
content = "\n".join(lines)
! title = "Unclassified messages received on %s" % date
else:
! content = "<p>There are no unclassified messages to display.</p>"
! title = "No unclassified messages"
self.push(self.pageSection % (title, content))
--- 1028,1047 ----
nextState = 'disabled'
lines = [self.reviewHeader % (prior, next, priorState, nextState)]
! for header, type in ((options.header_spam_string, 'Spam'),
! (options.header_ham_string, 'Ham'),
! (options.header_unsure_string, 'Unsure')):
! if keyedMessages[header]:
! lines.append("<tr><td> </td><td></td><td></td></tr>")
! lines.append(self.reviewSubheader % type)
! self.appendMessages(lines, keyedMessages[header], header)
lines.append("""<tr><td></td><td></td><td align='middle'> <br>
<input type='submit' value='Train'></td></tr>""")
lines.append("</table></form>")
content = "\n".join(lines)
! title = "Untrained messages received on %s" % date
else:
! content = "<p>There are no untrained messages to display.</p>"
! title = "No untrained messages"
self.push(self.pageSection % (title, content))
***************
*** 1047,1054 ****
self.logFile = open('_pop3proxy.log', 'wb', 0)
! # Load up the default settings from Option.py / bayescustomize.ini
! self.proxyPort = options.pop3proxy_port
! self.serverName = options.pop3proxy_server_name
! self.serverPort = options.pop3proxy_server_port
self.databaseFilename = options.persistent_storage_file
self.useDB = options.persistent_use_database
--- 1098,1134 ----
self.logFile = open('_pop3proxy.log', 'wb', 0)
! # Load up the old proxy settings from Options.py / bayescustomize.ini
! # and give warnings if they're present. XXX Remove these soon.
! if options.pop3proxy_port != 110 or \
! options.pop3proxy_server_name != '' or \
! options.pop3proxy_server_port != 110:
! print "\n pop3proxy_port, pop3proxy_server_name and"
! print " pop3proxy_server_port are deprecated! Please use"
! print " pop3proxy_servers and pop3proxy_ports instead.\n"
! self.servers = [(options.pop3proxy_server_name,
! options.pop3proxy_server_port)]
! self.proxyPorts = [options.pop3proxy_port]
!
! # Load the new proxy settings - these will override the old ones
! # if both are present.
! if options.pop3proxy_servers:
! self.servers = []
! for server in options.pop3proxy_servers.split(','):
! server = server.strip()
! if server.find(':') > -1:
! server, port = server.split(':', 1)
! else:
! port = '110'
! self.servers.append((server, int(port)))
!
! if options.pop3proxy_ports:
! splitPorts = options.pop3proxy_ports.split(',')
! self.proxyPorts = map(int, map(string.strip, splitPorts))
!
! if len(self.servers) != len(self.proxyPorts):
! print "pop3proxy_servers & pop3proxy_ports are different lengths!"
! sys.exit()
!
! # Load up the other settings from Option.py / bayescustomize.ini
self.databaseFilename = options.persistent_storage_file
self.useDB = options.persistent_use_database
***************
*** 1074,1077 ****
--- 1154,1164 ----
self.uniquifier = 2
+ def buildServerStrings(self):
+ """After the server details have been set up, this creates string
+ versions of the details, for display in the Status panel."""
+ serverStrings = ["%s:%s" % (s, p) for s, p in self.servers]
+ self.serversString = ', '.join(serverStrings)
+ self.proxyPortsString = ', '.join(map(str, self.proxyPorts))
+
def createWorkers(self):
"""Using the options that were initialised in __init__ and then
***************
*** 1117,1125 ****
! def main(serverName, serverPort, proxyPort,
! uiPort, launchUI, databaseFilename, useDB):
"""Runs the proxy forever or until a 'KILL' command is received or
someone hits Ctrl+Break."""
! BayesProxyListener(serverName, serverPort, proxyPort)
UserInterfaceListener(uiPort)
if launchUI:
--- 1204,1212 ----
! def main(servers, proxyPorts, uiPort, launchUI):
"""Runs the proxy forever or until a 'KILL' command is received or
someone hits Ctrl+Break."""
! for (server, serverPort), proxyPort in zip(servers, proxyPorts):
! BayesProxyListener(server, serverPort, proxyPort)
UserInterfaceListener(uiPort)
if launchUI:
***************
*** 1382,1386 ****
state.databaseFilename = arg
elif opt == '-l':
! state.proxyPort = int(arg)
elif opt == '-u':
state.uiPort = int(arg)
--- 1469,1473 ----
state.databaseFilename = arg
elif opt == '-l':
! state.proxyPorts = [int(arg)]
elif opt == '-u':
state.uiPort = int(arg)
***************
*** 1393,1396 ****
--- 1480,1484 ----
if runSelfTest:
print "\nRunning self-test...\n"
+ state.buildServerStrings()
test()
print "Self-test passed." # ...else it would have asserted.
***************
*** 1403,1420 ****
elif 0 <= len(args) <= 2:
# Normal usage, with optional server name and port number.
! if len(args) >= 1:
! state.serverName = args[0]
! if len(args) >= 2:
! state.serverPort = int(args[1])
! if not state.serverName:
print >>sys.stderr, \
("Error: You must give a POP3 server name, either in\n"
! "bayescustomize.ini as pop3proxy_server_name or on the\n"
"command line. pop3server.py -h prints a usage message.")
else:
! main(state.serverName, state.serverPort, state.proxyPort,
! state.uiPort, state.launchUI, state.databaseFilename,
! state.useDB)
else:
--- 1491,1507 ----
elif 0 <= len(args) <= 2:
# Normal usage, with optional server name and port number.
! if len(args) == 1:
! state.servers = [(args[0], 110)]
! elif len(args) == 2:
! state.servers = [(args[0], int(args[1]))]
! if not state.servers or not state.servers[0][0]:
print >>sys.stderr, \
("Error: You must give a POP3 server name, either in\n"
! "bayescustomize.ini as pop3proxy_servers or on the\n"
"command line. pop3server.py -h prints a usage message.")
else:
! state.buildServerStrings()
! main(state.servers, state.proxyPorts, state.uiPort, state.launchUI)
else:
More information about the Spambayes-checkins
mailing list