[Python-checkins] r56565 - tracker/instances/python-dev-spambayes-integration/detectors/spambayes.py tracker/instances/python-dev-spambayes-integration/detectors/spamcheck.py
erik.forsberg
python-checkins at python.org
Fri Jul 27 12:05:31 CEST 2007
Author: erik.forsberg
Date: Fri Jul 27 12:05:30 2007
New Revision: 56565
Added:
tracker/instances/python-dev-spambayes-integration/detectors/spambayes.py
- copied, changed from r56540, tracker/instances/python-dev-spambayes-integration/detectors/spamcheck.py
Removed:
tracker/instances/python-dev-spambayes-integration/detectors/spamcheck.py
Log:
Renamed spamcheck.py to spambayes.py.
Modified to implement the new antispam functionality: instead of simply
rejecting submissions, set a spam score on all msg and file instances.
Copied: tracker/instances/python-dev-spambayes-integration/detectors/spambayes.py (from r56540, tracker/instances/python-dev-spambayes-integration/detectors/spamcheck.py)
==============================================================================
--- tracker/instances/python-dev-spambayes-integration/detectors/spamcheck.py (original)
+++ tracker/instances/python-dev-spambayes-integration/detectors/spambayes.py Fri Jul 27 12:05:30 2007
@@ -11,28 +11,70 @@
import xmlrpclib
import socket
+import time
from roundup.exceptions import Reject
-def check_spam(_db, _klass, _nodeid, newvalues):
- """Auditor to score a website submission."""
+def extract_classinfo(db, klass, nodeid, newvalues):
+ if None == nodeid:
+ node = newvalues
+ content = newvalues['content']
+ else:
+ node = db.getnode(klass.classname, nodeid)
+ content = klass.get(nodeid, 'content')
+
+ if node.has_key('creation') or node.has_key('date'):
+ nodets = node.get('creation', node.get('date')).timestamp()
+ else:
+ nodets = time.time()
+
+ if node.has_key('author') or node.has_key('creator'):
+ authorid = node.get('author', node.get('creator'))
+ else:
+ authorid = db.getuid()
+
+ authorage = nodets - db.getnode('user', authorid)['creation'].timestamp()
- spambayes_uri = _db.config.detectors['SPAMBAYES_URI']
- spam_cutoff = float(_db.config.detectors['SPAMBAYES_SPAM_CUTOFF'])
+ tokens = ["klass:%s" % klass.classname,
+ "author:%s" % authorid,
+ "authorage:%d" % int(authorage)]
+ return (content, tokens)
+
+def check_spambayes(db, content, tokens):
+ spambayes_uri = db.config.detectors['SPAMBAYES_URI']
server = xmlrpclib.ServerProxy(spambayes_uri, verbose=False)
+
try:
- prob = server.score(newvalues, [], {})
+ prob = server.score({'content':content}, tokens, {})
+ return (True, prob)
except (socket.error, xmlrpclib.Error), e:
- pass
+ return (False, str(e))
+
+
+def check_spam(db, klass, nodeid, newvalues):
+ """Auditor to score a website submission."""
+
+
+ if newvalues.has_key('spambayes_score'):
+ if not "coordinator" in [x.lower().strip() for x in db.user.get(db.getuid(), 'roles').split(",")]:
+ raise ValueError, "Only Coordinators may explicitly assign spambayes_score"
+ # Don't do anything if we're explicitly setting the score
+ return
+
+ (content, tokens) = extract_classinfo(db, klass, nodeid, newvalues)
+ (success, other) = check_spambayes(db, content, tokens)
+ if success:
+ newvalues['spambayes_score'] = other
+ newvalues['spambayes_misclassified'] = False
else:
- if prob >= spam_cutoff:
- raise Reject("Looks like spam to me - prob=%.3f" % prob)
+ newvalues['spambayes_score'] = -1
+ newvalues['spambayes_misclassified'] = True
def init(database):
"""Initialize auditor."""
- database.issue.audit('create', check_spam)
- database.issue.audit('set', check_spam)
+ database.msg.audit('create', check_spam)
+ database.msg.audit('set', check_spam)
database.file.audit('create', check_spam)
database.file.audit('set', check_spam)
Deleted: /tracker/instances/python-dev-spambayes-integration/detectors/spamcheck.py
==============================================================================
--- /tracker/instances/python-dev-spambayes-integration/detectors/spamcheck.py Fri Jul 27 12:05:30 2007
+++ (empty file)
@@ -1,38 +0,0 @@
-"""
-spamcheck.py - Auditor that consults a SpamBayes server and scores all form
-submissions. Submissions which are deemed to be spam are rejected. For the
-time being only reject submissions which are assumed to be spam (score >=
-SPAM_CUTOFF). Once a reasonable body of ham and spam submissions have been
-built up you can consider whether to also reject unsure submissions (score >
-HAM_CUTOFF). The current settings make it less likely that you'll reject
-valid submissions at the expense of manual checks to correct spammy items
-which snuck by the screen.
-"""
-
-import xmlrpclib
-import socket
-
-from roundup.exceptions import Reject
-
-def check_spam(_db, _klass, _nodeid, newvalues):
- """Auditor to score a website submission."""
-
- spambayes_uri = _db.config.detectors['SPAMBAYES_URI']
- spam_cutoff = float(_db.config.detectors['SPAMBAYES_SPAM_CUTOFF'])
-
-
- server = xmlrpclib.ServerProxy(spambayes_uri, verbose=False)
- try:
- prob = server.score(newvalues, [], {})
- except (socket.error, xmlrpclib.Error), e:
- pass
- else:
- if prob >= spam_cutoff:
- raise Reject("Looks like spam to me - prob=%.3f" % prob)
-
-def init(database):
- """Initialize auditor."""
- database.issue.audit('create', check_spam)
- database.issue.audit('set', check_spam)
- database.file.audit('create', check_spam)
- database.file.audit('set', check_spam)
More information about the Python-checkins
mailing list