[Python-checkins] r56565 - tracker/instances/python-dev-spambayes-integration/detectors/spambayes.py tracker/instances/python-dev-spambayes-integration/detectors/spamcheck.py

erik.forsberg python-checkins at python.org
Fri Jul 27 12:05:31 CEST 2007


Author: erik.forsberg
Date: Fri Jul 27 12:05:30 2007
New Revision: 56565

Added:
   tracker/instances/python-dev-spambayes-integration/detectors/spambayes.py
      - copied, changed from r56540, tracker/instances/python-dev-spambayes-integration/detectors/spamcheck.py
Removed:
   tracker/instances/python-dev-spambayes-integration/detectors/spamcheck.py
Log:

Renamed spamcheck.py to spambayes.py.

Modified for the new antispam functionality: instead of simply rejecting
suspected spam, the auditor now sets a spam score on all msg and file instances.


Copied: tracker/instances/python-dev-spambayes-integration/detectors/spambayes.py (from r56540, tracker/instances/python-dev-spambayes-integration/detectors/spamcheck.py)
==============================================================================
--- tracker/instances/python-dev-spambayes-integration/detectors/spamcheck.py	(original)
+++ tracker/instances/python-dev-spambayes-integration/detectors/spambayes.py	Fri Jul 27 12:05:30 2007
@@ -11,28 +11,70 @@
 
 import xmlrpclib
 import socket
+import time
 
 from roundup.exceptions import Reject
 
-def check_spam(_db, _klass, _nodeid, newvalues):
-    """Auditor to score a website submission."""
+def extract_classinfo(db, klass, nodeid, newvalues):
+    if None == nodeid:
+        node = newvalues
+        content = newvalues['content']
+    else:
+        node = db.getnode(klass.classname, nodeid)
+        content = klass.get(nodeid, 'content')
+
+    if node.has_key('creation') or node.has_key('date'):
+        nodets = node.get('creation', node.get('date')).timestamp()
+    else:
+        nodets = time.time()
+
+    if node.has_key('author') or node.has_key('creator'):
+        authorid = node.get('author', node.get('creator'))
+    else:
+        authorid = db.getuid()
+
+    authorage = nodets - db.getnode('user', authorid)['creation'].timestamp()
 
-    spambayes_uri = _db.config.detectors['SPAMBAYES_URI']
-    spam_cutoff = float(_db.config.detectors['SPAMBAYES_SPAM_CUTOFF'])
+    tokens = ["klass:%s" % klass.classname,
+              "author:%s" % authorid,
+              "authorage:%d" % int(authorage)]
 
 
+    return (content, tokens)
+
+def check_spambayes(db, content, tokens):
+    spambayes_uri = db.config.detectors['SPAMBAYES_URI']
     server = xmlrpclib.ServerProxy(spambayes_uri, verbose=False)
+
     try:
-        prob = server.score(newvalues, [], {})
+        prob = server.score({'content':content}, tokens, {})
+        return (True, prob)
     except (socket.error, xmlrpclib.Error), e:
-        pass
+        return (False, str(e))
+    
+
+def check_spam(db, klass, nodeid, newvalues):
+    """Auditor to score a website submission."""
+
+
+    if newvalues.has_key('spambayes_score'):
+        if not "coordinator" in [x.lower().strip() for x in db.user.get(db.getuid(), 'roles').split(",")]:
+            raise ValueError, "Only Coordinators may explicitly assign spambayes_score"
+        # Don't do anything if we're explicitly setting the score        
+        return
+
+    (content, tokens) = extract_classinfo(db, klass, nodeid, newvalues)
+    (success, other) = check_spambayes(db, content, tokens)
+    if success:
+        newvalues['spambayes_score'] = other
+        newvalues['spambayes_misclassified'] = False
     else:
-        if prob >= spam_cutoff:
-            raise Reject("Looks like spam to me - prob=%.3f" % prob)
+        newvalues['spambayes_score'] = -1
+        newvalues['spambayes_misclassified'] = True
 
 def init(database):
     """Initialize auditor."""
-    database.issue.audit('create', check_spam)
-    database.issue.audit('set', check_spam)
+    database.msg.audit('create', check_spam)
+    database.msg.audit('set', check_spam)
     database.file.audit('create', check_spam)
     database.file.audit('set', check_spam)

Deleted: /tracker/instances/python-dev-spambayes-integration/detectors/spamcheck.py
==============================================================================
--- /tracker/instances/python-dev-spambayes-integration/detectors/spamcheck.py	Fri Jul 27 12:05:30 2007
+++ (empty file)
@@ -1,38 +0,0 @@
-"""
-spamcheck.py - Auditor that consults a SpamBayes server and scores all form
-submissions.  Submissions which are deemed to be spam are rejected.  For the
-time being only reject submissions which are assumed to be spam (score >=
-SPAM_CUTOFF).  Once a reasonable body of ham and spam submissions have been
-built up you can consider whether to also reject unsure submissions (score >
-HAM_CUTOFF).  The current settings make it less likely that you'll reject
-valid submissions at the expense of manual checks to correct spammy items
-which snuck by the screen.
-"""
-
-import xmlrpclib
-import socket
-
-from roundup.exceptions import Reject
-
-def check_spam(_db, _klass, _nodeid, newvalues):
-    """Auditor to score a website submission."""
-
-    spambayes_uri = _db.config.detectors['SPAMBAYES_URI']
-    spam_cutoff = float(_db.config.detectors['SPAMBAYES_SPAM_CUTOFF'])
-
-
-    server = xmlrpclib.ServerProxy(spambayes_uri, verbose=False)
-    try:
-        prob = server.score(newvalues, [], {})
-    except (socket.error, xmlrpclib.Error), e:
-        pass
-    else:
-        if prob >= spam_cutoff:
-            raise Reject("Looks like spam to me - prob=%.3f" % prob)
-
-def init(database):
-    """Initialize auditor."""
-    database.issue.audit('create', check_spam)
-    database.issue.audit('set', check_spam)
-    database.file.audit('create', check_spam)
-    database.file.audit('set', check_spam)


More information about the Python-checkins mailing list