[Python-checkins] r56571 - in tracker/instances: python-dev-spambayes-integration/detectors/spambayes.py python-dev-spambayes-integration/extensions/spambayes.py spambayes_integration spambayes_integration/detectors spambayes_integration/detectors/spambayes.py spambayes_integration/extensions spambayes_integration/extensions/spambayes.py
erik.forsberg
python-checkins at python.org
Fri Jul 27 15:39:27 CEST 2007
Author: erik.forsberg
Date: Fri Jul 27 15:39:27 2007
New Revision: 56571
Added:
tracker/instances/spambayes_integration/
tracker/instances/spambayes_integration/detectors/
tracker/instances/spambayes_integration/detectors/spambayes.py
- copied unchanged from r56565, tracker/instances/python-dev-spambayes-integration/detectors/spambayes.py
tracker/instances/spambayes_integration/extensions/
tracker/instances/spambayes_integration/extensions/spambayes.py
- copied unchanged from r56568, tracker/instances/python-dev-spambayes-integration/extensions/spambayes.py
Removed:
tracker/instances/python-dev-spambayes-integration/detectors/spambayes.py
tracker/instances/python-dev-spambayes-integration/extensions/spambayes.py
Log:
Moving spambayes integration into its own directory for easier use by multiple trackers
Deleted: /tracker/instances/python-dev-spambayes-integration/detectors/spambayes.py
==============================================================================
--- /tracker/instances/python-dev-spambayes-integration/detectors/spambayes.py Fri Jul 27 15:39:27 2007
+++ (empty file)
@@ -1,80 +0,0 @@
-"""
-spambayes.py - Auditor that consults a SpamBayes server and scores all form
-submissions. Submissions which are deemed to be spam are rejected. For the
-time being only reject submissions which are assumed to be spam (score >=
-SPAM_CUTOFF). Once a reasonable body of ham and spam submissions has been
-built up you can consider whether to also reject unsure submissions (score >
-HAM_CUTOFF). The current settings make it less likely that you'll reject
-valid submissions at the expense of manual checks to correct spammy items
-which snuck by the screen.
-"""
-
-import xmlrpclib
-import socket
-import time
-
-from roundup.exceptions import Reject
-
-def extract_classinfo(db, klass, nodeid, newvalues):
- if None == nodeid:
- node = newvalues
- content = newvalues['content']
- else:
- node = db.getnode(klass.classname, nodeid)
- content = klass.get(nodeid, 'content')
-
- if node.has_key('creation') or node.has_key('date'):
- nodets = node.get('creation', node.get('date')).timestamp()
- else:
- nodets = time.time()
-
- if node.has_key('author') or node.has_key('creator'):
- authorid = node.get('author', node.get('creator'))
- else:
- authorid = db.getuid()
-
- authorage = nodets - db.getnode('user', authorid)['creation'].timestamp()
-
- tokens = ["klass:%s" % klass.classname,
- "author:%s" % authorid,
- "authorage:%d" % int(authorage)]
-
-
- return (content, tokens)
-
-def check_spambayes(db, content, tokens):
- spambayes_uri = db.config.detectors['SPAMBAYES_URI']
- server = xmlrpclib.ServerProxy(spambayes_uri, verbose=False)
-
- try:
- prob = server.score({'content':content}, tokens, {})
- return (True, prob)
- except (socket.error, xmlrpclib.Error), e:
- return (False, str(e))
-
-
-def check_spam(db, klass, nodeid, newvalues):
- """Auditor to score a website submission."""
-
-
- if newvalues.has_key('spambayes_score'):
- if not "coordinator" in [x.lower().strip() for x in db.user.get(db.getuid(), 'roles').split(",")]:
- raise ValueError, "Only Coordinators may explicitly assign spambayes_score"
- # Don't do anything if we're explicitly setting the score
- return
-
- (content, tokens) = extract_classinfo(db, klass, nodeid, newvalues)
- (success, other) = check_spambayes(db, content, tokens)
- if success:
- newvalues['spambayes_score'] = other
- newvalues['spambayes_misclassified'] = False
- else:
- newvalues['spambayes_score'] = -1
- newvalues['spambayes_misclassified'] = True
-
-def init(database):
- """Initialize auditor."""
- database.msg.audit('create', check_spam)
- database.msg.audit('set', check_spam)
- database.file.audit('create', check_spam)
- database.file.audit('set', check_spam)
Deleted: /tracker/instances/python-dev-spambayes-integration/extensions/spambayes.py
==============================================================================
--- /tracker/instances/python-dev-spambayes-integration/extensions/spambayes.py Fri Jul 27 15:39:27 2007
+++ (empty file)
@@ -1,111 +0,0 @@
-from roundup.cgi.actions import Action
-from roundup.cgi.exceptions import *
-
-import xmlrpclib, socket
-
-def extract_classinfo(db, classname, nodeid):
- node = db.getnode(classname, nodeid)
-
- authorage = node['creation'].timestamp() - \
- db.getnode('user', node.get('author', node.get('creator')))['creation'].timestamp()
-
- tokens = ["klass:%s" % classname,
- "author:%s" % node.get('author', node.get('creator')),
- "authorage:%d" % int(authorage)]
-
- klass = db.getclass(classname)
- return (klass.get(nodeid, 'content'), tokens)
-
-def train_spambayes(db, content, tokens, is_spam):
- spambayes_uri = db.config.detectors['SPAMBAYES_URI']
-
- server = xmlrpclib.ServerProxy(spambayes_uri, verbose=False)
- try:
- server.train({'content':content}, tokens, {}, is_spam)
- return (True, None)
- except (socket.error, xmlrpclib.Error), e:
- return (False, str(e))
-
-
-class SpambayesClassify(Action):
- def handle(self):
- (content, tokens) = extract_classinfo(self.db,
- self.classname, self.nodeid)
-
- if self.form.has_key("trainspam"):
- is_spam = True
- elif self.form.has_key("trainham"):
- is_spam = False
-
- (status, errmsg) = train_spambayes(self.db, content, tokens,
- is_spam)
-
- node = self.db.getnode(self.classname, self.nodeid)
- props = {}
-
- if status:
- if node.get('spambayes_misclassified', False):
- props['spambayes_misclassified':True]
-
- props['spambayes_score'] = 1.0
-
- s = " SPAM"
- if not is_spam:
- props['spambayes_score'] = 0.0
- s = " HAM"
- self.client.ok_message.append(self._('Message classified as') + s)
- else:
- self.client.error_message.append(self._('Unable to classify message, got error:') + errmsg)
-
- klass = self.db.getclass(self.classname)
- klass.set(self.nodeid, **props)
- self.db.commit()
-
- def permission(self):
- roles = set(self.db.user.get(self.userid, 'roles').lower().split(","))
- allowed = set(self.db.config.detectors['SPAMBAYES_MAY_CLASSIFY'].lower().split(","))
-
- if not bool(roles.intersection(allowed)):
- raise Unauthorised("You do not have permission to train spambayes")
- Action.permission(self)
-
-
-def sb_is_spam(obj):
- cutoff_score = float(obj._db.config.detectors['SPAMBAYES_SPAM_CUTOFF'])
- try:
- score = obj['spambayes_score']
- except KeyError:
- return False
- return score >= cutoff_score
-
-def sb_is_view_ok(obj):
- if not sb_is_spam(obj):
- return True
- roles = set(obj._db.user.get(obj._client.userid,
- 'roles').lower().split(","))
- allowed = set(obj._db.config.detectors['SPAMBAYES_MAY_VIEW_SPAM'].lower().split(","))
-
- return bool(roles.intersection(allowed))
-
-def sb_may_report_misclassified(obj):
- roles = set(obj._db.user.get(obj._client.userid,
- 'roles').lower().split(","))
- allowed = set(obj._db.config.detectors['SPAMBAYES_MAY_REPORT_MISCLASSIFIED'].lower().split(","))
-
- return bool(roles.intersection(allowed))
-
-def sb_may_classify(obj):
- roles = set(obj._db.user.get(obj._client.userid,
- 'roles').lower().split(","))
- allowed = set(obj._db.config.detectors['SPAMBAYES_MAY_CLASSIFY'].lower().split(","))
-
- return bool(roles.intersection(allowed))
-
-def init(instance):
- instance.registerAction("spambayes_classify", SpambayesClassify)
- instance.registerUtil('sb_is_spam', sb_is_spam)
- instance.registerUtil('sb_is_view_ok', sb_is_view_ok)
- instance.registerUtil('sb_may_report_misclassified',
- sb_may_report_misclassified)
- instance.registerUtil('sb_may_classify', sb_may_classify)
-
More information about the Python-checkins
mailing list