[Spambayes-checkins] spambayes/spambayes storage.py,1.17,1.18
Tony Meyer
anadelonbrin at users.sourceforge.net
Wed Aug 6 23:04:11 EDT 2003
Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs1:/tmp/cvs-serv26785/spambayes
Modified Files:
storage.py
Log Message:
Add a MySQLdb based classifier.
Index: storage.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/storage.py,v
retrieving revision 1.17
retrieving revision 1.18
diff -C2 -d -r1.17 -r1.18
*** storage.py 7 Aug 2003 01:39:12 -0000 1.17
--- storage.py 7 Aug 2003 05:04:09 -0000 1.18
***************
*** 6,9 ****
--- 6,11 ----
PickledClassifier - Classifier that uses a pickle db
DBDictClassifier - Classifier that uses a shelve db
+ PGClassifier - Classifier that uses postgres
+ mySQLClassifier - Classifier that uses mySQL
Trainer - Classifier training observer
SpamTrainer - Trainer for spam
***************
*** 13,16 ****
--- 15,20 ----
*Classifier are subclasses of Classifier (classifier.Classifier)
that add automatic state store/restore function to the Classifier class.
+ All SQL based classifiers are subclasses of SQLClassifier, which is a
+ subclass of Classifier.
PickledClassifier is a Classifier class that uses a cPickle
***************
*** 412,415 ****
--- 416,502 ----
self.nspam = 0
self.nham = 0
+
+
+ class mySQLClassifier(SQLClassifier):
+ '''Classifier object persisted in a mySQL database
+
+ It is assumed that the database already exists, and that the mySQL
+ server is currently running.'''
+
+ def __init__(self, data_source_name):
+ self.table_definition = ("create table bayes ("
+ " word varchar(255) not null default '',"
+ " nspam integer not null default 0,"
+ " nham integer not null default 0,"
+ " primary key(word)"
+ ");")
+ self.host = "localhost"
+ self.username = "root"
+ self.password = ""
+ db_name = "spambayes"
+ source_info = data_source_name.split()
+ for info in source_info:
+ if info.startswith("host"):
+ self.host = info[5:]
+ elif info.startswith("user"):
+ self.username = info[5:]
+ elif info.startswith("pass"):
+ self.username = info[5:]
+ elif info.startswith("dbname"):
+ db_name = info[7:]
+ SQLClassifier.__init__(self, db_name)
+
+ def cursor(self):
+ return self.db.cursor()
+
+ def fetchall(self, c):
+ return c.fetchall()
+
+ def commit(self, c):
+ self.db.commit()
+
+ def load(self):
+ '''Load state from database'''
+
+ import MySQLdb
+
+ if options.verbose:
+ print 'Loading state from',self.db_name,'database'
+
+ self.db = MySQLdb.connect(host=self.host, db=self.db_name,
+ user=self.username, passwd=self.password)
+
+ c = self.cursor()
+ try:
+ c.execute("select count(*) from bayes")
+ except MySQLdb.ProgrammingError:
+ self.db.rollback()
+ self.create_bayes()
+
+ if self._has_key(self.statekey):
+ row = self._get_row(self.statekey)
+ self.nspam = int(row[1])
+ self.nham = int(row[2])
+ if options.verbose:
+ print '%s is an existing database, with %d spam and %d ham' \
+ % (self.db_name, self.nspam, self.nham)
+ else:
+ # new database
+ if options.verbose:
+ print self.db_name,'is a new database'
+ self.nspam = 0
+ self.nham = 0
+
+ def _wordinfoget(self, word):
+ if isinstance(word, unicode):
+ word = word.encode("utf-8")
+
+ row = self._get_row(word)
+ if row:
+ item = self.WordInfoClass()
+ item.__setstate__((row[1], row[2]))
+ return item
+ else:
+ return None
More information about the Spambayes-checkins
mailing list