[Spambayes-checkins]
spambayes Bayes.py,1.5.2.4,1.5.2.5 classifier.py,1.53.2.2,1.53.2.3
hammie.py,1.40.2.1,1.40.2.2 hammiefilter.py,1.2.2.2,1.2.2.3
Neale Pickett
npickett@users.sourceforge.net
Thu Nov 21 04:27:30 2002
- Previous message: [Spambayes-checkins]
spambayes Bayes.py,1.5.2.3,1.5.2.4 Options.py,1.72.2.3,1.72.2.4
classifier.py,1.53.2.1,1.53.2.2 hammiefilter.py,1.2.2.1,1.2.2.2
- Next message: [Spambayes-checkins] spambayes hammiecli.py,1.2,1.3
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /cvsroot/spambayes/spambayes
In directory sc8-pr-cvs1:/tmp/cvs-serv27763
Modified Files:
Tag: hammie-playground
Bayes.py classifier.py hammie.py hammiefilter.py
Log Message:
* A few more MetaInfo class-related changes which I somehow
overlooked. hammiefilter will need to start with a new database.
Index: Bayes.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Bayes.py,v
retrieving revision 1.5.2.4
retrieving revision 1.5.2.5
diff -C2 -d -r1.5.2.4 -r1.5.2.5
*** Bayes.py 21 Nov 2002 04:16:36 -0000 1.5.2.4
--- Bayes.py 21 Nov 2002 04:27:26 -0000 1.5.2.5
***************
*** 224,238 ****
if self.wordinfo.has_key(self.statekey):
!
! self.nham, self.nspam = self.wordinfo[self.statekey]
if Corpus.Verbose:
! print '%s is an existing DBDict, with %d ham and %d spam' \
! % (self.db_name, self.nham, self.nspam)
else:
# new dbdict
if Corpus.Verbose:
print self.db_name,'is a new DBDict'
- self.nham = 0
- self.nspam = 0
def store(self):
--- 224,235 ----
if self.wordinfo.has_key(self.statekey):
! self.meta = self.wordinfo[self.statekey]
if Corpus.Verbose:
! print '%s is an existing DBDict' \
! % (self.db_name)
else:
# new dbdict
if Corpus.Verbose:
print self.db_name,'is a new DBDict'
def store(self):
***************
*** 242,246 ****
print 'Persisting',self.db_name,'state in DBDict'
! self.wordinfo[self.statekey] = (self.nham, self.nspam)
self.wordinfo.sync()
--- 239,243 ----
print 'Persisting',self.db_name,'state in DBDict'
! self.wordinfo[self.statekey] = self.meta
self.wordinfo.sync()
Index: classifier.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/classifier.py,v
retrieving revision 1.53.2.2
retrieving revision 1.53.2.3
diff -C2 -d -r1.53.2.2 -r1.53.2.3
*** classifier.py 21 Nov 2002 04:16:36 -0000 1.53.2.2
--- classifier.py 21 Nov 2002 04:27:27 -0000 1.53.2.3
***************
*** 57,63 ****
"""
def __init__(self):
! self._nham = 0
! self._nspam = 0
! self.revision = 0
def __repr__(self):
--- 57,61 ----
"""
def __init__(self):
! self.__setstate__((0, 0))
def __repr__(self):
***************
*** 71,74 ****
--- 69,73 ----
def __setstate__(self, t):
(self._nham, self._nspam) = t
+ self.revision = 0
def nham(self):
***************
*** 380,384 ****
spamprob = chi2_spamprob
! def learn(self, wordstream, is_spam, update_word_probabilities=True):
"""Teach the classifier by example.
--- 379,383 ----
spamprob = chi2_spamprob
! def learn(self, wordstream, is_spam):
"""Teach the classifier by example.
***************
*** 397,403 ****
"""
! self._add_msg(wordstream, is_spam, update_word_probabilities)
! def unlearn(self, wordstream, is_spam, update_word_probabilities=True):
"""In case of pilot error, call unlearn ASAP after screwing up.
--- 396,402 ----
"""
! self._add_msg(wordstream, is_spam)
! def unlearn(self, wordstream, is_spam):
"""In case of pilot error, call unlearn ASAP after screwing up.
***************
*** 405,409 ****
"""
! self._remove_msg(wordstream, is_spam, update_word_probabilities)
def update_probabilities(self):
--- 404,408 ----
"""
! self._remove_msg(wordstream, is_spam)
def update_probabilities(self):
***************
*** 443,447 ****
# appears in a msg, but distorting spamprob doesn't appear a correct way
# to exploit it.
! def _add_msg(self, wordstream, is_spam, update_word_probabilities):
if is_spam:
self.meta.incr_spam()
--- 442,446 ----
# appears in a msg, but distorting spamprob doesn't appear a correct way
# to exploit it.
! def _add_msg(self, wordstream, is_spam):
if is_spam:
self.meta.incr_spam()
***************
*** 461,472 ****
record.hamcount += 1
! if update_word_probabilities:
! self.update_word_probability(word, record)
! else:
! # Needed to tell a persistent DB that the content changed.
! wordinfo[word] = record
! def _remove_msg(self, wordstream, is_spam, update_word_probabilities):
if is_spam:
if self.meta.nspam() <= 0:
--- 460,468 ----
record.hamcount += 1
! # Needed to tell a persistent DB that the content changed.
! wordinfo[word] = record
! def _remove_msg(self, wordstream, is_spam):
if is_spam:
if self.meta.nspam() <= 0:
***************
*** 491,496 ****
if record.hamcount == 0 == record.spamcount:
del wordinfo[word]
- elif update_word_probabilities:
- update_word_probability(word, record)
else:
# Needed to tell a persistent DB that the content
--- 487,490 ----
Index: hammie.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/hammie.py,v
retrieving revision 1.40.2.1
retrieving revision 1.40.2.2
diff -C2 -d -r1.40.2.1 -r1.40.2.2
*** hammie.py 19 Nov 2002 23:45:24 -0000 1.40.2.1
--- hammie.py 21 Nov 2002 04:27:27 -0000 1.40.2.2
***************
*** 136,140 ****
"""
! self.bayes.learn(tokenize(msg), is_spam, False)
def train_ham(self, msg):
--- 136,140 ----
"""
! self.bayes.learn(tokenize(msg), is_spam)
def train_ham(self, msg):
***************
*** 161,180 ****
self.train(msg, True)
-
- def update_probabilities(self, store=True):
- """Update probability values.
-
- You would want to call this after a training session. It's
- pretty slow, so if you have a lot of messages to train, wait
- until you're all done before calling this.
-
- Unless store is false, the peristent store will be written after
- updating probabilities.
-
- """
-
- self.bayes.update_probabilities()
- if store:
- self.store()
def store(self):
--- 161,164 ----
Index: hammiefilter.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/hammiefilter.py,v
retrieving revision 1.2.2.2
retrieving revision 1.2.2.3
diff -C2 -d -r1.2.2.2 -r1.2.2.3
*** hammiefilter.py 21 Nov 2002 04:16:36 -0000 1.2.2.2
--- hammiefilter.py 21 Nov 2002 04:27:27 -0000 1.2.2.3
***************
*** 77,81 ****
msg = sys.stdin.read()
h.train_ham(msg)
- h.update_probabilities()
h.store()
--- 77,80 ----
***************
*** 84,88 ****
msg = sys.stdin.read()
h.train_spam(msg)
- h.update_probabilities()
h.store()
--- 83,86 ----
- Previous message: [Spambayes-checkins]
spambayes Bayes.py,1.5.2.3,1.5.2.4 Options.py,1.72.2.3,1.72.2.4
classifier.py,1.53.2.1,1.53.2.2 hammiefilter.py,1.2.2.1,1.2.2.2
- Next message: [Spambayes-checkins] spambayes hammiecli.py,1.2,1.3
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the Spambayes-checkins
mailing list