[Spambayes-checkins] spambayes Bayes.py,1.5.2.4,1.5.2.5 classifier.py,1.53.2.2,1.53.2.3 hammie.py,1.40.2.1,1.40.2.2 hammiefilter.py,1.2.2.2,1.2.2.3

Neale Pickett npickett@users.sourceforge.net
Thu Nov 21 04:27:30 2002


Update of /cvsroot/spambayes/spambayes
In directory sc8-pr-cvs1:/tmp/cvs-serv27763

Modified Files:
      Tag: hammie-playground
	Bayes.py classifier.py hammie.py hammiefilter.py 
Log Message:
* A few more MetaInfo class-related changes which I somehow
  overlooked.  hammiefilter will need to start with a new database.


Index: Bayes.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Bayes.py,v
retrieving revision 1.5.2.4
retrieving revision 1.5.2.5
diff -C2 -d -r1.5.2.4 -r1.5.2.5
*** Bayes.py	21 Nov 2002 04:16:36 -0000	1.5.2.4
--- Bayes.py	21 Nov 2002 04:27:26 -0000	1.5.2.5
***************
*** 224,238 ****
  
          if self.wordinfo.has_key(self.statekey):
! 
!             self.nham, self.nspam = self.wordinfo[self.statekey]
              if Corpus.Verbose:
!                 print '%s is an existing DBDict, with %d ham and %d spam' \
!                       % (self.db_name, self.nham, self.nspam)
          else:
              # new dbdict
              if Corpus.Verbose:
                  print self.db_name,'is a new DBDict'
-             self.nham = 0
-             self.nspam = 0
  
      def store(self):
--- 224,235 ----
  
          if self.wordinfo.has_key(self.statekey):
!             self.meta = self.wordinfo[self.statekey]
              if Corpus.Verbose:
!                 print '%s is an existing DBDict' \
!                       % (self.db_name)
          else:
              # new dbdict
              if Corpus.Verbose:
                  print self.db_name,'is a new DBDict'
  
      def store(self):
***************
*** 242,246 ****
              print 'Persisting',self.db_name,'state in DBDict'
  
!         self.wordinfo[self.statekey] = (self.nham, self.nspam)
          self.wordinfo.sync()
  
--- 239,243 ----
              print 'Persisting',self.db_name,'state in DBDict'
  
!         self.wordinfo[self.statekey] = self.meta
          self.wordinfo.sync()
  

Index: classifier.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/classifier.py,v
retrieving revision 1.53.2.2
retrieving revision 1.53.2.3
diff -C2 -d -r1.53.2.2 -r1.53.2.3
*** classifier.py	21 Nov 2002 04:16:36 -0000	1.53.2.2
--- classifier.py	21 Nov 2002 04:27:27 -0000	1.53.2.3
***************
*** 57,63 ****
      """
      def __init__(self):
!         self._nham = 0
!         self._nspam = 0
!         self.revision = 0
  
      def __repr__(self):
--- 57,61 ----
      """
      def __init__(self):
!         self.__setstate__((0, 0))
  
      def __repr__(self):
***************
*** 71,74 ****
--- 69,73 ----
      def __setstate__(self, t):
          (self._nham, self._nspam) = t
+         self.revision = 0
  
      def nham(self):
***************
*** 380,384 ****
          spamprob = chi2_spamprob
  
!     def learn(self, wordstream, is_spam, update_word_probabilities=True):
          """Teach the classifier by example.
  
--- 379,383 ----
          spamprob = chi2_spamprob
  
!     def learn(self, wordstream, is_spam):
          """Teach the classifier by example.
  
***************
*** 397,403 ****
          """
  
!         self._add_msg(wordstream, is_spam, update_word_probabilities)
  
!     def unlearn(self, wordstream, is_spam, update_word_probabilities=True):
          """In case of pilot error, call unlearn ASAP after screwing up.
  
--- 396,402 ----
          """
  
!         self._add_msg(wordstream, is_spam)
  
!     def unlearn(self, wordstream, is_spam):
          """In case of pilot error, call unlearn ASAP after screwing up.
  
***************
*** 405,409 ****
          """
  
!         self._remove_msg(wordstream, is_spam, update_word_probabilities)
  
      def update_probabilities(self):
--- 404,408 ----
          """
  
!         self._remove_msg(wordstream, is_spam)
  
      def update_probabilities(self):
***************
*** 443,447 ****
      # appears in a msg, but distorting spamprob doesn't appear a correct way
      # to exploit it.
!     def _add_msg(self, wordstream, is_spam, update_word_probabilities):
          if is_spam:
              self.meta.incr_spam()
--- 442,446 ----
      # appears in a msg, but distorting spamprob doesn't appear a correct way
      # to exploit it.
!     def _add_msg(self, wordstream, is_spam):
          if is_spam:
              self.meta.incr_spam()
***************
*** 461,472 ****
                  record.hamcount += 1
                  
!             if update_word_probabilities:
!                 self.update_word_probability(word, record)
!             else:
!                 # Needed to tell a persistent DB that the content changed.
!                 wordinfo[word] = record
  
  
!     def _remove_msg(self, wordstream, is_spam, update_word_probabilities):
          if is_spam:
              if self.meta.nspam() <= 0:
--- 460,468 ----
                  record.hamcount += 1
                  
!             # Needed to tell a persistent DB that the content changed.
!             wordinfo[word] = record
  
  
!     def _remove_msg(self, wordstream, is_spam):
          if is_spam:
              if self.meta.nspam() <= 0:
***************
*** 491,496 ****
                  if record.hamcount == 0 == record.spamcount:
                      del wordinfo[word]
-                 elif update_word_probabilities:
-                     update_word_probability(word, record)
                  else:
                      # Needed to tell a persistent DB that the content
--- 487,490 ----

Index: hammie.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/hammie.py,v
retrieving revision 1.40.2.1
retrieving revision 1.40.2.2
diff -C2 -d -r1.40.2.1 -r1.40.2.2
*** hammie.py	19 Nov 2002 23:45:24 -0000	1.40.2.1
--- hammie.py	21 Nov 2002 04:27:27 -0000	1.40.2.2
***************
*** 136,140 ****
          """
  
!         self.bayes.learn(tokenize(msg), is_spam, False)
  
      def train_ham(self, msg):
--- 136,140 ----
          """
  
!         self.bayes.learn(tokenize(msg), is_spam)
  
      def train_ham(self, msg):
***************
*** 161,180 ****
  
          self.train(msg, True)
- 
-     def update_probabilities(self, store=True):
-         """Update probability values.
- 
-         You would want to call this after a training session.  It's
-         pretty slow, so if you have a lot of messages to train, wait
-         until you're all done before calling this.
- 
-         Unless store is false, the peristent store will be written after
-         updating probabilities.
- 
-         """
- 
-         self.bayes.update_probabilities()
-         if store:
-             self.store()
  
      def store(self):
--- 161,164 ----

Index: hammiefilter.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/hammiefilter.py,v
retrieving revision 1.2.2.2
retrieving revision 1.2.2.3
diff -C2 -d -r1.2.2.2 -r1.2.2.3
*** hammiefilter.py	21 Nov 2002 04:16:36 -0000	1.2.2.2
--- hammiefilter.py	21 Nov 2002 04:27:27 -0000	1.2.2.3
***************
*** 77,81 ****
          msg = sys.stdin.read()
          h.train_ham(msg)
-         h.update_probabilities()
          h.store()
  
--- 77,80 ----
***************
*** 84,88 ****
          msg = sys.stdin.read()
          h.train_spam(msg)
-         h.update_probabilities()
          h.store()
  
--- 83,86 ----





More information about the Spambayes-checkins mailing list