[Spambayes-checkins] spambayes/spambayes Options.py,1.7,1.8 hammie.py,1.3,1.4

Neale Pickett npickett at users.sourceforge.net
Tue Jan 21 21:23:20 EST 2003


Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs1:/tmp/cvs-serv13378/spambayes

Modified Files:
	Options.py hammie.py 
Log Message:
* Fix function name in hammie.py
* Expound upon docstrings in hammie.py
* Options.py will now look for bayescustomize.ini and ~/.spambayesrc.
  Hopefully some non-Unix folks will update this with sensible defaults
  for their platforms.
* hammiefilter has a ton of new options -- check the docstring


Index: Options.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Options.py,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -d -r1.7 -r1.8
*** Options.py	21 Jan 2003 14:50:26 -0000	1.7
--- Options.py	22 Jan 2003 05:23:17 -0000	1.8
***************
*** 557,559 ****
      options.mergefiles(alternate.split())
  else:
!     options.mergefiles(['bayescustomize.ini'])
--- 557,565 ----
      options.mergefiles(alternate.split())
  else:
!     alts = []
!     for path in ['bayescustomize.ini', '~/.spambayesrc']:
!         epath = os.path.expanduser(path)
!         if os.path.exists(epath):
!             alts.append(epath)
!     if alts:
!         options.mergefiles(alts)

Index: hammie.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/hammie.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** hammie.py	21 Jan 2003 14:50:27 -0000	1.3
--- hammie.py	22 Jan 2003 05:23:17 -0000	1.4
***************
*** 76,81 ****
          If 'train' is True, also train on the result of scoring the
          message (ie. train as ham if it's ham, train as spam if it's
!         spam).  You'll want to be very diligent about retraining
!         mistakes if you use this.
  
          All defaults for optional parameters come from the Options file.
--- 76,82 ----
          If 'train' is True, also train on the result of scoring the
          message (ie. train as ham if it's ham, train as spam if it's
!         spam).  If the message already has a trained header, it will be
!         untrained first.  You'll want to be very diligent about
!         retraining mistakes if you use this option.
  
          All defaults for optional parameters come from the Options file.
***************
*** 103,122 ****
          except KeyError:
              pass
          prob, clues = self._scoremsg(msg, True)
          if prob < ham_cutoff:
              is_spam = False
!             trained = options.header_ham_string
!             disp = trained
          elif prob > spam_cutoff:
              is_spam = True
!             trained = options.header_spam_string
!             disp = trained
          else:
              is_spam = False
-             trained = options.header_ham_string
              disp = options.header_unsure_string
          if train:
!             self.train(msg, is_spam)
!             msg.add_header(options.hammie_trained_header, trained)
          disp += ("; %."+str(options.header_score_digits)+"f") % prob
          if options.header_score_logarithm:
--- 104,121 ----
          except KeyError:
              pass
+         if train:
+             self.untrain_from_header(msg)
          prob, clues = self._scoremsg(msg, True)
          if prob < ham_cutoff:
              is_spam = False
!             disp = options.header_ham_string
          elif prob > spam_cutoff:
              is_spam = True
!             disp = options.header_spam_string
          else:
              is_spam = False
              disp = options.header_unsure_string
          if train:
!             self.train(msg, is_spam, True)
          disp += ("; %."+str(options.header_score_digits)+"f") % prob
          if options.header_score_logarithm:
***************
*** 129,139 ****
                  x=-math.log10(1.0-prob)
                  disp += " (%d)"%x
          msg.add_header(header, disp)
          if debug:
              disp = self.formatclues(clues)
              msg.add_header(debugheader, disp)
          return msg.as_string(unixfrom=(msg.get_unixfrom() is not None))
  
!     def train(self, msg, is_spam):
          """Train bayes with a message.
  
--- 128,140 ----
                  x=-math.log10(1.0-prob)
                  disp += " (%d)"%x
+         del msg[header]
          msg.add_header(header, disp)
          if debug:
              disp = self.formatclues(clues)
+             del msg[debugheader]
              msg.add_header(debugheader, disp)
          return msg.as_string(unixfrom=(msg.get_unixfrom() is not None))
  
!     def train(self, msg, is_spam, add_header=False):
          """Train bayes with a message.
  
***************
*** 142,148 ****
--- 143,159 ----
          is_spam should be 1 if the message is spam, 0 if not.
  
+         If add_header is True, add a header with how it was trained (in
+         case we need to untrain later)
+ 
          """
  
          self.bayes.learn(tokenize(msg), is_spam)
+         if add_header:
+             if is_spam:
+                 trained = options.header_spam_string
+             else:
+                 trained = options.header_ham_string
+             del msg[options.hammie_trained_header]
+             msg.add_header(options.hammie_trained_header, trained)
  
      def untrain(self, msg, is_spam):
***************
*** 151,155 ****
          msg can be a string, a file object, or a Message object.
  
!         is_spam should be 1 if the message is spam, 0 if not.
  
          """
--- 162,166 ----
          msg can be a string, a file object, or a Message object.
  
!         is_spam should be True if the message is spam, False if not.
  
          """
***************
*** 157,180 ****
          self.bayes.unlearn(tokenize(msg), is_spam)
  
!     def train_ham(self, msg):
          """Train bayes with ham.
  
          msg can be a string, a file object, or a Message object.
  
          """
  
!         self.train(msg, False)
  
!     def train_spam(self, msg):
          """Train bayes with spam.
  
          msg can be a string, a file object, or a Message object.
  
          """
  
!         self.train(msg, True)
  
      def untrain_ham(self, msg):
!         """Untrain bayes with ham.
  
          msg can be a string, a file object, or a Message object.
--- 168,222 ----
          self.bayes.unlearn(tokenize(msg), is_spam)
  
!     def untrain_from_header(self, msg):
!         """Untrain bayes based on X-Spambayes-Trained header.
! 
!         msg can be a string, a file object, or a Message object.
! 
!         If no such header is present, nothing happens.
! 
!         If add_header is True, add a header with how it was trained (in
!         case we need to untrain later)
! 
!         """
! 
!         msg = mboxutils.get_message(msg)
!         trained = msg.get(options.hammie_trained_header)
!         if not trained:
!             return
!         del msg[options.hammie_trained_header]
!         if trained == options.header_ham_string:
!             self.untrain_ham(msg)
!         elif trained == options.header_spam_string:
!             self.untrain_spam(msg)
!         else:
!             raise ValueError('%s header value unrecognized'
!                              % options.hammie_trained_header)
! 
!     def train_ham(self, msg, add_header=False):
          """Train bayes with ham.
  
          msg can be a string, a file object, or a Message object.
  
+         If add_header is True, add a header with how it was trained (in
+         case we need to untrain later)
+ 
          """
  
!         self.train(msg, False, add_header)
  
!     def train_spam(self, msg, add_header=False):
          """Train bayes with spam.
  
          msg can be a string, a file object, or a Message object.
  
+         If add_header is True, add a header with how it was trained (in
+         case we need to untrain later)
+ 
          """
  
!         self.train(msg, True, add_header)
  
      def untrain_ham(self, msg):
!         """Untrain bayes with a message previously trained as ham.
  
          msg can be a string, a file object, or a Message object.
***************
*** 184,189 ****
          self.untrain(msg, False)
  
!     def train_spam(self, msg):
!         """Untrain bayes with spam.
  
          msg can be a string, a file object, or a Message object.
--- 226,231 ----
          self.untrain(msg, False)
  
!     def untrain_spam(self, msg):
!         """Untrain bayes with a message previously trained as spam.
  
          msg can be a string, a file object, or a Message object.





More information about the Spambayes-checkins mailing list