[Spambayes-checkins] spambayes Bayes.py,1.5.2.3,1.5.2.4 Options.py,1.72.2.3,1.72.2.4 classifier.py,1.53.2.1,1.53.2.2 hammiefilter.py,1.2.2.1,1.2.2.2

Neale Pickett npickett@users.sourceforge.net
Thu Nov 21 04:16:39 2002


Update of /cvsroot/spambayes/spambayes
In directory sc8-pr-cvs1:/tmp/cvs-serv25529

Modified Files:
      Tag: hammie-playground
	Bayes.py Options.py classifier.py hammiefilter.py 
Log Message:
Bayes.py: __init__ cleanup

Options.py: moved persistent_storage_file out to hammiefilter and
            pop3proxy sections.
	    
classifier.py: New MetaInfo class which keeps counters
	    for nham and nspam, also a revision, incremented every
	    time either is changed.
					  
            WordInfo class calculates probability on the fly iff
	    MetaInfo revision has changed since last calculation.

            Probabilities are no longer stored in the persistent
            databases.
	       
hammiefilter.py: takes advantage of all this stuff :)




Index: Bayes.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Bayes.py,v
retrieving revision 1.5.2.3
retrieving revision 1.5.2.4
diff -C2 -d -r1.5.2.3 -r1.5.2.4
*** Bayes.py	21 Nov 2002 02:58:37 -0000	1.5.2.3
--- Bayes.py	21 Nov 2002 04:16:36 -0000	1.5.2.4
***************
*** 71,74 ****
--- 71,75 ----
          '''Constructor(database name)'''
  
+         classifier.Bayes.__init__(self)
          self.db_name = db_name
          self.load()
***************
*** 186,190 ****
              # We could be sneaky, like pickle.Unpickler.load_inst,
              # but I think that's overly confusing.
!             obj = classifier.WordInfo(0)
              obj.__setstate__(val)
              return obj
--- 187,191 ----
              # We could be sneaky, like pickle.Unpickler.load_inst,
              # but I think that's overly confusing.
!             obj = classifier.WordInfo()
              obj.__setstate__(val)
              return obj
***************
*** 211,215 ****
          self.statekey = "saved state"
  
!         self.load()
  
      def load(self):
--- 212,216 ----
          self.statekey = "saved state"
  
!         PersistentBayes.__init__(self, db_name)
  
      def load(self):

Index: Options.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Options.py,v
retrieving revision 1.72.2.3
retrieving revision 1.72.2.4
diff -C2 -d -r1.72.2.3 -r1.72.2.4
*** Options.py	20 Nov 2002 06:06:27 -0000	1.72.2.3
--- Options.py	21 Nov 2002 04:16:36 -0000	1.72.2.4
***************
*** 346,352 ****
  clue_mailheader_cutoff: 0.5
  
- # The default database path used by hammie
- persistent_storage_file: hammie.db
- 
  [hammiefilter]
  # hammiefilter can use either a database (quick to score one message) or
--- 346,349 ----
***************
*** 354,357 ****
--- 351,355 ----
  # True to use a database by default.
  hammiefilter_persistent_use_database: True
+ hammiefilter_persistent_storage_file: ~/.hammiedb
  
  [pop3proxy]
***************
*** 360,364 ****
  # The only mandatory option is pop3proxy_server_name, eg. pop3.my-isp.com,
  # but that can come from the command line - see "pop3proxy -h".
! pop3proxy_server_name: ""
  pop3proxy_server_port: 110
  pop3proxy_port: 110
--- 358,362 ----
  # The only mandatory option is pop3proxy_server_name, eg. pop3.my-isp.com,
  # but that can come from the command line - see "pop3proxy -h".
! pop3proxy_server_name: 
  pop3proxy_server_port: 110
  pop3proxy_port: 110
***************
*** 369,373 ****
  pop3proxy_unknown_cache: pop3proxy-unknown-cache
  pop3proxy_persistent_use_database: False
! pop3proxy_persistent_storage_file: ""
  
  [html_ui]
--- 367,371 ----
  pop3proxy_unknown_cache: pop3proxy-unknown-cache
  pop3proxy_persistent_use_database: False
! pop3proxy_persistent_storage_file: hammie.db
  
  [html_ui]
***************
*** 433,437 ****
                    },
      'Hammie': {'hammie_header_name': string_cracker,
-                'persistent_storage_file': string_cracker,
                 'clue_mailheader_cutoff': float_cracker,
                 'persistent_use_database': boolean_cracker,
--- 431,434 ----
***************
*** 445,448 ****
--- 442,446 ----
                 },
      'hammiefilter' : {'hammiefilter_persistent_use_database': boolean_cracker,
+                       'hammiefilter_persistent_storage_file': string_cracker,
                        },
      'pop3proxy': {'pop3proxy_server_name': string_cracker,

Index: classifier.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/classifier.py,v
retrieving revision 1.53.2.1
retrieving revision 1.53.2.2
diff -C2 -d -r1.53.2.1 -r1.53.2.2
*** classifier.py	20 Nov 2002 06:06:28 -0000	1.53.2.1
--- classifier.py	21 Nov 2002 04:16:36 -0000	1.53.2.2
***************
*** 32,36 ****
  
  import math
- import time
  from sets import Set
  
--- 32,35 ----
***************
*** 49,90 ****
  PICKLE_VERSION = 1
  
! class WordInfo(object):
!     __slots__ = ('atime',     # when this record was last used by scoring(*)
!                  'spamcount', # # of spams in which this word appears
!                  'hamcount',  # # of hams in which this word appears
!                  'killcount', # # of times this made it to spamprob()'s nbest
!                  'spamprob',  # prob(spam | msg contains this word)
!                 )
  
      # Invariant:  For use in a classifier database, at least one of
      # spamcount and hamcount must be non-zero.
-     #
-     # (*)atime is the last access time, a UTC time.time() value.  It's the
-     # most recent time this word was used by scoring (i.e., by spamprob(),
-     # not by training via learn()); or, if the word has never been used by
-     # scoring, the time the word record was created (i.e., by learn()).
-     # One good criterion for identifying junk (word records that have no
-     # value) is to delete words that haven't been used for a long time.
-     # Perhaps they were typos, or unique identifiers, or relevant to a
-     # once-hot topic or scam that's fallen out of favor.  Whatever, if
-     # a word is no longer being used, it's just wasting space.
  
!     def __init__(self, atime, spamprob=options.unknown_word_prob):
!         self.atime = atime
!         self.spamcount = self.hamcount = self.killcount = 0
!         self.spamprob = spamprob
  
      def __repr__(self):
!         return "WordInfo%r" % repr((self.atime, self.spamcount,
!                                     self.hamcount, self.killcount,
                                      self.spamprob))
  
      def __getstate__(self):
!         return (self.atime, self.spamcount, self.hamcount, self.killcount,
!                 self.spamprob)
  
      def __setstate__(self, t):
!         (self.atime, self.spamcount, self.hamcount, self.killcount,
!          self.spamprob) = t
  
  class Bayes:
--- 48,196 ----
  PICKLE_VERSION = 1
  
! class MetaInfo(object):
!     """Information about the corpora.
! 
!     Contains nham and nspam, used for calculating probabilities.  Also
!     has a revision, incremented every time nham or nspam is adjusted to
!     invalidate any cached probabilities.
!     
!     """
!     def __init__(self):
!         self._nham = 0
!         self._nspam = 0
!         self.revision = 0
! 
!     def __repr__(self):
!         return "MetaInfo%r" % repr((self._nham,
!                                     self._nspam,
!                                     self.revision))
! 
!     def __getstate__(self):
!         return (self._nham, self._nspam)
! 
!     def __setstate__(self, t):
!         (self._nham, self._nspam) = t
! 
!     def nham(self):
!         return self._nham
! 
!     def nspam(self):
!         return self._nspam
! 
!     def incr_rev(self):
!         self.revision += 1
!         
!     def incr_ham(self, amt=1):
!         self._nham += amt
!         self.incr_rev()
  
+     def incr_spam(self, amt=1):
+         self._nspam += 1
+         self.incr_rev()
+     
+ 
+ class WordInfo(object):
      # Invariant:  For use in a classifier database, at least one of
      # spamcount and hamcount must be non-zero.
  
!     def __init__(self):
!         self.__setstate__((0, 0))
  
      def __repr__(self):
!         return "WordInfo%r" % repr((self.spamcount,
!                                     self.hamcount,
                                      self.spamprob))
  
      def __getstate__(self):
!         return (self.spamcount,
!                 self.hamcount)
  
      def __setstate__(self, t):
!         (self.spamcount, self.hamcount) = t
!         self.spamprob = None
!         self.revision = None
! 
!     def _update_probability(self, meta):
!         """Compute and store p(word) = prob(msg is spam | msg contains word).
!         
!         This is the Graham calculation, but stripped of biases, and
!         stripped of clamping into 0.01 thru 0.99.  The Bayesian
!         adjustment following keeps them in a sane range, and one
!         that naturally grows the more evidence there is to back up
!         a probability.
! 
!         Returns True if the probability changed, False otherwise.
!         """
! 
!         nham = float(meta.nham() or 1)
!         nspam = float(meta.nspam() or 1)
! 
!         if options.experimental_ham_spam_imbalance_adjustment:
!             spam2ham = min(nspam / nham, 1.0)
!             ham2spam = min(nham / nspam, 1.0)
!         else:
!             spam2ham = ham2spam = 1.0
! 
!         S = options.unknown_word_strength
!         StimesX = S * options.unknown_word_prob
!                 
!         assert self.hamcount <= nham
!         hamratio = self.hamcount / nham
! 
!         assert self.spamcount <= nspam
!         spamratio = self.spamcount / nspam
! 
!         prob = spamratio / (hamratio + spamratio)
! 
!         # Now do Robinson's Bayesian adjustment.
!         #
!         #         s*x + n*p(w)
!         # f(w) = --------------
!         #           s + n
!         #
!         # I find this easier to reason about like so (equivalent when
!         # s != 0):
!         #
!         #        x - p
!         #  p +  -------
!         #       1 + n/s
!         #
!         # IOW, it moves p a fraction of the distance from p to x, and
!         # less so the larger n is, or the smaller s is.
! 
!         # Experimental:
!         # Picking a good value for n is interesting:  how much empirical
!         # evidence do we really have?  If nham == nspam,
!         # hamcount + spamcount makes a lot of sense, and the code here
!         # does that by default.
!         # But if, e.g., nham is much larger than nspam, p(w) can get a
!         # lot closer to 0.0 than it can get to 1.0.  That in turn makes
!         # strong ham words (high hamcount) much stronger than strong
!         # spam words (high spamcount), and that makes the accidental
!         # appearance of a strong ham word in spam much more damaging than
!         # the accidental appearance of a strong spam word in ham.
!         # So we don't give hamcount full credit when nham > nspam (or
!         # spamcount when nspam > nham):  instead we knock hamcount down
!         # to what it would have been had nham been equal to nspam.  IOW,
!         # we multiply hamcount by nspam/nham when nspam < nham; or, IOOW,
!         # we don't "believe" any count to an extent more than
!         # min(nspam, nham) justifies.
! 
!         n = self.hamcount * spam2ham  +  self.spamcount * ham2spam
!         prob = (StimesX + n * prob) / (S + n)
! 
!         self.revision = meta.revision
!         if self.spamprob != prob:
!             self.spamprob = prob
!             return True
!         else:
!             return False
! 
!     def probability(self, meta):
!         """Return this word's spam probability, recalculating if needed."""
!         if meta.revision != self.revision:
!             self._update_probability(meta)
!         return self.spamprob
! 
  
  class Bayes:
***************
*** 105,117 ****
      def __init__(self):
          self.wordinfo = {}
!         self.nspam = self.nham = 0
  
      def __getstate__(self):
!         return PICKLE_VERSION, self.wordinfo, self.nspam, self.nham
  
      def __setstate__(self, t):
          if t[0] != PICKLE_VERSION:
              raise ValueError("Can't unpickle -- version %s unknown" % t[0])
!         self.wordinfo, self.nspam, self.nham = t[1:]
  
      # spamprob() implementations.  One of the following is aliased to
--- 211,223 ----
      def __init__(self):
          self.wordinfo = {}
!         self.meta = MetaInfo()
  
      def __getstate__(self):
!         return PICKLE_VERSION, self.wordinfo, self.meta
  
      def __setstate__(self, t):
          if t[0] != PICKLE_VERSION:
              raise ValueError("Can't unpickle -- version %s unknown" % t[0])
!         self.wordinfo, self.meta = t[1:]
  
      # spamprob() implementations.  One of the following is aliased to
***************
*** 145,150 ****
          clues = self._getclues(wordstream)
          for prob, word, record in clues:
-             if record is not None:  # else wordinfo doesn't know about it
-                 record.killcount += 1
              P *= 1.0 - prob
              Q *= prob
--- 251,254 ----
***************
*** 234,239 ****
          clues = self._getclues(wordstream)
          for prob, word, record in clues:
-             if record is not None:  # else wordinfo doesn't know about it
-                 record.killcount += 1
              S *= 1.0 - prob
              H *= prob
--- 338,341 ----
***************
*** 278,282 ****
          spamprob = chi2_spamprob
  
!     def learn(self, wordstream, is_spam, update_probabilities=True):
          """Teach the classifier by example.
  
--- 380,384 ----
          spamprob = chi2_spamprob
  
!     def learn(self, wordstream, is_spam, update_word_probabilities=True):
          """Teach the classifier by example.
  
***************
*** 285,302 ****
          else that it's definitely not spam.
  
!         If optional arg update_probabilities is False (the default is True),
!         don't update word probabilities.  Updating them is expensive, and if
!         you're going to pass many messages to learn(), it's more efficient
!         to pass False here and call update_probabilities() once when you're
!         done -- or to call learn() with update_probabilities=True when
!         passing the last new example.  The important thing is that the
!         probabilities get updated before calling spamprob() again.
          """
  
!         self._add_msg(wordstream, is_spam)
!         if update_probabilities:
!             self.update_probabilities()
  
!     def unlearn(self, wordstream, is_spam, update_probabilities=True):
          """In case of pilot error, call unlearn ASAP after screwing up.
  
--- 387,403 ----
          else that it's definitely not spam.
  
!         If optional arg update_word_probabilities is False (the default
!         is True), don't update individual words' probabilities.
!         Updating them is expensive, and if you're going to pass many
!         messages to learn(), it's more efficient to pass False here and
!         call update_probabilities() once when you're done.  The
!         important thing is that the probabilities get updated before
!         calling spamprob() again.
!         
          """
  
!         self._add_msg(wordstream, is_spam, update_word_probabilities)
  
!     def unlearn(self, wordstream, is_spam, update_word_probabilities=True):
          """In case of pilot error, call unlearn ASAP after screwing up.
  
***************
*** 304,310 ****
          """
  
!         self._remove_msg(wordstream, is_spam)
!         if update_probabilities:
!             self.update_probabilities()
  
      def update_probabilities(self):
--- 405,409 ----
          """
  
!         self._remove_msg(wordstream, is_spam, update_word_probabilities)
  
      def update_probabilities(self):
***************
*** 320,410 ****
  
          for word, record in self.wordinfo.iteritems():
!             self.update_word(word, record)
!                 
!     def update_word(self, word, record):
!         """Compute p(word) = prob(msg is spam | msg contains word).
!         
!         This is the Graham calculation, but stripped of biases, and
!         stripped of clamping into 0.01 thru 0.99.  The Bayesian
!         adjustment following keeps them in a sane range, and one
!         that naturally grows the more evidence there is to back up
!         a probability.
!         """
!         nham = float(self.nham or 1)
!         nspam = float(self.nspam or 1)
! 
!         if options.experimental_ham_spam_imbalance_adjustment:
!             spam2ham = min(nspam / nham, 1.0)
!             ham2spam = min(nham / nspam, 1.0)
!         else:
!             spam2ham = ham2spam = 1.0
! 
!         S = options.unknown_word_strength
!         StimesX = S * options.unknown_word_prob
!                 
!         hamcount = record.hamcount
!         assert hamcount <= nham
!         hamratio = hamcount / nham
! 
!         spamcount = record.spamcount
!         assert spamcount <= nspam
!         spamratio = spamcount / nspam
! 
!         prob = spamratio / (hamratio + spamratio)
! 
!         # Now do Robinson's Bayesian adjustment.
!         #
!         #         s*x + n*p(w)
!         # f(w) = --------------
!         #           s + n
!         #
!         # I find this easier to reason about like so (equivalent when
!         # s != 0):
!         #
!         #        x - p
!         #  p +  -------
!         #       1 + n/s
!         #
!         # IOW, it moves p a fraction of the distance from p to x, and
!         # less so the larger n is, or the smaller s is.
! 
!         # Experimental:
!         # Picking a good value for n is interesting:  how much empirical
!         # evidence do we really have?  If nham == nspam,
!         # hamcount + spamcount makes a lot of sense, and the code here
!         # does that by default.
!         # But if, e.g., nham is much larger than nspam, p(w) can get a
!         # lot closer to 0.0 than it can get to 1.0.  That in turn makes
!         # strong ham words (high hamcount) much stronger than strong
!         # spam words (high spamcount), and that makes the accidental
!         # appearance of a strong ham word in spam much more damaging than
!         # the accidental appearance of a strong spam word in ham.
!         # So we don't give hamcount full credit when nham > nspam (or
!         # spamcount when nspam > nham):  instead we knock hamcount down
!         # to what it would have been had nham been equal to nspam.  IOW,
!         # we multiply hamcount by nspam/nham when nspam < nham; or, IOOW,
!         # we don't "believe" any count to an extent more than
!         # min(nspam, nham) justifies.
! 
!         n = hamcount * spam2ham  +  spamcount * ham2spam
!         prob = (StimesX + n * prob) / (S + n)
! 
!         if record.spamprob != prob:
!             record.spamprob = prob
!             # The next seemingly pointless line appears to be a hack
!             # to allow a persistent db to realize the record has changed.
!             self.wordinfo[word] = record
! 
!     def clearjunk(self, oldesttime):
!         """Forget useless wordinfo records.  This can shrink the database size.
! 
!         A record for a word will be retained only if the word was accessed
!         at or after oldesttime.
!         """
! 
!         wordinfo = self.wordinfo
!         tonuke = [w for w, r in wordinfo.iteritems() if r.atime < oldesttime]
!         for w in tonuke:
!             del wordinfo[w]
  
      # NOTE:  Graham's scheme had a strange asymmetry:  when a word appeared
--- 419,425 ----
  
          for word, record in self.wordinfo.iteritems():
!             # This method updates probability iff the metainfo revision
!             # has changed.
!             record.probability(self.meta)
  
      # NOTE:  Graham's scheme had a strange asymmetry:  when a word appeared
***************
*** 428,444 ****
      # appears in a msg, but distorting spamprob doesn't appear a correct way
      # to exploit it.
!     def _add_msg(self, wordstream, is_spam):
          if is_spam:
!             self.nspam += 1
          else:
!             self.nham += 1
  
          wordinfo = self.wordinfo
          wordinfoget = wordinfo.get
-         now = time.time()
          for word in Set(wordstream):
              record = wordinfoget(word)
              if record is None:
!                 record = self.WordInfoClass(now)
  
              if is_spam:
--- 443,458 ----
      # appears in a msg, but distorting spamprob doesn't appear a correct way
      # to exploit it.
!     def _add_msg(self, wordstream, is_spam, update_word_probabilities):
          if is_spam:
!             self.meta.incr_spam()
          else:
!             self.meta.incr_ham()
  
          wordinfo = self.wordinfo
          wordinfoget = wordinfo.get
          for word in Set(wordstream):
              record = wordinfoget(word)
              if record is None:
!                 record = self.WordInfoClass()
  
              if is_spam:
***************
*** 446,461 ****
              else:
                  record.hamcount += 1
!             # Needed to tell a persistent DB that the content changed.
!             wordinfo[word] = record
  
!     def _remove_msg(self, wordstream, is_spam):
          if is_spam:
!             if self.nspam <= 0:
                  raise ValueError("spam count would go negative!")
!             self.nspam -= 1
          else:
!             if self.nham <= 0:
                  raise ValueError("non-spam count would go negative!")
!             self.nham -= 1
  
          wordinfo = self.wordinfo
--- 460,480 ----
              else:
                  record.hamcount += 1
!                 
!             if update_word_probabilities:
!                 self.update_word_probability(word, record)
!             else:
!                 # Needed to tell a persistent DB that the content changed.
!                 wordinfo[word] = record
  
! 
!     def _remove_msg(self, wordstream, is_spam, update_word_probabilities):
          if is_spam:
!             if self.meta.nspam() <= 0:
                  raise ValueError("spam count would go negative!")
!             self.meta.incr_spam(-1)
          else:
!             if self.meta.nham() <= 0:
                  raise ValueError("non-spam count would go negative!")
!             self.meta.incr_ham(-1)
  
          wordinfo = self.wordinfo
***************
*** 472,477 ****
                  if record.hamcount == 0 == record.spamcount:
                      del wordinfo[word]
                  else:
!                     # Needed to tell a persistent DB that the content changed.
                      wordinfo[word] = record
  
--- 491,499 ----
                  if record.hamcount == 0 == record.spamcount:
                      del wordinfo[word]
+                 elif update_word_probabilities:
+                     update_word_probability(word, record)
                  else:
!                     # Needed to tell a persistent DB that the content
!                     # changed.
                      wordinfo[word] = record
  
***************
*** 484,488 ****
  
          wordinfoget = self.wordinfo.get
-         now = time.time()
          for word in Set(wordstream):
              record = wordinfoget(word)
--- 506,509 ----
***************
*** 490,495 ****
                  prob = unknown
              else:
!                 record.atime = now
!                 prob = record.spamprob
              distance = abs(prob - 0.5)
              if distance >= mindist:
--- 511,515 ----
                  prob = unknown
              else:
!                 prob = record.probability(self.meta)
              distance = abs(prob - 0.5)
              if distance >= mindist:

Index: hammiefilter.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/hammiefilter.py,v
retrieving revision 1.2.2.1
retrieving revision 1.2.2.2
diff -C2 -d -r1.2.2.1 -r1.2.2.2
*** hammiefilter.py	19 Nov 2002 23:45:25 -0000	1.2.2.1
--- hammiefilter.py	21 Nov 2002 04:16:36 -0000	1.2.2.2
***************
*** 52,89 ****
      sys.exit(code)
  
! def newdb():
!     h = hammie.open(options.persistent_storage_file,
!                     options.hammiefilter_persistent_use_database,
!                     'n')
!     h.store()
!     print "Created new database in", options.persistent_storage_file
  
! def filter():
!     h = hammie.open(options.persistent_storage_file,
!                     options.hammiefilter_persistent_use_database,
!                     'r')
!     msg = sys.stdin.read()
!     print h.filter(msg)
  
! def train_ham():
!     h = hammie.open(options.persistent_storage_file,
!                     options.hammiefilter_persistent_use_database,
!                     'w')
!     msg = sys.stdin.read()
!     h.train_ham(msg)
!     h.update_probabilities()
!     h.store()
  
! def train_spam():
!     h = hammie.open(options.persistent_storage_file,
!                     options.hammiefilter_persistent_use_database,
!                     'w')
!     msg = sys.stdin.read()
!     h.train_spam(msg)
!     h.update_probabilities()
!     h.store()
  
  def main():
!     action = filter
      opts, args = getopt.getopt(sys.argv[1:], 'hngs')
      for opt, arg in opts:
--- 52,93 ----
      sys.exit(code)
  
! class HammieFilter(object):
!     def __init__(self):
!         options = Options.options
!         options.mergefiles(['/etc/hammierc',
!                             os.path.expanduser('~/.hammierc')])
!         
!         self.dbname = options.hammiefilter_persistent_storage_file
!         self.dbname = os.path.expanduser(self.dbname)
!         self.usedb = options.hammiefilter_persistent_use_database
!         
  
!     def newdb(self):
!         h = hammie.open(self.dbname, self.usedb, 'n')
!         h.store()
!         print "Created new database in", self.dbname
  
!     def filter(self):
!         h = hammie.open(self.dbname, self.usedb, 'r')
!         msg = sys.stdin.read()
!         print h.filter(msg)
  
!     def train_ham(self):
!         h = hammie.open(self.dbname, self.usedb, 'c')
!         msg = sys.stdin.read()
!         h.train_ham(msg)
!         h.update_probabilities()
!         h.store()
! 
!     def train_spam(self):
!         h = hammie.open(self.dbname, self.usedb, 'c')
!         msg = sys.stdin.read()
!         h.train_spam(msg)
!         h.update_probabilities()
!         h.store()
  
  def main():
!     h = HammieFilter()
!     action = h.filter
      opts, args = getopt.getopt(sys.argv[1:], 'hngs')
      for opt, arg in opts:
***************
*** 91,103 ****
              usage(0)
          elif opt == '-g':
!             action = train_ham
          elif opt == '-s':
!             action = train_spam
          elif opt == "-n":
!             action = newdb
! 
!     # hammiefilter overrides
!     options.mergefiles(['/etc/hammierc',
!                         os.path.expanduser('~/.hammierc')])
  
      action()
--- 95,103 ----
              usage(0)
          elif opt == '-g':
!             action = h.train_ham
          elif opt == '-s':
!             action = h.train_spam
          elif opt == "-n":
!             action = h.newdb
  
      action()





More information about the Spambayes-checkins mailing list