[Spambayes-checkins] spambayes hammiefilter.py,1.9,1.10

Tue Jan 21 06:51:01 EST 2003

Update of /cvsroot/spambayes/spambayes
In directory sc8-pr-cvs1:/tmp/cvs-serv6690

Modified Files:
	hammiefilter.py 
Log Message:
* hammiefilter now has -t option for filter/train step
* Options has new hammie_train_on_filter and hammie_trained_header options
* hammie.py:Hammie.filter has new train kwarg to support filter/train in
  one step.

Index: hammiefilter.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/hammiefilter.py,v
retrieving revision 1.9
retrieving revision 1.10
diff -C2 -d -r1.9 -r1.10
*** hammiefilter.py	21 Jan 2003 00:17:57 -0000	1.9
--- hammiefilter.py	21 Jan 2003 14:50:25 -0000	1.10
***************
*** 15,19 ****
  ##

! """Usage: %(program)s [OPTION]

  A hammie front-end to make the simple stuff simple.  The intent is to call
--- 15,19 ----
  ##

! """Usage: %(program)s [OPTION]...

  A hammie front-end to make the simple stuff simple.  The intent is to call
***************
*** 26,51 ****
  calling it with either the -g or -s options, respectively.

! Where [OPTION] is one of:
      -h
          show usage and exit
-     -n
-         create a new database
-     -g
-         train on stdin as a good (ham) message
-     -s
-         train on stdin as a bad (spam) message
      -d DBFILE
          use database in DBFILE
      -D PICKLEFILE
          use pickle (instead of database) in PICKLEFILE
      -G
!         untrain ham on stdin -- only use if you've already trained this
!         message!
      -S
!         untrain spam on stdin -- only use if you've already trained this
!         message!

- If neither -g nor -s is given, stdin will be scored: the same message,
- with a new header containing the score, will be send to stdout.
  """

--- 26,54 ----
  calling it with either the -g or -s options, respectively.

! [OPTION] is one of:
      -h
          show usage and exit
      -d DBFILE
          use database in DBFILE
      -D PICKLEFILE
          use pickle (instead of database) in PICKLEFILE
+     -n
+         create a new database
+     -g
+         train as a good (ham) message
+     -s
+         train as a bad (spam) message
+     -t
+         filter and train based on the result (you must make sure to
+         untrain all mistakes later)
      -G
!         untrain ham (only use if you've already trained this message)
      -S
!         untrain spam (only use if you've already trained this message)
! 
! All processing options operate on stdin.  If no processing options are
! given, stdin will be scored: the same message, with a new header
! containing the score, will be send to stdout.

  """

***************
*** 53,69 ****
  import sys
  import getopt
! from spambayes import hammie, Options

  # See Options.py for explanations of these properties
  program = sys.argv[0]

- # Options
- options = Options.options
- options.mergefiles(['/etc/hammierc',
-                     os.path.expanduser('~/.hammierc')])
- DBNAME = options.hammiefilter_persistent_storage_file
- DBNAME = os.path.expanduser(DBNAME)
- USEDB = options.hammiefilter_persistent_use_database
- 
  def usage(code, msg=''):
      """Print usage message and sys.exit(code)."""
--- 56,64 ----
  import sys
  import getopt
! from spambayes import hammie, Options, mboxutils

  # See Options.py for explanations of these properties
  program = sys.argv[0]

  def usage(code, msg=''):
      """Print usage message and sys.exit(code)."""
***************
*** 77,82 ****
      def __init__(self):
          options = Options.options
!         self.dbname = DBNAME
!         self.usedb = USEDB

      def newdb(self):
--- 72,80 ----
      def __init__(self):
          options = Options.options
!         options.mergefiles(['/etc/hammierc',
!                             os.path.expanduser('~/.hammierc')])
!         self.dbname = options.hammiefilter_persistent_storage_file
!         self.dbname = os.path.expanduser(self.dbname)
!         self.usedb = options.hammiefilter_persistent_use_database

      def newdb(self):
***************
*** 85,144 ****
          print "Created new database in", self.dbname

!     def filter(self):
          h = hammie.open(self.dbname, self.usedb, 'r')
-         msg = sys.stdin.read()
          print h.filter(msg)

!     def train_ham(self):
          h = hammie.open(self.dbname, self.usedb, 'c')
-         msg = sys.stdin.read()
          h.train_ham(msg)
          h.store()

!     def train_spam(self):
          h = hammie.open(self.dbname, self.usedb, 'c')
-         msg = sys.stdin.read()
          h.train_spam(msg)
          h.store()

!     def untrain_ham(self):
          h = hammie.open(self.dbname, self.usedb, 'c')
-         msg = sys.stdin.read()
          h.untrain_ham(msg)
          h.store()

!     def untrain_spam(self):
          h = hammie.open(self.dbname, self.usedb, 'c')
-         msg = sys.stdin.read()
          h.untrain_spam(msg)
          h.store()

  def main():
-     global DBNAME, USEDB
-     
      h = HammieFilter()
!     action = h.filter
!     opts, args = getopt.getopt(sys.argv[1:], 'hngsGSd:D:', ['help'])
      for opt, arg in opts:
          if opt in ('-h', '--help'):
              usage(0)
          elif opt == '-d':
!             USEDB = True
!             DBNAME = arg
          elif opt == '-D':
!             USEDB = False
!             DBNAME = arg
          elif opt == '-g':
!             action = h.train_ham
          elif opt == '-s':
!             action = h.train_spam
          elif opt == '-G':
!             action = h.untrain_ham
          elif opt == '-S':
!             action = h.untrain_spam
          elif opt == "-n":
!             action = h.newdb

-     action()

  if __name__ == "__main__":
--- 83,148 ----
          print "Created new database in", self.dbname

!     def filter(self, msg):
          h = hammie.open(self.dbname, self.usedb, 'r')
          print h.filter(msg)

!     def filter_train(self, msg):
!         h = hammie.open(self.dbname, self.usedb, 'c')
!         print h.filter(msg, train=True)
! 
!     def train_ham(self, msg):
          h = hammie.open(self.dbname, self.usedb, 'c')
          h.train_ham(msg)
          h.store()

!     def train_spam(self, msg):
          h = hammie.open(self.dbname, self.usedb, 'c')
          h.train_spam(msg)
          h.store()

!     def untrain_ham(self, msg):
          h = hammie.open(self.dbname, self.usedb, 'c')
          h.untrain_ham(msg)
          h.store()

!     def untrain_spam(self, msg):
          h = hammie.open(self.dbname, self.usedb, 'c')
          h.untrain_spam(msg)
          h.store()

  def main():
      h = HammieFilter()
!     actions = []
!     opts, args = getopt.getopt(sys.argv[1:], 'hd:D:ngstGS', ['help'])
      for opt, arg in opts:
          if opt in ('-h', '--help'):
              usage(0)
          elif opt == '-d':
!             h.usedb = True
!             h.dbname = arg
          elif opt == '-D':
!             h.usedb = False
!             h.dbname = arg
          elif opt == '-g':
!             actions.append(h.train_ham)
          elif opt == '-s':
!             actions.append(h.train_spam)
!         elif opt == '-t':
!             actions.append(h.filter_train)
          elif opt == '-G':
!             actions.append(h.untrain_ham)
          elif opt == '-S':
!             actions.append(h.untrain_spam)
          elif opt == "-n":
!             h.newdb()
!             sys.exit(0)
! 
!     if actions == []:
!         actions = [h.filter]
! 
!     msg = mboxutils.get_message(sys.stdin)
!     for action in actions:
!         action(msg)

  if __name__ == "__main__":