[Spambayes-checkins] spambayes/spambayes UserInterface.py, 1.53, 1.54

Tony Meyer anadelonbrin at users.sourceforge.net
Wed Mar 16 04:29:04 CET 2005


Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv16734/spambayes

Modified Files:
	UserInterface.py 
Log Message:
Upload messages properly.  Avoid creating pop3proxyham.mbox files all over the place,
 and update the message info database properly.

Index: UserInterface.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/UserInterface.py,v
retrieving revision 1.53
retrieving revision 1.54
diff -C2 -d -r1.53 -r1.54
*** UserInterface.py	2 Jan 2005 05:01:50 -0000	1.53
--- UserInterface.py	16 Mar 2005 03:29:01 -0000	1.54
***************
*** 89,92 ****
--- 89,94 ----
  from spambayes import Stats
  from spambayes import Version
+ from spambayes import storage
+ from spambayes import FileCorpus
  from Options import options, optionsPathname, defaults, OptionsClass, _
  
***************
*** 494,513 ****
          content = content.replace('\r\n', '\n').replace('\r', '\n')
  
!         # The upload might be a single message or am mbox file.
          messages = self._convertUploadToMessageList(content)
  
!         # Append the message(s) to a file, to make it easier to rebuild
!         # the database later.   This is a temporary implementation -
!         # it should keep a Corpus of trained messages.
!         # XXX Temporary, heh.  One of the problems with this is that
!         # XXX these files get opened in whatever happens to be the cwd.
!         # XXX I don't think anyone uses these anyway, but we should fix
!         # XXX this for 1.1.  I think that creating a new message in the
!         # XXX Ham/Spam corpus would work, and not interfere with anything.
!         # XXX We could later search for them, too, which would be a bonus.
          if isSpam:
!             f = open("_pop3proxyspam.mbox", "a")
          else:
!             f = open("_pop3proxyham.mbox", "a")
  
          # Train on the uploaded message(s).
--- 496,538 ----
          content = content.replace('\r\n', '\n').replace('\r', '\n')
  
!         # The upload might be a single message or a dbx/mbox file.
          messages = self._convertUploadToMessageList(content)
  
!         # Add the messages(s) to the appropriate corpus.  This means
!         # that we can rebuild the database later, if desired (as long as
!         # they haven't expired), and can search for the messages later
!         # (and even correct training).  This also takes care of training
!         # the messages.
!         # This replaces the 1.0.x practice of opening a
!         # "_pop3proxyham.mbox" or "_pop3proxyspam.mbox" in the CWD and
!         # placing them there.
          if isSpam:
!             desired_corpus = "spamCorpus"
          else:
!             desired_corpus = "hamCorpus"
!         if hasattr(self, desired_corpus):
!             corpus = getattr(self, desired_corpus)
!         else:
!             if hasattr(self, "state"):
!                 # sb_server (exists in state)
!                 corpus = getattr(self.state, desired_corpus)
!                 setattr(self, desired_corpus, corpus)
!             else:
!                 # sb_imapfilter (need to create)
!                 if isSpam:
!                     fn = storage.get_pathname_option("Storage",
!                                                      "spam_cache")
!                 else:
!                     fn = storage.get_pathname_option("Storage",
!                                                      "ham_cache")
!                 storage.ensureDir(fn)
!                 if self.gzipCache:
!                     factory = FileCorpus.GzipFileMessageFactory()
!                 else:
!                     factory = FileCorpus.FileMessageFactory()
!                 age = options["Storage", "cache_expiry_days"]*24*60*60
!                 corpus = FileCorpus.ExpiryFileCorpus(age, factory, fn,
!                                           '[0123456789\-]*', cacheSize=20)
!                 setattr(self, desired_corpus, corpus)
  
          # Train on the uploaded message(s).
***************
*** 515,529 ****
          self.flush()
          for message in messages:
!             # XXX Here, we should really use the message.Message class,
!             # XXX so that the messageinfo database is updated (and so
!             # XXX the stats are correct, and so on).
!             tokens = tokenizer.tokenize(message)
!             self.classifier.learn(tokens, isSpam)
!             f.write("From pop3proxy at spambayes.org Sat Jan 31 00:00:00 2000\n")
!             f.write(message)
!             f.write("\n\n")
  
!         # Save the database and return a link Home and another training form.
!         f.close()
          self._doSave()
          self.write(_("%sOK. Return %sHome%s or train again:%s") %
--- 540,549 ----
          self.flush()
          for message in messages:
!             msg = factory.create(key, message)
!             corpus.addMessage(msg)
!             msg.RememberClassification(isSpam)
  
!         # Save the database and return a link Home and another training
!         # form.
          self._doSave()
          self.write(_("%sOK. Return %sHome%s or train again:%s") %



More information about the Spambayes-checkins mailing list