[Spambayes-checkins] spambayes Corpus.py,1.4,1.5 FileCorpus.py,1.7,1.8 pop3proxy.py,1.30,1.31

Richie Hindle richiehindle@users.sourceforge.net
Thu Nov 28 22:02:48 2002


Update of /cvsroot/spambayes/spambayes
In directory sc8-pr-cvs1:/tmp/cvs-serv27764

Modified Files:
	Corpus.py FileCorpus.py pop3proxy.py 
Log Message:
Expire old messages from the trained corpora.  ExpiryFileCorpus is
now less clever: it no longer expires messages implicitly, so you must
call removeExpiredMessages() explicitly for it to expire anything.
"Explicit is better than implicit."


Index: Corpus.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Corpus.py,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** Corpus.py	26 Nov 2002 00:43:51 -0000	1.4
--- Corpus.py	28 Nov 2002 22:02:46 -0000	1.5
***************
*** 235,268 ****
      '''Corpus of "young" file system artifacts'''
  
!     def __init__(self, expireBefore, factory, cacheSize=-1):
          '''Constructor'''
  
          self.expireBefore = expireBefore
-         Corpus.__init__(self, factory, cacheSize)
- 
-     def cacheMessage(self, msg):
-         '''Add a message to the in-memory cache'''
-         # This is where the expiry of a message is enforced
-         # This method should probably not be overridden
- 
-         if msg.createTimestamp() >= time.time() - self.expireBefore:
-             Corpus.cacheMessage(self, msg)
-         else:
-             if options.verbose:
-                 print 'Not caching %s because it has expired' % (msg.key())
-             raise KeyError, msg
- 
-         return msg
  
      def removeExpiredMessages(self):
          '''Kill expired messages'''
  
!         for key in self.keys():
!             try:
!                 msg = self[key]
!             except KeyError, e:
                  if options.verbose:
                      print 'message %s has expired' % (key)
!                 self.removeMessage(e[0])
  
  
--- 235,251 ----
      '''Corpus of "young" file system artifacts'''
  
!     def __init__(self, expireBefore):
          '''Constructor'''
  
          self.expireBefore = expireBefore
  
      def removeExpiredMessages(self):
          '''Kill expired messages'''
  
!         for msg in self:
!             if msg.createTimestamp() < time.time() - self.expireBefore:
                  if options.verbose:
                      print 'message %s has expired' % (key)
!                 self.removeMessage(msg)
  
  
***************
*** 376,383 ****
  
  	return match
! 	
      def getHeaders(self):
          '''Return message headers as text'''
!         
          return self.hdrtxt
  
--- 359,366 ----
  
  	return match
! 
      def getHeaders(self):
          '''Return message headers as text'''
! 
          return self.hdrtxt
  
***************
*** 411,413 ****
  
  if __name__ == '__main__':
!     print >>sys.stderr, __doc__
\ No newline at end of file
--- 394,396 ----
  
  if __name__ == '__main__':
!     print >>sys.stderr, __doc__

Index: FileCorpus.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/FileCorpus.py,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -d -r1.7 -r1.8
*** FileCorpus.py	28 Nov 2002 15:48:29 -0000	1.7
--- FileCorpus.py	28 Nov 2002 22:02:46 -0000	1.8
***************
*** 183,187 ****
  filter'''
  
!         Corpus.ExpiryCorpus.__init__(self, expireBefore, factory, cacheSize)
          FileCorpus.__init__(self, factory, directory, filter, cacheSize)
  
--- 183,187 ----
  filter'''
  
!         Corpus.ExpiryCorpus.__init__(self, expireBefore)
          FileCorpus.__init__(self, factory, directory, filter, cacheSize)
  
***************
*** 251,255 ****
          elip = ''
          sub = self.getSubstance()
!         
          if options.verbose:
              sub = self.getSubstance()
--- 251,255 ----
          elip = ''
          sub = self.getSubstance()
! 
          if options.verbose:
              sub = self.getSubstance()
***************
*** 379,383 ****
      m1 = fmClass('XMG00001', 'fctestspamcorpus')
      m1.setSubstance(testmsg2())
!     
      print '\n\nAdd a message to hamcorpus that does not match the filter'
  
--- 379,383 ----
      m1 = fmClass('XMG00001', 'fctestspamcorpus')
      m1.setSubstance(testmsg2())
! 
      print '\n\nAdd a message to hamcorpus that does not match the filter'
  
***************
*** 404,407 ****
--- 404,408 ----
      unsurecorpus = ExpiryFileCorpus(5, fmFact, \
                                      'fctestunsurecorpus', 'MSG*', 2)
+     unsurecorpus.removeExpiredMessages()
  
  
***************
*** 436,440 ****
      print 'Subject header is',msg.getSubject()
      print 'From header is',msg.getFrom()
!     
      print 'Header text is:',msg.getHeaders()
      print 'Headers are:',msg.getHeadersList()
--- 437,441 ----
      print 'Subject header is',msg.getSubject()
      print 'From header is',msg.getFrom()
! 
      print 'Header text is:',msg.getHeaders()
      print 'Headers are:',msg.getHeadersList()
***************
*** 492,496 ****
              if e.errno != 2:     # errno.<WHAT>
                  raise
!     
          try:
              os.unlink('fctestclass.bayes')
--- 493,497 ----
              if e.errno != 2:     # errno.<WHAT>
                  raise
! 
          try:
              os.unlink('fctestclass.bayes')
***************
*** 725,727 ****
          print >>sys.stderr, __doc__
  
!        
--- 726,728 ----
          print >>sys.stderr, __doc__
  
! 

Index: pop3proxy.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/pop3proxy.py,v
retrieving revision 1.30
retrieving revision 1.31
diff -C2 -d -r1.30 -r1.31
*** pop3proxy.py	28 Nov 2002 21:27:09 -0000	1.30
--- pop3proxy.py	28 Nov 2002 22:02:46 -0000	1.31
***************
*** 141,145 ****
  import socket, asyncore, asynchat, cgi, urlparse, webbrowser
  import mailbox, storage, tokenizer, mboxutils, email.Header
! from FileCorpus import FileCorpus, FileMessageFactory, GzipFileMessageFactory
  from email.Iterators import typed_subpart_iterator
  from Options import options
--- 141,146 ----
  import socket, asyncore, asynchat, cgi, urlparse, webbrowser
  import mailbox, storage, tokenizer, mboxutils, email.Header
! from FileCorpus import FileCorpus, ExpiryFileCorpus
! from FileCorpus import FileMessageFactory, GzipFileMessageFactory
  from email.Iterators import typed_subpart_iterator
  from Options import options
***************
*** 1314,1324 ****
              map(ensureDir, [self.spamCache, self.hamCache, self.unknownCache])
              if self.gzipCache:
!                 messageFactory = GzipFileMessageFactory()
              else:
!                 messageFactory = FileMessageFactory()
!             self.messageFactory = messageFactory
!             self.spamCorpus = FileCorpus(messageFactory, self.spamCache)
!             self.hamCorpus = FileCorpus(messageFactory, self.hamCache)
!             self.unknownCorpus = FileCorpus(messageFactory, self.unknownCache)
  
              # Create the Trainers.
--- 1315,1329 ----
              map(ensureDir, [self.spamCache, self.hamCache, self.unknownCache])
              if self.gzipCache:
!                 factory = GzipFileMessageFactory()
              else:
!                 factory = FileMessageFactory()
!             age = options.pop3proxy_cache_expiry_days*24*60*60
!             self.spamCorpus = ExpiryFileCorpus(age, factory, self.spamCache)
!             self.hamCorpus = ExpiryFileCorpus(age, factory, self.hamCache)
!             self.unknownCorpus = FileCorpus(factory, self.unknownCache)
! 
!             # Expire old messages from the trained corpuses.
!             self.spamCorpus.removeExpiredMessages()
!             self.hamCorpus.removeExpiredMessages()
  
              # Create the Trainers.





More information about the Spambayes-checkins mailing list