[Spambayes-checkins]
spambayes Corpus.py,1.4,1.5 FileCorpus.py,1.7,1.8 pop3proxy.py,1.30,1.31
Richie Hindle
richiehindle@users.sourceforge.net
Thu Nov 28 22:02:48 2002
- Previous message: [Spambayes-checkins] spambayes pop3proxy.py,1.29,1.30
- Next message: [Spambayes-checkins] spambayes mailsort.py,NONE,1.1
README.txt,1.43,1.44 neilfilter.py,1.5,NONE neiltrain.py,1.6,NONE
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /cvsroot/spambayes/spambayes
In directory sc8-pr-cvs1:/tmp/cvs-serv27764
Modified Files:
Corpus.py FileCorpus.py pop3proxy.py
Log Message:
Expire old messages from the trained corpora. ExpiryFileCorpus is
now less clever - it no longer expires messages implicitly when they
are cached, so you need to call removeExpiredMessages() explicitly for
it to expire anything. "Explicit is better than implicit."
Index: Corpus.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Corpus.py,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** Corpus.py 26 Nov 2002 00:43:51 -0000 1.4
--- Corpus.py 28 Nov 2002 22:02:46 -0000 1.5
***************
*** 235,268 ****
'''Corpus of "young" file system artifacts'''
! def __init__(self, expireBefore, factory, cacheSize=-1):
'''Constructor'''
self.expireBefore = expireBefore
- Corpus.__init__(self, factory, cacheSize)
-
- def cacheMessage(self, msg):
- '''Add a message to the in-memory cache'''
- # This is where the expiry of a message is enforced
- # This method should probably not be overridden
-
- if msg.createTimestamp() >= time.time() - self.expireBefore:
- Corpus.cacheMessage(self, msg)
- else:
- if options.verbose:
- print 'Not caching %s because it has expired' % (msg.key())
- raise KeyError, msg
-
- return msg
def removeExpiredMessages(self):
'''Kill expired messages'''
! for key in self.keys():
! try:
! msg = self[key]
! except KeyError, e:
if options.verbose:
print 'message %s has expired' % (key)
! self.removeMessage(e[0])
--- 235,251 ----
'''Corpus of "young" file system artifacts'''
! def __init__(self, expireBefore):
'''Constructor'''
self.expireBefore = expireBefore
def removeExpiredMessages(self):
'''Kill expired messages'''
! for msg in self:
! if msg.createTimestamp() < time.time() - self.expireBefore:
if options.verbose:
print 'message %s has expired' % (key)
! self.removeMessage(msg)
***************
*** 376,383 ****
return match
!
def getHeaders(self):
'''Return message headers as text'''
!
return self.hdrtxt
--- 359,366 ----
return match
!
def getHeaders(self):
'''Return message headers as text'''
!
return self.hdrtxt
***************
*** 411,413 ****
if __name__ == '__main__':
! print >>sys.stderr, __doc__
\ No newline at end of file
--- 394,396 ----
if __name__ == '__main__':
! print >>sys.stderr, __doc__
Index: FileCorpus.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/FileCorpus.py,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -d -r1.7 -r1.8
*** FileCorpus.py 28 Nov 2002 15:48:29 -0000 1.7
--- FileCorpus.py 28 Nov 2002 22:02:46 -0000 1.8
***************
*** 183,187 ****
filter'''
! Corpus.ExpiryCorpus.__init__(self, expireBefore, factory, cacheSize)
FileCorpus.__init__(self, factory, directory, filter, cacheSize)
--- 183,187 ----
filter'''
! Corpus.ExpiryCorpus.__init__(self, expireBefore)
FileCorpus.__init__(self, factory, directory, filter, cacheSize)
***************
*** 251,255 ****
elip = ''
sub = self.getSubstance()
!
if options.verbose:
sub = self.getSubstance()
--- 251,255 ----
elip = ''
sub = self.getSubstance()
!
if options.verbose:
sub = self.getSubstance()
***************
*** 379,383 ****
m1 = fmClass('XMG00001', 'fctestspamcorpus')
m1.setSubstance(testmsg2())
!
print '\n\nAdd a message to hamcorpus that does not match the filter'
--- 379,383 ----
m1 = fmClass('XMG00001', 'fctestspamcorpus')
m1.setSubstance(testmsg2())
!
print '\n\nAdd a message to hamcorpus that does not match the filter'
***************
*** 404,407 ****
--- 404,408 ----
unsurecorpus = ExpiryFileCorpus(5, fmFact, \
'fctestunsurecorpus', 'MSG*', 2)
+ unsurecorpus.removeExpiredMessages()
***************
*** 436,440 ****
print 'Subject header is',msg.getSubject()
print 'From header is',msg.getFrom()
!
print 'Header text is:',msg.getHeaders()
print 'Headers are:',msg.getHeadersList()
--- 437,441 ----
print 'Subject header is',msg.getSubject()
print 'From header is',msg.getFrom()
!
print 'Header text is:',msg.getHeaders()
print 'Headers are:',msg.getHeadersList()
***************
*** 492,496 ****
if e.errno != 2: # errno.<WHAT>
raise
!
try:
os.unlink('fctestclass.bayes')
--- 493,497 ----
if e.errno != 2: # errno.<WHAT>
raise
!
try:
os.unlink('fctestclass.bayes')
***************
*** 725,727 ****
print >>sys.stderr, __doc__
!
--- 726,728 ----
print >>sys.stderr, __doc__
!
Index: pop3proxy.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/pop3proxy.py,v
retrieving revision 1.30
retrieving revision 1.31
diff -C2 -d -r1.30 -r1.31
*** pop3proxy.py 28 Nov 2002 21:27:09 -0000 1.30
--- pop3proxy.py 28 Nov 2002 22:02:46 -0000 1.31
***************
*** 141,145 ****
import socket, asyncore, asynchat, cgi, urlparse, webbrowser
import mailbox, storage, tokenizer, mboxutils, email.Header
! from FileCorpus import FileCorpus, FileMessageFactory, GzipFileMessageFactory
from email.Iterators import typed_subpart_iterator
from Options import options
--- 141,146 ----
import socket, asyncore, asynchat, cgi, urlparse, webbrowser
import mailbox, storage, tokenizer, mboxutils, email.Header
! from FileCorpus import FileCorpus, ExpiryFileCorpus
! from FileCorpus import FileMessageFactory, GzipFileMessageFactory
from email.Iterators import typed_subpart_iterator
from Options import options
***************
*** 1314,1324 ****
map(ensureDir, [self.spamCache, self.hamCache, self.unknownCache])
if self.gzipCache:
! messageFactory = GzipFileMessageFactory()
else:
! messageFactory = FileMessageFactory()
! self.messageFactory = messageFactory
! self.spamCorpus = FileCorpus(messageFactory, self.spamCache)
! self.hamCorpus = FileCorpus(messageFactory, self.hamCache)
! self.unknownCorpus = FileCorpus(messageFactory, self.unknownCache)
# Create the Trainers.
--- 1315,1329 ----
map(ensureDir, [self.spamCache, self.hamCache, self.unknownCache])
if self.gzipCache:
! factory = GzipFileMessageFactory()
else:
! factory = FileMessageFactory()
! age = options.pop3proxy_cache_expiry_days*24*60*60
! self.spamCorpus = ExpiryFileCorpus(age, factory, self.spamCache)
! self.hamCorpus = ExpiryFileCorpus(age, factory, self.hamCache)
! self.unknownCorpus = FileCorpus(factory, self.unknownCache)
!
! # Expire old messages from the trained corpuses.
! self.spamCorpus.removeExpiredMessages()
! self.hamCorpus.removeExpiredMessages()
# Create the Trainers.
- Previous message: [Spambayes-checkins] spambayes pop3proxy.py,1.29,1.30
- Next message: [Spambayes-checkins] spambayes mailsort.py,NONE,1.1
README.txt,1.43,1.44 neilfilter.py,1.5,NONE neiltrain.py,1.6,NONE
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the Spambayes-checkins
mailing list