[Spambayes-checkins] spambayes FileCorpus.py,1.2,1.2.2.1

Tim Stone timstone4@users.sourceforge.net
Fri Nov 22 00:31:21 2002


Update of /cvsroot/spambayes/spambayes
In directory sc8-pr-cvs1:/tmp/cvs-serv23466

Modified Files:
      Tag: hammie-playground
	FileCorpus.py 
Log Message:
Replaced some direct references to .substance with calls to
.getSubstance() and .setSubstance()

Added tests for the header and body convenience methods that were
added to Message

Index: FileCorpus.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/FileCorpus.py,v
retrieving revision 1.2
retrieving revision 1.2.2.1
diff -C2 -d -r1.2 -r1.2.2.1
*** FileCorpus.py	16 Nov 2002 19:06:27 -0000	1.2
--- FileCorpus.py	22 Nov 2002 00:31:19 -0000	1.2.2.1
***************
*** 86,90 ****
  
  import Corpus
! import Bayes
  import sys, os, gzip, fnmatch, getopt, errno, time, stat
  
--- 86,90 ----
  
  import Corpus
! import Persistent
  import sys, os, gzip, fnmatch, getopt, errno, time, stat
  
***************
*** 192,195 ****
--- 192,196 ----
          '''Constructor(message file name, corpus directory name)'''
  
+         Corpus.Message.__init__(self)
          self.file_name = file_name
          self.directory = directory
***************
*** 214,218 ****
                 raise
          else:
!            self.substance = fp.read()
             fp.close()
  
--- 215,219 ----
                 raise
          else:
!            self.setSubstance(fp.read())
             fp.close()
  
***************
*** 225,229 ****
          pn = self.pathname()
          fp = open(pn, 'wb')
!         fp.write(self.substance)
          fp.close()
  
--- 226,230 ----
          pn = self.pathname()
          fp = open(pn, 'wb')
!         fp.write(self.getSubstance())
          fp.close()
  
***************
*** 248,260 ****
  
          elip = ''
!         sub = self.substance
! 
          if Corpus.Verbose:
!             sub = self.substance
          else:
!             if len(self.substance) > 20:
!                 sub = self.substance[:20]
!                 if len(self.substance) > 40:
!                     sub += '...' + self.substance[-20:]
  
          pn = os.path.join(self.directory, self.file_name)
--- 249,261 ----
  
          elip = ''
!         sub = self.getSubstance()
!         
          if Corpus.Verbose:
!             sub = self.getSubstance()
          else:
!             if len(sub) > 20:
!                 sub = sub[:20]
!                 if len(sub) > 40:
!                     sub += '...' + sub[-20:]
  
          pn = os.path.join(self.directory, self.file_name)
***************
*** 304,308 ****
                  raise
          else:
!             self.substance = fp.read()
              fp.close()
  
--- 305,309 ----
                  raise
          else:
!             self.setSubstance(fp.read())
              fp.close()
  
***************
*** 316,320 ****
          pn = self.pathname()
          gz = gzip.open(pn, 'wb')
!         gz.write(self.substance)
          gz.flush()
          gz.close()
--- 317,321 ----
          pn = self.pathname()
          gz = gzip.open(pn, 'wb')
!         gz.write(self.getSubstance())
          gz.flush()
          gz.close()
***************
*** 342,354 ****
          print 'Executing with uncompressed files'
  
!     print '\n\nCreating two Bayes databases'
!     miscbayes = Bayes.PickledBayes('fctestmisc.bayes')
!     classbayes = Bayes.DBDictBayes('fctestclass.bayes')
  
      print '\n\nSetting up spam corpus'
      spamcorpus = FileCorpus(fmFact, 'fctestspamcorpus')
!     spamtrainer = Bayes.SpamTrainer(miscbayes)
      spamcorpus.addObserver(spamtrainer)
!     anotherspamtrainer = Bayes.SpamTrainer(classbayes, Bayes.UPDATEPROBS)
      spamcorpus.addObserver(anotherspamtrainer)
  
--- 343,355 ----
          print 'Executing with uncompressed files'
  
!     print '\n\nCreating two Classifier databases'
!     miscbayes = Persistent.PickledClassifier('fctestmisc.bayes')
!     classbayes = Persistent.DBDictClassifier('fctestclass.bayes')
  
      print '\n\nSetting up spam corpus'
      spamcorpus = FileCorpus(fmFact, 'fctestspamcorpus')
!     spamtrainer = Persistent.SpamTrainer(miscbayes)
      spamcorpus.addObserver(spamtrainer)
!     anotherspamtrainer = Persistent.SpamTrainer(classbayes, Persistent.UPDATEPROBS)
      spamcorpus.addObserver(anotherspamtrainer)
  
***************
*** 365,374 ****
                            'fctesthamcorpus', \
                            'MSG*')
!     hamtrainer = Bayes.HamTrainer(miscbayes)
      hamcorpus.addObserver(hamtrainer)
      hamtrainer.trainAll(hamcorpus)
  
! 
!     print '\n\nAdd a message to hamcorpus that does not match the filter'
      if useGzip:
          fmClass = GzipFileMessage
--- 366,374 ----
                            'fctesthamcorpus', \
                            'MSG*')
!     hamtrainer = Persistent.HamTrainer(miscbayes)
      hamcorpus.addObserver(hamtrainer)
      hamtrainer.trainAll(hamcorpus)
  
!     print '\n\nA couple of message related tests'
      if useGzip:
          fmClass = GzipFileMessage
***************
*** 377,380 ****
--- 377,383 ----
  
      m1 = fmClass('XMG00001', 'fctestspamcorpus')
+     m1.setSubstance(testmsg2())
+     
+     print '\n\nAdd a message to hamcorpus that does not match the filter'
  
      try:
***************
*** 417,421 ****
  
      print '\n\nTrain with an individual message'
!     anotherhamtrainer = Bayes.HamTrainer(classbayes)
      anotherhamtrainer.train(unsurecorpus['MSG00005'])
  
--- 420,424 ----
  
      print '\n\nTrain with an individual message'
!     anotherhamtrainer = Persistent.HamTrainer(classbayes)
      anotherhamtrainer.train(unsurecorpus['MSG00005'])
  
***************
*** 428,431 ****
--- 431,443 ----
      msg = spamcorpus['MSG00001']
      print msg
+     print '\n\nThis is some vital information in the message'
+     print 'Date header is',msg.getDate()
+     print 'Subject header is',msg.getSubject()
+     print 'From header is',msg.getFrom()
+     
+     print 'Header text is:',msg.getHeaders()
+     print 'Headers are:',msg.getHeadersList()
+     print 'Body is:',msg.getBody()
+ 
  
  
***************
*** 526,538 ****
  
      m1 = fmClass('MSG00001', 'fctestspamcorpus')
!     m1.substance = tm1
      m1.store()
  
      m2 = fmClass('MSG00002', 'fctestspamcorpus')
!     m2.substance = tm2
      m2.store()
  
      m3 = fmClass('MSG00003', 'fctestunsurecorpus')
!     m3.substance = tm1
      m3.store()
  
--- 538,550 ----
  
      m1 = fmClass('MSG00001', 'fctestspamcorpus')
!     m1.setSubstance(tm1)
      m1.store()
  
      m2 = fmClass('MSG00002', 'fctestspamcorpus')
!     m2.setSubstance(tm2)
      m2.store()
  
      m3 = fmClass('MSG00003', 'fctestunsurecorpus')
!     m3.setSubstance(tm1)
      m3.store()
  
***************
*** 546,558 ****
  
      m4 = fmClass('MSG00004', 'fctestunsurecorpus')
!     m4.substance = tm1
      m4.store()
  
      m5 = fmClass('MSG00005', 'fctestunsurecorpus')
!     m5.substance = tm2
      m5.store()
  
      m6 = fmClass('MSG00006', 'fctestunsurecorpus')
!     m6.substance = tm2
      m6.store()
  
--- 558,570 ----
  
      m4 = fmClass('MSG00004', 'fctestunsurecorpus')
!     m4.setSubstance(tm1)
      m4.store()
  
      m5 = fmClass('MSG00005', 'fctestunsurecorpus')
!     m5.setSubstance(tm2)
      m5.store()
  
      m6 = fmClass('MSG00006', 'fctestunsurecorpus')
!     m6.setSubstance(tm2)
      m6.store()
  
***************
*** 583,587 ****
  Content-Type:text/plain; charset=us-ascii
  Content- Transfer- Encoding:7bit
- 
  Message-ID:<15814.42238.882013.702030@montanaro.dyndns.org>
  Date:Mon, 4 Nov 2002 10:49:02 -0600
--- 595,598 ----
***************
*** 644,648 ****
  Content-Type:text/plain; charset=us-ascii
  Content- Transfer- Encoding:7bit
- 
  X-Hammie- Disposition:Unsure
  
--- 655,658 ----





More information about the Spambayes-checkins mailing list