[Spambayes-checkins] spambayes/spambayes Corpus.py, 1.19, 1.20 FileCorpus.py, 1.11, 1.12

Tony Meyer anadelonbrin at users.sourceforge.net
Wed Jul 14 09:15:40 CEST 2004


Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31771/spambayes

Modified Files:
	Corpus.py FileCorpus.py 
Log Message:
Allow makeMessage to take a content parameter, which is the initial
content of the message.

Allow FileMessages to be created without any arguments, so that they
are compatible with email.Parser.  Add in assert statements to ensure
that the required information is still entered.

Implement FileMessage.setPayload().  The parent version is now deprecated,
and this should also be avoided if possible (see the comment).

Update Factory classes to allow initial content.

Have testing setup use the Factory classes - this tests them and avoids using
setPayload.

The tests run, and sb_server still works, so I'm fairly confident that this hasn't broken
anything.  If it has, fix it or let me know and I will fix it ASAP.

Index: Corpus.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Corpus.py,v
retrieving revision 1.19
retrieving revision 1.20
diff -C2 -d -r1.19 -r1.20
*** Corpus.py	6 Apr 2004 13:21:46 -0000	1.19
--- Corpus.py	14 Jul 2004 07:15:38 -0000	1.20
***************
*** 250,258 ****
          raise NotImplementedError
  
!     def makeMessage(self, key):
          '''Call the factory to make a message'''
  
          # This method will likely be overridden
!         msg = self.factory.create(key)
  
          return msg
--- 250,258 ----
          raise NotImplementedError
  
!     def makeMessage(self, key, content=None):
          '''Call the factory to make a message'''
  
          # This method will likely be overridden
!         msg = self.factory.create(key, content)
  
          return msg
***************
*** 284,288 ****
          pass
  
!     def create(self, key):
          '''Create a message instance'''
          raise NotImplementedError
--- 284,288 ----
          pass
  
!     def create(self, key, content=None):
          '''Create a message instance'''
          raise NotImplementedError

Index: FileCorpus.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/FileCorpus.py,v
retrieving revision 1.11
retrieving revision 1.12
diff -C2 -d -r1.11 -r1.12
*** FileCorpus.py	2 Apr 2004 18:10:52 -0000	1.11
--- FileCorpus.py	14 Jul 2004 07:15:38 -0000	1.12
***************
*** 85,88 ****
--- 85,90 ----
  from __future__ import generators
  
+ import email
+ 
  from spambayes import Corpus
  from spambayes import message
***************
*** 121,127 ****
                  self.msgs[filename] = None
  
!     def makeMessage(self, key):
          '''Ask our factory to make a Message'''
!         msg = self.factory.create(key, self.directory)
          return msg
  
--- 123,129 ----
                  self.msgs[filename] = None
  
!     def makeMessage(self, key, content=None):
          '''Ask our factory to make a Message'''
!         msg = self.factory.create(key, self.directory, content)
          return msg
  
***************
*** 186,190 ****
      '''Message that persists as a file system artifact.'''
  
!     def __init__(self,file_name, directory):
          '''Constructor(message file name, corpus directory name)'''
          message.SBHeaderMessage.__init__(self)
--- 188,192 ----
      '''Message that persists as a file system artifact.'''
  
!     def __init__(self, file_name=None, directory=None):
          '''Constructor(message file name, corpus directory name)'''
          message.SBHeaderMessage.__init__(self)
***************
*** 199,202 ****
--- 201,208 ----
      def pathname(self):
          '''Derive the pathname of the message file'''
+         assert(self.file_name is not None,
+                "Must set filename before using FileMessage instances.")
+         assert(self.directory is not None,
+                "Must set directory before using FileMessage instances.")
          return os.path.join(self.directory, self.file_name)
  
***************
*** 214,217 ****
--- 220,226 ----
              return
  
+         assert(self.file_name is not None,
+                "Must set filename before using FileMessage instances.")
+ 
          if options["globals", "verbose"]:
              print 'loading', self.file_name
***************
*** 237,240 ****
--- 246,252 ----
          '''Write the Message substance to the file'''
  
+         assert(self.file_name is not None,
+                "Must set filename before using FileMessage instances.")
+ 
          if options["globals", "verbose"]:
              print 'storing', self.file_name
***************
*** 245,250 ****
  
      def setPayload(self, payload):
          self.loaded = True
!         message.SBHeaderMessage.setPayload(self, payload)
  
      def remove(self):
--- 257,282 ----
  
      def setPayload(self, payload):
+         # This is a less-than-ideal method.  The Python email package
+         # has a clear distinction between parsing an email message and
+         # creating an email message object.  Here, we don't share that
+         # distinction, because our message object is trying to do its
+         # own parsing.  A better system would be to have the factory
+         # that creates these messages do the load from file bit (this
+         # does mean we lose the current load-on-demand feature, but
+         # I'm not sure that's ever used).  Alternatively, we could have
+         # a third type of FileMessage - PickledFileMessage - that stored
+         # the parsed form of the message.  This might also remove the
+         # need for some of the message database (although that would then
+         # expire along with the messages...).  This is something to
+         # consider before 1.1, however.
          self.loaded = True
! 
!         # We parse the content into a generic email.Message object.
!         msg = email.message_from_string(payload, strict=False)
! 
!         # And then we set ourselves to be equal to it.
!         self.set_payload(msg.get_payload())
!         self.set_unixfrom(msg.get_unixfrom())
!         self.set_charset(msg.get_charset())
  
      def remove(self):
***************
*** 262,274 ****
      def name(self):
          '''A unique name for the message'''
          return self.file_name
  
      def key(self):
          '''The key of this message in the msgs dictionary'''
          return self.file_name
  
      def __repr__(self):
          '''Instance as a representative string'''
- 
          sub = self.as_string()
  
--- 294,309 ----
      def name(self):
          '''A unique name for the message'''
+         assert(self.file_name is not None,
+                "Must set filename before using FileMessage instances.")
          return self.file_name
  
      def key(self):
          '''The key of this message in the msgs dictionary'''
+         assert(self.file_name is not None,
+                "Must set filename before using FileMessage instances.")
          return self.file_name
  
      def __repr__(self):
          '''Instance as a representative string'''
          sub = self.as_string()
  
***************
*** 280,288 ****
                      sub = sub[:20]
  
-         pn = os.path.join(self.directory, self.file_name)
- 
          return "<%s object at %8.8x, file: %s, %s>" % \
              (self.__class__.__name__, \
!             id(self), pn, sub)
  
      def __str__(self):
--- 315,321 ----
                      sub = sub[:20]
  
          return "<%s object at %8.8x, file: %s, %s>" % \
              (self.__class__.__name__, \
!             id(self), self.pathname(), sub)
  
      def __str__(self):
***************
*** 308,314 ****
      '''MessageFactory for FileMessage objects'''
  
!     def create(self, key, directory):
          '''Create a message object from a filename in a directory'''
! 
          return FileMessage(key, directory)
  
--- 341,352 ----
      '''MessageFactory for FileMessage objects'''
  
!     def create(self, key, directory, content=None):
          '''Create a message object from a filename in a directory'''
!         if content:
!             msg = email.message_from_string(content, _class=FileMessage,
!                                             strict=False)
!             msg.file_name = key
!             msg.directory = directory
!             return msg
          return FileMessage(key, directory)
  
***************
*** 318,321 ****
--- 356,361 ----
      def store(self):
          '''Write the Message substance to the file'''
+         assert(self.file_name is not None,
+                "Must set filename before using FileMessage instances.")
  
          if options["globals", "verbose"]:
***************
*** 332,342 ****
      '''MessageFactory for FileMessage objects'''
  
!     def create(self, key, directory):
          '''Create a message object from a filename in a directory'''
! 
          return GzipFileMessage(key, directory)
  
  
- 
  def runTest(useGzip):
  
--- 372,387 ----
      '''MessageFactory for FileMessage objects'''
  
!     def create(self, key, directory, content=None):
          '''Create a message object from a filename in a directory'''
!         if content:
!             msg = email.message_from_string(content,
!                                             _class=GzipFileMessage,
!                                             strict=False)
!             msg.file_name = key
!             msg.directory = directory
!             return msg
          return GzipFileMessage(key, directory)
  
  
  def runTest(useGzip):
  
***************
*** 379,388 ****
      print '\n\nA couple of message related tests'
      if useGzip:
!         fmClass = GzipFileMessage
      else:
!         fmClass = FileMessage
  
!     m1 = fmClass('XMG00001', 'fctestspamcorpus')
!     m1.setPayload(testmsg2())
  
      print '\n\nAdd a message to hamcorpus that does not match the filter'
--- 424,432 ----
      print '\n\nA couple of message related tests'
      if useGzip:
!         fmFactory = GzipFileMessageFactory()
      else:
!         fmFactory = FileMessageFactory()
  
!     m1 = fmFactory.create('XMG00001', 'fctestspamcorpus', testmsg2())
  
      print '\n\nAdd a message to hamcorpus that does not match the filter'
***************
*** 531,548 ****
  
      if useGzip:
!         fmClass = GzipFileMessage
      else:
!         fmClass = FileMessage
  
!     m1 = fmClass('MSG00001', 'fctestspamcorpus')
!     m1.setPayload(tm1)
      m1.store()
  
!     m2 = fmClass('MSG00002', 'fctestspamcorpus')
!     m2.setPayload(tm2)
      m2.store()
  
!     m3 = fmClass('MSG00003', 'fctestunsurecorpus')
!     m3.setPayload(tm1)
      m3.store()
  
--- 575,589 ----
  
      if useGzip:
!         fmFactory = GzipFileMessageFactory()
      else:
!         fmFactory = FileMessageFactory()
  
!     m1 = fmFactory.create('MSG00001', 'fctestspamcorpus', tm1)
      m1.store()
  
!     m2 = fmFactory.create('MSG00002', 'fctestspamcorpus', tm2)
      m2.store()
  
!     m3 = fmFactory.create('MSG00003', 'fctestunsurecorpus', tm1)
      m3.store()
  
***************
*** 555,568 ****
          print 'wait',10-x,'more second%s' % (s)
  
!     m4 = fmClass('MSG00004', 'fctestunsurecorpus')
!     m4.setPayload(tm1)
      m4.store()
  
!     m5 = fmClass('MSG00005', 'fctestunsurecorpus')
!     m5.setPayload(tm2)
      m5.store()
  
!     m6 = fmClass('MSG00006', 'fctestunsurecorpus')
!     m6.setPayload(tm2)
      m6.store()
  
--- 596,606 ----
          print 'wait',10-x,'more second%s' % (s)
  
!     m4 = fmFactory.create('MSG00004', 'fctestunsurecorpus', tm1)
      m4.store()
  
!     m5 = fmFactory.create('MSG00005', 'fctestunsurecorpus', tm2)
      m5.store()
  
!     m6 = fmFactory.create('MSG00006', 'fctestunsurecorpus', tm2)
      m6.store()
  



More information about the Spambayes-checkins mailing list