[Spambayes-checkins] spambayes/Outlook2000 msgstore.py, 1.69, 1.70 tester.py, 1.14, 1.15

Mark Hammond mhammond at users.sourceforge.net
Sat Aug 30 23:38:55 EDT 2003


Update of /cvsroot/spambayes/spambayes/Outlook2000
In directory sc8-pr-cvs1:/tmp/cvs-serv12507

Modified Files:
	msgstore.py tester.py 
Log Message:
msgstore: add an internal "_GetMessageTextParts()" method the test
suite can use for sanity checks.

tester: Cache the good and bad tokens we use to generate test mails, and
print some stats for how many messages don't have headers, a text body or
an HTML body.  The number without HTML should scare you - see [798029]


Index: msgstore.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/msgstore.py,v
retrieving revision 1.69
retrieving revision 1.70
diff -C2 -d -r1.69 -r1.70
*** msgstore.py	27 Aug 2003 12:10:48 -0000	1.69
--- msgstore.py	31 Aug 2003 05:38:52 -0000	1.70
***************
*** 700,703 ****
--- 700,708 ----
  
      def _GetMessageText(self):
+         parts = self._GetMessageTextParts()
+         # parts is (headers, body, html), but could possibly grow
+         return "\n".join(parts)
+ 
+     def _GetMessageTextParts(self):
          # This is finally reliable.  The only messages this now fails for
          # are for "forwarded" messages, where the forwards are actually
***************
*** 793,797 ****
                  body = collect_text_parts(msg)
  
!         return "%s\n%s\n%s" % (headers, html, body)
  
      def _GetFakeHeaders(self):
--- 798,802 ----
                  body = collect_text_parts(msg)
  
!         return headers, body, html
  
      def _GetFakeHeaders(self):

Index: tester.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/tester.py,v
retrieving revision 1.14
retrieving revision 1.15
diff -C2 -d -r1.14 -r1.15
*** tester.py	24 Aug 2003 23:51:32 -0000	1.14
--- tester.py	31 Aug 2003 05:38:52 -0000	1.15
***************
*** 63,67 ****
--- 63,72 ----
  # bayes word list looking for any word with zero count in the
  # non-requested category.
+ _top_ham = None
+ _top_spam = None
  def FindTopWords(bayes, num, get_spam):
+     global _top_spam, _top_ham
+     if get_spam and _top_spam: return _top_spam
+     if not get_spam and _top_ham: return _top_ham
      items = []
      try:
***************
*** 72,75 ****
--- 77,82 ----
  
      for word, info in extractor(bayes):
+         if info is None:
+             break
          if ":" in word:
              continue
***************
*** 89,92 ****
--- 96,103 ----
      for n, word, info in items[:num]:
          ret[word]=copy.copy(info)
+     if get_spam:
+         _top_spam = ret
+     else:
+         _top_ham = ret
      return ret
  
***************
*** 399,403 ****
--- 410,416 ----
      msgstore.test_suite_running = False
      try:
+         print "Scanning all your good mail and spam for some sanity checks..."
          num_found = num_looked = 0
+         num_without_headers = num_without_body = num_without_html_body = 0
          for folder_ids, include_sub in [
              (manager.config.filter.watch_folder_ids, manager.config.filter.watch_include_sub),
***************
*** 418,422 ****
--- 431,442 ----
                          num_found += 1
                          print " %s/%s" % (folder.name, message.subject)
+                     headers, body, html_body = message._GetMessageTextParts()
+                     if not headers: num_without_headers += 1
+                     if not body: num_without_body += 1
+                     if not html_body: num_without_html_body += 1
+ 
          print "Checked %d items, %d non-filterable items found" % (num_looked, num_found)
+         print "of these items, %d had no headers, %d had no text body and %d had no HTML" % \
+                 (num_without_headers, num_without_body, num_without_html_body)
      finally:
          msgstore.test_suite_running = True
***************
*** 428,433 ****
      try: # restore the plugin config at exit.
          msgstore.test_suite_running = True
-         run_filter_tests(manager)
          run_nonfilter_tests(manager)
      finally:
          # Always restore configuration to how we started.
--- 448,454 ----
      try: # restore the plugin config at exit.
          msgstore.test_suite_running = True
          run_nonfilter_tests(manager)
+         # filtering tests take alot of time - do them last.
+         run_filter_tests(manager)
      finally:
          # Always restore configuration to how we started.





More information about the Spambayes-checkins mailing list