[Spambayes-checkins] spambayes/Outlook2000 msgstore.py, 1.69,
1.70 tester.py, 1.14, 1.15
Mark Hammond
mhammond at users.sourceforge.net
Sat Aug 30 23:38:55 EDT 2003
Update of /cvsroot/spambayes/spambayes/Outlook2000
In directory sc8-pr-cvs1:/tmp/cvs-serv12507
Modified Files:
msgstore.py tester.py
Log Message:
msgstore: add an internal "_GetMessageTextParts()" method that the test
suite can use for sanity checks.
tester: Cache the good and bad tokens we use to generate test mails, and
print some stats on how many messages have no headers, no text body, or
no HTML body. The number without HTML should scare you - see [798029]
Index: msgstore.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/msgstore.py,v
retrieving revision 1.69
retrieving revision 1.70
diff -C2 -d -r1.69 -r1.70
*** msgstore.py 27 Aug 2003 12:10:48 -0000 1.69
--- msgstore.py 31 Aug 2003 05:38:52 -0000 1.70
***************
*** 700,703 ****
--- 700,708 ----
def _GetMessageText(self):
+ parts = self._GetMessageTextParts()
+ # parts is (headers, body, html), but could possibly grow
+ return "\n".join(parts)
+
+ def _GetMessageTextParts(self):
# This is finally reliable. The only messages this now fails for
# are for "forwarded" messages, where the forwards are actually
***************
*** 793,797 ****
body = collect_text_parts(msg)
! return "%s\n%s\n%s" % (headers, html, body)
def _GetFakeHeaders(self):
--- 798,802 ----
body = collect_text_parts(msg)
! return headers, body, html
def _GetFakeHeaders(self):
Index: tester.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/tester.py,v
retrieving revision 1.14
retrieving revision 1.15
diff -C2 -d -r1.14 -r1.15
*** tester.py 24 Aug 2003 23:51:32 -0000 1.14
--- tester.py 31 Aug 2003 05:38:52 -0000 1.15
***************
*** 63,67 ****
--- 63,72 ----
# bayes word list looking for any word with zero count in the
# non-requested category.
+ _top_ham = None
+ _top_spam = None
def FindTopWords(bayes, num, get_spam):
+ global _top_spam, _top_ham
+ if get_spam and _top_spam: return _top_spam
+ if not get_spam and _top_ham: return _top_ham
items = []
try:
***************
*** 72,75 ****
--- 77,82 ----
for word, info in extractor(bayes):
+ if info is None:
+ break
if ":" in word:
continue
***************
*** 89,92 ****
--- 96,103 ----
for n, word, info in items[:num]:
ret[word]=copy.copy(info)
+ if get_spam:
+ _top_spam = ret
+ else:
+ _top_ham = ret
return ret
***************
*** 399,403 ****
--- 410,416 ----
msgstore.test_suite_running = False
try:
+ print "Scanning all your good mail and spam for some sanity checks..."
num_found = num_looked = 0
+ num_without_headers = num_without_body = num_without_html_body = 0
for folder_ids, include_sub in [
(manager.config.filter.watch_folder_ids, manager.config.filter.watch_include_sub),
***************
*** 418,422 ****
--- 431,442 ----
num_found += 1
print " %s/%s" % (folder.name, message.subject)
+ headers, body, html_body = message._GetMessageTextParts()
+ if not headers: num_without_headers += 1
+ if not body: num_without_body += 1
+ if not html_body: num_without_html_body += 1
+
print "Checked %d items, %d non-filterable items found" % (num_looked, num_found)
+ print "of these items, %d had no headers, %d had no text body and %d had no HTML" % \
+ (num_without_headers, num_without_body, num_without_html_body)
finally:
msgstore.test_suite_running = True
***************
*** 428,433 ****
try: # restore the plugin config at exit.
msgstore.test_suite_running = True
- run_filter_tests(manager)
run_nonfilter_tests(manager)
finally:
# Always restore configuration to how we started.
--- 448,454 ----
try: # restore the plugin config at exit.
msgstore.test_suite_running = True
run_nonfilter_tests(manager)
+ # filtering tests take alot of time - do them last.
+ run_filter_tests(manager)
finally:
# Always restore configuration to how we started.
More information about the Spambayes-checkins
mailing list