[Spambayes-checkins]
spambayes/Outlook2000 tester.py,NONE,1.1 addin.py,1.40,1.41
train.py,1.20,1.21
Mark Hammond
mhammond at users.sourceforge.net
Mon Dec 9 01:18:40 EST 2002
Update of /cvsroot/spambayes/spambayes/Outlook2000
In directory sc8-pr-cvs1:/tmp/cvs-serv14252
Modified Files:
addin.py train.py
Added Files:
tester.py
Log Message:
Add a fairly comprehensive (of Outlook's code) test suite.
--- NEW FILE: tester.py ---
# unit tester for the Outlook addin.
#
# Note we are only attempting to test Outlook specific
# functionality, such as filters, etc.
#
# General process is to create test messages known to contain ham/spam
# keywords, and tracking their progress through the filters. We also
# move this test message back around, and watch the incremental retrain
# in action. Also checks that the message correctly remains classified
# after a message move.
from win32com.client import constants
from time import sleep
HAM="ham"
SPAM="spam"
UNSURE="unsure"
TEST_SUBJECT = "SpamBayes addin auto-generated test message"
class TestFailure(Exception):
pass
def TestFailed(msg):
raise TestFailure(msg)
def WaitForFilters():
import pythoncom
for i in range(100):
pythoncom.PumpWaitingMessages()
sleep(0.01)
# Find the top 'n' words in the Spam database that are clearly
# marked as either ham or spam. Simply enumerates the
# bayes word list looking for any word with zero count in the
# non-requested category.
def FindTopWords(bayes, num, get_spam):
items = []
for word, info in bayes.wordinfo.items():
if ":" in word:
continue
if get_spam:
if info.hamcount==0:
items.append((info.spamcount, word))
else:
if info.spamcount==0:
items.append((info.hamcount, word))
items.sort()
return [item[1] for item in items]
# A little driver/manager for our tests
class Driver:
def __init__(self, mgr):
if mgr is None:
import manager
mgr = manager.GetManager()
self.manager = mgr
# Remember the "spam" folder.
folder = mgr.message_store.GetFolder(mgr.config.filter.spam_folder_id)
self.folder_spam = folder.GetOutlookItem()
# Remember the "unsure" folder.
folder = mgr.message_store.GetFolder(mgr.config.filter.unsure_folder_id)
self.folder_unsure = folder.GetOutlookItem()
# The "watch" folder is a folder we can stick stuff into to have them
# filtered - just use the first one nominated.
for folder in mgr.message_store.GetFolderGenerator(
mgr.config.filter.watch_folder_ids,
mgr.config.filter.watch_include_sub):
self.folder_watch = folder.GetOutlookItem()
break
# And the drafts folder where new messages are created.
self.folder_drafts = mgr.outlook.Session.GetDefaultFolder(constants.olFolderDrafts)
def FindTestMessage(self, folder):
subject = TEST_SUBJECT
items = folder.Items
return items.Find("[Subject] = '%s'" % (subject,))
def _CleanTestMessageFromFolder(self, folder):
subject = TEST_SUBJECT
num = 0
while True:
msg = self.FindTestMessage(folder)
if msg is None:
break
msg.Delete()
num += 1
if num:
print "Cleaned %d test messages from folder '%s'" % (num, folder.Name)
def CleanAllTestMessages(self):
subject = TEST_SUBJECT
self._CleanTestMessageFromFolder(self.folder_spam)
self._CleanTestMessageFromFolder(self.folder_unsure)
self._CleanTestMessageFromFolder(self.folder_watch)
self._CleanTestMessageFromFolder(self.folder_drafts)
def CreateTestMessageInFolder(self, spam_status, folder):
msg = self.CreateTestMessage(spam_status)
msg.Save() # Put into "Drafts".
assert self.FindTestMessage(self.folder_drafts) is not None
# Move it to the specified folder
msg.Move(folder)
# And now find it in the specified folder
return self.FindTestMessage(folder)
def CreateTestMessage(self, spam_status):
words = []
if spam_status != SPAM:
words.extend(FindTopWords(self.manager.bayes, 50, False))
if spam_status != HAM:
words.extend(FindTopWords(self.manager.bayes, 50, True))
# Create a new blank message with our words
msg = self.manager.outlook.CreateItem(0)
msg.Body = "\n".join(words)
msg.Subject = TEST_SUBJECT
return msg
# The tests themselves.
# The "spam" test is huge - we do standard filter tests, but
# also do incremental retrain tests.
def TestSpamFilter(driver):
nspam = driver.manager.bayes.nspam
nham = driver.manager.bayes.nham
import copy
original_bayes = copy.copy(driver.manager.bayes)
# Create a spam message in the Inbox - it should get immediately filtered
msg = driver.CreateTestMessageInFolder(SPAM, driver.folder_watch)
# sleep to ensure filtering.
WaitForFilters()
# It should no longer be in the Inbox.
if driver.FindTestMessage(driver.folder_watch) is not None:
TestFailed("The test message appeared to not be filtered")
# It should be in the "sure is spam" folder.
spam_msg = driver.FindTestMessage(driver.folder_spam)
if spam_msg is None:
TestFailed("The test message vanished from the Inbox, but didn't appear in Spam")
# Check that none of the above caused training.
if nspam != driver.manager.bayes.nspam:
TestFailed("Something caused a new spam message to appear")
if nham != driver.manager.bayes.nham:
TestFailed("Something caused a new ham message to appear")
# Now move the message back to the inbox - it should get trained.
store_msg = driver.manager.message_store.GetMessage(spam_msg)
import train
if train.been_trained_as_ham(store_msg, driver.manager):
TestFailed("This new spam message should not have been trained as ham yet")
if train.been_trained_as_spam(store_msg, driver.manager):
TestFailed("This new spam message should not have been trained as spam yet")
spam_msg.Move(driver.folder_watch)
WaitForFilters()
spam_msg = driver.FindTestMessage(driver.folder_watch)
store_msg = driver.manager.message_store.GetMessage(spam_msg)
need_untrain = True
try:
if nspam != driver.manager.bayes.nspam:
TestFailed("There were not the same number of spam messages after a re-train")
if nham+1 != driver.manager.bayes.nham:
TestFailed("There was not one more ham messages after a re-train")
if train.been_trained_as_spam(store_msg, driver.manager):
TestFailed("This new spam message should not have been trained as spam yet")
if not train.been_trained_as_ham(store_msg, driver.manager):
TestFailed("This new spam message should have been trained as ham now")
# Now move it back to the Spam folder.
# This should see the message un-trained as ham, and re-trained as Spam
spam_msg.Move(driver.folder_spam)
WaitForFilters()
spam_msg = driver.FindTestMessage(driver.folder_spam)
if spam_msg is None:
TestFailed("Could not find the message in the Spam folder")
store_msg = driver.manager.message_store.GetMessage(spam_msg)
if nspam +1 != driver.manager.bayes.nspam:
TestFailed("There should be one more spam now")
if nham != driver.manager.bayes.nham:
TestFailed("There should be the same number of hams again")
if not train.been_trained_as_spam(store_msg, driver.manager):
TestFailed("This new spam message should have been trained as spam by now")
if train.been_trained_as_ham(store_msg, driver.manager):
TestFailed("This new spam message should have been un-trained as ham")
# Move the message to another folder, and make sure we still
# identify it correctly as having been trained.
# Move to the "unsure" folder, just cos we know about it, and
# we know that no special watching of this folder exists.
spam_msg.Move(driver.folder_unsure)
spam_msg = driver.FindTestMessage(driver.folder_unsure)
if spam_msg is None:
TestFailed("Could not find the message in the Unsure folder")
store_msg = driver.manager.message_store.GetMessage(spam_msg)
if not train.been_trained_as_spam(store_msg, driver.manager):
TestFailed("Message was not identified as Spam after moving")
# Now undo the damage we did.
was_spam = train.untrain_message(store_msg, driver.manager)
if not was_spam:
TestFailed("Untraining this message did not indicate it was spam")
need_untrain = False
finally:
if need_untrain:
train.untrain_message(store_msg, driver.manager)
# Check all the counts are back where we started.
if nspam != driver.manager.bayes.nspam:
TestFailed("Spam count didn't get back to the same")
if nham != driver.manager.bayes.nham:
TestFailed("Ham count didn't get back to the same")
if driver.manager.bayes.wordinfo != original_bayes.wordinfo:
TestFailed("The bayes object's 'wordinfo' did not compare the same at the end of all this!")
if driver.manager.bayes.probcache != original_bayes.probcache:
TestFailed("The bayes object's 'probcache' did not compare the same at the end of all this!")
spam_msg.Delete()
print "Created a Spam message, and saw it get filtered and trained."
def TestHamFilter(driver):
# Create a spam message in the Inbox - it should get immediately filtered
msg = driver.CreateTestMessageInFolder(HAM, driver.folder_watch)
# sleep to ensure filtering.
WaitForFilters()
# It should still be in the Inbox.
if driver.FindTestMessage(driver.folder_watch) is None:
TestFailed("The test ham message appeared to have been filtered!")
msg.Delete()
print "Created a Ham message, and saw it remain in place."
def TestUnsureFilter(driver):
# Create a spam message in the Inbox - it should get immediately filtered
msg = driver.CreateTestMessageInFolder(UNSURE, driver.folder_watch)
# sleep to ensure filtering.
WaitForFilters()
# It should no longer be in the Inbox.
if driver.FindTestMessage(driver.folder_watch) is not None:
TestFailed("The test unsure message appeared to not be filtered")
# It should be in the "unsure" folder.
spam_msg = driver.FindTestMessage(driver.folder_unsure)
if spam_msg is None:
TestFailed("The test message vanished from the Inbox, but didn't appear in Unsure")
spam_msg.Delete()
print "Created an unsure message, and saw it get filtered"
def test(manager = None):
# Run the tests - called from our plugin.
driver = Driver(manager)
assert driver.manager.config.filter.enabled, "Filtering must be enabled for these tests"
assert driver.manager.config.training.train_recovered_spam and \
driver.manager.config.training.train_manual_spam, "Incremental training must be enabled for these tests"
driver.CleanAllTestMessages()
TestSpamFilter(driver)
TestUnsureFilter(driver)
TestHamFilter(driver)
driver.CleanAllTestMessages()
if __name__=='__main__':
print "NOTE: This will NOT work from the command line"
print "(it nearly will, and is useful for debugging the tests"
print "themselves, so we will run them anyway!)"
test()
Index: addin.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/addin.py,v
retrieving revision 1.40
retrieving revision 1.41
diff -C2 -d -r1.40 -r1.41
*** addin.py 27 Nov 2002 05:49:52 -0000 1.40
--- addin.py 9 Dec 2002 09:18:38 -0000 1.41
***************
*** 264,267 ****
--- 264,279 ----
new_msg.Display()
+ # A hook for whatever tests we have setup
+ def Tester(manager):
+ import tester, traceback
+ try:
+ print "Executing automated tests..."
+ tester.test(manager)
+ print "Tests worked."
+ except:
+ traceback.print_exc()
+ print "Tests FAILED. Sorry about that. If I were you, I would do a full re-train ASAP"
+ print "Please delete any test messages from your Spam, Unsure or Inbox folders first."
+
# The "Delete As Spam" and "Recover Spam" button
# The event from Outlook's explorer that our folder has changed.
***************
*** 424,427 ****
--- 436,444 ----
Caption="Show spam clues for current message",
Enabled=True)
+ # If we are running from Python sources, enable a few extra items
+ if not hasattr(sys, "frozen"):
+ self._AddPopup(popup, Tester, (self.manager,),
+ Caption="Execute test suite",
+ Enabled=True)
self.have_setup_ui = True
Index: train.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/train.py,v
retrieving revision 1.20
retrieving revision 1.21
diff -C2 -d -r1.20 -r1.21
*** train.py 25 Nov 2002 20:52:49 -0000 1.20
--- train.py 9 Dec 2002 09:18:38 -0000 1.21
***************
*** 58,61 ****
--- 58,78 ----
return True
+ # Untrain a message.
+ # Return: None == not previously trained
+ # True == was_spam
+ # False == was_ham
+ def untrain_message(msg, mgr):
+ from tokenizer import tokenize
+ stream = msg.GetEmailPackageObject()
+ if been_trained_as_spam(msg, mgr):
+ assert not been_trained_as_ham(msg, mgr), "Can't have been both!"
+ mgr.bayes.unlearn(tokenize(stream), True)
+ return True
+ if been_trained_as_ham(msg, mgr):
+ assert not been_trained_as_spam(msg, mgr), "Can't have been both!"
+ mgr.bayes.unlearn(tokenize(stream), False)
+ return False
+ return None
+
def train_folder(f, isspam, mgr, progress):
num = num_added = 0
More information about the Spambayes-checkins
mailing list