12c12
<
---
>
27c27
<
---
>
67c67
<
---
>
100a101,103
> -P : password
> -R filename : redirect stdout and stderr to file
> -L dbname : log to database (template alog4.ntf)
110c113
<
---
>
113,114c116,117
<
< Replicate, then classify inbox
---
>
> Replicate, then classify inbox
116c119
<
---
>
151a155,180
> def getMessage(doc):
>     """Build the message text from a Notes document.
>
>     Returns the From/Sender/Received/ReplyTo items as header lines,
>     then a Subject line, an empty line and the body text.
>     """
>     # Best effort: an item that cannot be read gets a placeholder.
>     try:
>         subj = doc.GetItemValue('Subject')[0]
>     except:
>         subj = 'No Subject'
>     try:
>         body = doc.GetItemValue('Body')[0]
>     except:
>         body = 'No Body'
>
>     hdrs = ''
>     for item in doc.Items:
>         if item.Name in ("From", "Sender", "Received", "ReplyTo"):
>             try:
>                 hdrs += "%s: %s\r\n" % (item.Name, item.Text)
>             except:
>                 # Drop only the item that failed to format; the
>                 # headers collected so far are kept.
>                 continue
>     return "%sSubject: %s\r\n\r\n%s" % (hdrs, subj, body)
153c182,183
< def classifyInbox(v, vmoveto, bayes, ldbname, notesindex):
---
>
> def classifyInbox(v, vmoveto, bayes, ldbname, notesindex, log):
161c191
<
---
>
167c197
<
---
>
184,195c214
<
< try:
< subj = doc.GetItemValue('Subject')[0]
< except:
< subj = 'No Subject'
<
< try:
< body = doc.GetItemValue('Body')[0]
< except:
< body = 'No Body'
<
< message = "Subject: %s\r\n\r\n%s" % (subj, body)
---
> message = getMessage(doc)
216a236,237
> if log:
> log.LogAction("%s spamprob is %s" % (subj[:30], prob))
218a240,246
> if log:
> log.LogAction("<subject not printed> spamprob is %s" \
> % (prob) )
>
> item = doc.ReplaceItemValue("Spam",prob)
> item.IsSummary = True
> doc.save(False,True,False)
232d259
<
234c261,268
< def processAndTrain(v, vmoveto, bayes, is_spam, notesindex):
---
> if log:
> log.LogAction("%s documents processed" % (numdocs))
> log.LogAction(" %s classified as spam" % (numspam))
> log.LogAction(" %s classified as ham" % (numham))
> log.LogAction(" %s classified as unsure" % (numuns))
>
>
> def processAndTrain(v, vmoveto, bayes, is_spam, notesindex,log):
242c276
<
---
>
246,256c280
< try:
< subj = doc.GetItemValue('Subject')[0]
< except:
< subj = 'No Subject'
<
< try:
< body = doc.GetItemValue('Body')[0]
< except:
< body = 'No Body'
<
< message = "Subject: %s\r\n%s" % (subj, body)
---
> message = getMessage(doc)
272c296
<
---
>
284c308,310
<
---
> if log:
> log.LogAction("%s documents trained" % (len(docstomove)))
>
286c312,313
< def run(bdbname, useDBM, ldbname, rdbname, foldname, doTrain, doClassify):
---
> def run(bdbname, useDBM, ldbname, rdbname, foldname, doTrain, doClassify, pwd,
> idxname, logname):
292a320
> sess = win32com.client.Dispatch("Lotus.NotesSession")
294c322,338
< fp = open("%s.sbindex" % (ldbname), 'rb')
---
> sess.initialize(pwd)
> except pywintypes.com_error:
> print "Session aborted"
> sys.exit()
>
> log = sess.CreateLog("SpambayesAgentLog")
> try:
> log.OpenNotesLog("",logname)
> except pywintypes.com_error:
> print "Could not open log"
> log = None
>
> if log:
> log.LogAction("Running spambayes")
>
> try:
> fp = open(idxname, 'rb')
298,299c342,343
< print "%s.sbindex file not found, this is a first time run" \
< % (ldbname)
---
> print "%s file not found, this is a first time run" \
> % (idxname)
304,305c348,350
<
< sess = win32com.client.Dispatch("Lotus.NotesSession")
---
>
> need_replicate = False
>
307c352
< sess.initialize()
---
> db = sess.GetDatabase(rdbname,ldbname)
309,313c354,362
< print "Session aborted"
< sys.exit()
<
< db = sess.GetDatabase("",ldbname)
<
---
> print "Could not open database remotely, trying locally"
> if rdbname:
> try:
> db = sess.GetDatabase("",ldbname)
> need_replicate = True
> except pywintypes.com_error:
> print "Could not open database"
>
>
319c368,376
<
---
>
> if need_replicate: # rdbname:
> try:
> print "Replicating..."
> db.Replicate(rdbname)
> print "Done"
> except pywintypes.com_error:
> print "Could not replicate"
>
321c378
< processAndTrain(vtrainspam, vspam, bayes, True, notesindex)
---
> processAndTrain(vtrainspam, vspam, bayes, True, notesindex, log)
323,329c380,381
< processAndTrain(vtrainham, vham, bayes, False, notesindex)
<
< if rdbname:
< print "Replicating..."
< db.Replicate(rdbname)
< print "Done"
<
---
> processAndTrain(vtrainham, vham, bayes, False, notesindex, log)
>
331c383,391
< classifyInbox(vinbox, vtrainspam, bayes, ldbname, notesindex)
---
> classifyInbox(vinbox, vtrainspam, bayes, ldbname, notesindex, log)
>
> if need_replicate: # rdbname:
> try:
> print "Replicating..."
> db.Replicate(rdbname)
> print "Done"
> except pywintypes.com_error:
> print "Could not replicate"
338c398
< fp = open("%s.sbindex" % (ldbname), 'wb')
---
> fp = open(idxname, 'wb')
341c401,403
<
---
>
> if log:
> log.LogAction("Finished running spambayes")
344a407,411
> # try:
> # except error, msg:
> # print >>sys.stderr, str(msg) + '\n\n' + __doc__
> # sys.exit()
>
346c413
< opts, args = getopt.getopt(sys.argv[1:], 'htcpd:D:l:r:f:')
---
> opts, args = getopt.getopt(sys.argv[1:], 'htcpd:D:L:l:i:R:r:f:P:')
354a422,425
> idxname = None # index file name
> logname = None # log database name
> redname = None # redirect name
> pwd = None # password
368a440,441
> elif opt == '-i':
> idxname = arg
370a444,445
> elif opt == '-L':
> logname = arg
372a448,449
> elif opt == '-R':
> redname = arg
380a458,472
> elif opt == '-P':
> pwd = arg
>
> if (not idxname):
> idxname = "%s.sbindex" % (ldbname)
>
>     if redname:
>         # open() reports failure with IOError, not getopt.error.
>         try:
>             sys.stdout = sys.stderr = open(redname, 'w')
>             print "Running bayesian mail filter"
>         except IOError, msg:
>             print >>sys.stderr, str(msg) + '\n\n' + __doc__
>             sys.exit()
>
384c476,478
< sbfname, doTrain, doClassify)
---
> sbfname, doTrain, doClassify,pwd, idxname, logname)
>
> print >>sys.stderr, "Done"
392c486
< print >>sys.stderr, __doc__
\ No newline at end of file
---
> print >>sys.stderr, __doc__