[Spambayes-checkins]
spambayes/spambayes Options.py,1.7,1.8 hammie.py,1.3,1.4
Neale Pickett
npickett at users.sourceforge.net
Tue Jan 21 21:23:20 EST 2003
Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs1:/tmp/cvs-serv13378/spambayes
Modified Files:
Options.py hammie.py
Log Message:
* Fix function name in hammie.py
* Expound upon docstrings in hammie.py
* Options.py will now look for bayescustomize.ini and ~/.spambayesrc.
Hopefully some non-Unix folks will update this with sensible defaults
for their platforms.
* hammiefilter has a ton of new options -- check the docstring
Index: Options.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Options.py,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -d -r1.7 -r1.8
*** Options.py 21 Jan 2003 14:50:26 -0000 1.7
--- Options.py 22 Jan 2003 05:23:17 -0000 1.8
***************
*** 557,559 ****
options.mergefiles(alternate.split())
else:
! options.mergefiles(['bayescustomize.ini'])
--- 557,565 ----
options.mergefiles(alternate.split())
else:
! alts = []
! for path in ['bayescustomize.ini', '~/.spambayesrc']:
! epath = os.path.expanduser(path)
! if os.path.exists(epath):
! alts.append(epath)
! if alts:
! options.mergefiles(alts)
Index: hammie.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/hammie.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** hammie.py 21 Jan 2003 14:50:27 -0000 1.3
--- hammie.py 22 Jan 2003 05:23:17 -0000 1.4
***************
*** 76,81 ****
If 'train' is True, also train on the result of scoring the
message (ie. train as ham if it's ham, train as spam if it's
! spam). You'll want to be very dilligent about retraining
! mistakes if you use this.
All defaults for optional parameters come from the Options file.
--- 76,82 ----
If 'train' is True, also train on the result of scoring the
message (ie. train as ham if it's ham, train as spam if it's
! spam). If the message already has a trained header, it will be
! untrained first. You'll want to be very diligent about
! retraining mistakes if you use this option.
All defaults for optional parameters come from the Options file.
***************
*** 103,122 ****
except KeyError:
pass
prob, clues = self._scoremsg(msg, True)
if prob < ham_cutoff:
is_spam = False
! trained = options.header_ham_string
! disp = trained
elif prob > spam_cutoff:
is_spam = True
! trained = options.header_spam_string
! disp = trained
else:
is_spam = False
- trained = options.header_ham_string
disp = options.header_unsure_string
if train:
! self.train(msg, is_spam)
! msg.add_header(options.hammie_trained_header, trained)
disp += ("; %."+str(options.header_score_digits)+"f") % prob
if options.header_score_logarithm:
--- 104,121 ----
except KeyError:
pass
+ if train:
+ self.untrain_from_header(msg)
prob, clues = self._scoremsg(msg, True)
if prob < ham_cutoff:
is_spam = False
! disp = options.header_ham_string
elif prob > spam_cutoff:
is_spam = True
! disp = options.header_spam_string
else:
is_spam = False
disp = options.header_unsure_string
if train:
! self.train(msg, is_spam, True)
disp += ("; %."+str(options.header_score_digits)+"f") % prob
if options.header_score_logarithm:
***************
*** 129,139 ****
x=-math.log10(1.0-prob)
disp += " (%d)"%x
msg.add_header(header, disp)
if debug:
disp = self.formatclues(clues)
msg.add_header(debugheader, disp)
return msg.as_string(unixfrom=(msg.get_unixfrom() is not None))
! def train(self, msg, is_spam):
"""Train bayes with a message.
--- 128,140 ----
x=-math.log10(1.0-prob)
disp += " (%d)"%x
+ del msg[header]
msg.add_header(header, disp)
if debug:
disp = self.formatclues(clues)
+ del msg[debugheader]
msg.add_header(debugheader, disp)
return msg.as_string(unixfrom=(msg.get_unixfrom() is not None))
! def train(self, msg, is_spam, add_header=False):
"""Train bayes with a message.
***************
*** 142,148 ****
--- 143,159 ----
is_spam should be 1 if the message is spam, 0 if not.
+ If add_header is True, add a header with how it was trained (in
+ case we need to untrain later)
+
"""
self.bayes.learn(tokenize(msg), is_spam)
+ if add_header:
+ if is_spam:
+ trained = options.header_spam_string
+ else:
+ trained = options.header_ham_string
+ del msg[options.hammie_trained_header]
+ msg.add_header(options.hammie_trained_header, trained)
def untrain(self, msg, is_spam):
***************
*** 151,155 ****
msg can be a string, a file object, or a Message object.
! is_spam should be 1 if the message is spam, 0 if not.
"""
--- 162,166 ----
msg can be a string, a file object, or a Message object.
! is_spam should be True if the message is spam, False if not.
"""
***************
*** 157,180 ****
self.bayes.unlearn(tokenize(msg), is_spam)
! def train_ham(self, msg):
"""Train bayes with ham.
msg can be a string, a file object, or a Message object.
"""
! self.train(msg, False)
! def train_spam(self, msg):
"""Train bayes with spam.
msg can be a string, a file object, or a Message object.
"""
! self.train(msg, True)
def untrain_ham(self, msg):
! """Untrain bayes with ham.
msg can be a string, a file object, or a Message object.
--- 168,222 ----
self.bayes.unlearn(tokenize(msg), is_spam)
! def untrain_from_header(self, msg):
! """Untrain bayes based on X-Spambayes-Trained header.
!
! msg can be a string, a file object, or a Message object.
!
! If no such header is present, nothing happens.
!
! If add_header is True, add a header with how it was trained (in
! case we need to untrain later)
!
! """
!
! msg = mboxutils.get_message(msg)
! trained = msg.get(options.hammie_trained_header)
! if not trained:
! return
! del msg[options.hammie_trained_header]
! if trained == options.header_ham_string:
! self.untrain_ham(msg)
! elif trained == options.header_spam_string:
! self.untrain_spam(msg)
! else:
! raise ValueError('%s header value unrecognized'
! % options.hammie_trained_header)
!
! def train_ham(self, msg, add_header=False):
"""Train bayes with ham.
msg can be a string, a file object, or a Message object.
+ If add_header is True, add a header with how it was trained (in
+ case we need to untrain later)
+
"""
! self.train(msg, False, add_header)
! def train_spam(self, msg, add_header=False):
"""Train bayes with spam.
msg can be a string, a file object, or a Message object.
+ If add_header is True, add a header with how it was trained (in
+ case we need to untrain later)
+
"""
! self.train(msg, True, add_header)
def untrain_ham(self, msg):
! """Untrain bayes with a message previously trained as ham.
msg can be a string, a file object, or a Message object.
***************
*** 184,189 ****
self.untrain(msg, False)
! def train_spam(self, msg):
! """Untrain bayes with spam.
msg can be a string, a file object, or a Message object.
--- 226,231 ----
self.untrain(msg, False)
! def untrain_spam(self, msg):
! """Untrain bayes with a message previously trained as spam.
msg can be a string, a file object, or a Message object.
More information about the Spambayes-checkins
mailing list