[Spambayes-checkins] spambayes imapfilter.py,1.36,1.37
Tony Meyer
anadelonbrin at users.sourceforge.net
Mon Apr 28 01:01:29 EDT 2003
Update of /cvsroot/spambayes/spambayes
In directory sc8-pr-cvs1:/tmp/cvs-serv11246
Modified Files:
imapfilter.py
Log Message:
Switch imapfilter from using IMAP uids as persistent ids for
messages to using our own generated ids. This code runs on one
server I have, but I don't have time for more testing at the moment.
I'm checking it in since the old code is Bad and this should be
ok....
WARNING: You will need to discard any hammie.db that you
have trained with imapfilter.py, and also any spambayes.messageinfo.db
files.
Index: imapfilter.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/imapfilter.py,v
retrieving revision 1.36
retrieving revision 1.37
diff -C2 -d -r1.36 -r1.37
*** imapfilter.py 28 Apr 2003 05:20:46 -0000 1.36
--- imapfilter.py 28 Apr 2003 07:01:20 -0000 1.37
***************
*** 93,96 ****
--- 93,101 ----
This might help if the username/password has special characters like
accented characters.
+ o The code currently FETCHes the whole RFC822 message in training, even
+ if it will subsequently decide that the message has already been
+ trained (it won't then train on it, it just throws the message away).
+ This is really inefficient, and a better solution needs to be
+ developed.
o Suggestions?
"""
***************
*** 191,196 ****
def __init__(self, server, port, debug=0, do_expunge=False):
imaplib.Debug = debug # this is a global in the imaplib module
! imaplib.IMAP4.__init__(self, server, port)
! # XXX We should check here to see if the server/port were valid
# For efficiency, we remember which folder we are currently
# in, and only send a select command to the IMAP server if
--- 196,208 ----
def __init__(self, server, port, debug=0, do_expunge=False):
imaplib.Debug = debug # this is a global in the imaplib module
! try:
! imaplib.IMAP4.__init__(self, server, port)
! except:
! # A more specific except would be good here, but I get
! # (in Python 2.2) a generic 'error' and a 'gaierror'
! # if I pass a valid domain that isn't an IMAP server
! # or invalid domain (respectively)
! print "Invalid server or port, please check these settings."
! sys.exit(-1)
# For efficiency, we remember which folder we are currently
# in, and only send a select command to the IMAP server if
***************
*** 200,206 ****
self.do_expunge = do_expunge
! def login(self, uid, pw):
try:
! imaplib.IMAP4.login(self, uid, pw) # superclass login
except imaplib.IMAP4.error, e:
if str(e) == "permission denied":
--- 212,218 ----
self.do_expunge = do_expunge
! def login(self, username, pwd):
try:
! imaplib.IMAP4.login(self, username, pwd) # superclass login
except imaplib.IMAP4.error, e:
if str(e) == "permission denied":
***************
*** 219,224 ****
def SelectFolder(self, folder):
'''A method to point ensuing imap operations at a target folder'''
! if self.current_folder != None:
! if self.current_folder != folder:
if self.do_expunge:
# It is faster to do close() than a single
--- 231,236 ----
def SelectFolder(self, folder):
'''A method to point ensuing imap operations at a target folder'''
! if self.current_folder != folder:
! if self.current_folder != None:
if self.do_expunge:
# It is faster to do close() than a single
***************
*** 252,257 ****
def _force_CRLF(self, data):
! """Make sure data uses CRLF for line termination.
! """
return CRLF_RE.sub('\r\n', data)
--- 264,268 ----
def _force_CRLF(self, data):
! """Make sure data uses CRLF for line termination."""
return CRLF_RE.sub('\r\n', data)
***************
*** 266,271 ****
def extractTime(self):
# When we create a new copy of a message, we need to specify
! # a timestamp for the message. If the message has a date header
! # we use that. Otherwise, we use the current time.
message_date = self["Date"]
if message_date is not None:
--- 277,282 ----
def extractTime(self):
# When we create a new copy of a message, we need to specify
! # a timestamp for the message. If the message has a valid date
! # header we use that. Otherwise, we use the current time.
message_date = self["Date"]
if message_date is not None:
***************
*** 278,288 ****
def MoveTo(self, dest):
'''Note that message should move to another folder. No move is
! carried out until Save() is called.'''
! # This move operation just changes where we think we are,
! # and we do an actual move on save (to avoid doing
! # this more than once)
if self.previous_folder is None:
self.previous_folder = self.folder
! self.folder = dest
def Save(self):
--- 289,296 ----
def MoveTo(self, dest):
'''Note that message should move to another folder. No move is
! carried out until Save() is called, for efficiency.'''
if self.previous_folder is None:
self.previous_folder = self.folder
! self.folder = dest
def Save(self):
***************
*** 290,294 ****
# we can't actually update the message with IMAP
# so what we do is create a new message and delete the old one
- # we need to copy the flags as well
if self.folder is None:
raise RuntimeError, """Can't save a message that doesn't
--- 298,301 ----
***************
*** 297,301 ****
raise RuntimeError, """Can't save a message that doesn't have
an id."""
! response = imap.uid("FETCH", self.id, "(FLAGS INTERNALDATE)")
self._check(response, 'fetch (flags internaldate)')
data = _extract_fetch_data(response[1][0])
--- 304,308 ----
raise RuntimeError, """Can't save a message that doesn't have
an id."""
! response = imap.uid("FETCH", self.uid, "(FLAGS INTERNALDATE)")
self._check(response, 'fetch (flags internaldate)')
data = _extract_fetch_data(response[1][0])
***************
*** 312,318 ****
flags = None
! # See searching for new uid comments below
! old_id = self.id
! self["X-Spambayes-IMAP-OldID"] = old_id
response = imap.append(self.folder.name, flags,
--- 319,328 ----
flags = None
! # Once, we used the IMAP uid to keep track of messages.
! # This fails miserably because it's only guarenteed to be unique
! # within a particular folder. Folders have a UID validity value,
! # but this can change from session to session. So we forget this
! # imap rubbish and use our own id.
! self[options["pop3proxy", "mailid_header_name"]] = self.id
response = imap.append(self.folder.name, flags,
***************
*** 325,344 ****
imap.SelectFolder(self.previous_folder.name)
self.previous_folder = None
! response = imap.uid("STORE", old_id, "+FLAGS.SILENT", "(\\Deleted)")
self._check(response, 'store')
! # We need to update the uid, as it will have changed
! # Searching for the new message is full of problems. Searching for
! # the text sends far too much data through the connection, and
! # doesn't work reliably anyway. We instead search for a special
! # header that we add for this explicit purpose.
imap.SelectFolder(self.folder.name)
! response = imap.uid("SEARCH", "HEADER", "X-Spambayes-IMAP-OldID",
! old_id)
self._check(response, 'search')
new_id = response[1][0]
! # This works with NetMail, but not with Courier. Very strange,
! # and needs more examination. For the moment, if the search
! # turns up empty, we make the very big assumption that the new
# message is the last one with a recent flag
if new_id == "":
--- 335,352 ----
imap.SelectFolder(self.previous_folder.name)
self.previous_folder = None
! response = imap.uid("STORE", self.uid, "+FLAGS.SILENT", "(\\Deleted)")
self._check(response, 'store')
! # We need to update the uid, as it will have changed.
! # Although we don't use the UID to keep track of messages, we do
! # have to use it for IMAP operations.
imap.SelectFolder(self.folder.name)
! response = imap.uid("SEARCH", "HEADER",
! options["pop3proxy", "mailid_header_name"],
! self.id)
self._check(response, 'search')
new_id = response[1][0]
! # Let's hope it doesn't, but, just in case, if the search
! # turns up empty, we make the assumption that the new
# message is the last one with a recent flag
if new_id == "":
***************
*** 348,358 ****
ids = new_id.split(' ')
new_id = ids[-1]
!
! #XXX This code to delete the old message id from the message
! #XXX info db and manipulate the message id, is a *serious* hack.
! #XXX There's gotta be a better way to do this.
! message.msginfoDB._delState(self)
! self.id = new_id
! self.modified()
# This performs a similar function to email.message_from_string()
--- 356,360 ----
ids = new_id.split(' ')
new_id = ids[-1]
! self.uid = new_id
# This performs a similar function to email.message_from_string()
***************
*** 362,368 ****
class IMAPFolder(object):
! def __init__(self, folder_name, readOnly=True):
self.name = folder_name
self.rfc822_command = "RFC822.PEEK"
def __cmp__(self, obj):
--- 364,373 ----
class IMAPFolder(object):
! def __init__(self, folder_name):
self.name = folder_name
self.rfc822_command = "RFC822.PEEK"
+ # Unique names for cached messages - see _generate_id below.
+ self.lastBaseMessageName = ''
+ self.uniquifier = 2
def __cmp__(self, obj):
***************
*** 385,389 ****
pass
! def recent_keys(self):
'''Returns uids for all the messages in the folder that
are flagged as recent, but not flagged as deleted.'''
--- 390,394 ----
pass
! def recent_uids(self):
'''Returns uids for all the messages in the folder that
are flagged as recent, but not flagged as deleted.'''
***************
*** 394,400 ****
def keys(self):
! '''Returns uids for all the messages in the folder not
marked as deleted.'''
- # request message range
imap.SelectFolder(self.name)
response = imap.uid("SEARCH", "UNDELETED")
--- 399,404 ----
def keys(self):
! '''Returns *uids* for all the messages in the folder not
marked as deleted.'''
imap.SelectFolder(self.name)
response = imap.uid("SEARCH", "UNDELETED")
***************
*** 421,429 ****
# raw rfc822 message
msg = imapmessage_from_string(messageText)
- msg.setId(key)
msg.setFolder(self)
return msg
!
def Train(self, classifier, isSpam):
'''Train folder as spam/ham'''
--- 425,453 ----
# raw rfc822 message
msg = imapmessage_from_string(messageText)
msg.setFolder(self)
+ msg.uid = data["UID"]
+ if msg.setIdFromPayload() is None:
+ msg.setId(self._generate_id())
+ # Unfortunately, we now have to re-save this message, so that
+ # our id is stored on the IMAP server. Before anyone suggests
+ # it, we can't store it as a flag, because user-defined flags
+ # aren't supported by all IMAP servers.
+ msg.Save()
return msg
!
! # Lifted straight from pop3proxy.py (under the name getNewMessageName)
! def _generate_id(self):
! # The message id is the time it arrived, with a uniquifier
! # appended if two arrive within one clock tick of each other.
! messageName = "%10.10d" % long(time.time())
! if messageName == self.lastBaseMessageName:
! messageName = "%s-%d" % (messageName, self.uniquifier)
! self.uniquifier += 1
! else:
! self.lastBaseMessageName = messageName
! self.uniquifier = 2
! return messageName
!
def Train(self, classifier, isSpam):
'''Train folder as spam/ham'''
***************
*** 522,526 ****
# Select the folder to make sure it exists
imap.SelectFolder(filter_folder)
! folder = IMAPFolder(filter_folder, False)
folder.Filter(self.classifier, self.spam_folder,
self.unsure_folder)
--- 546,550 ----
# Select the folder to make sure it exists
imap.SelectFolder(filter_folder)
! folder = IMAPFolder(filter_folder)
folder.Filter(self.classifier, self.spam_folder,
self.unsure_folder)
More information about the Spambayes-checkins
mailing list