POP3 and selective download/deletion

David Mertz mertz at gnosis.cx
Sat Apr 5 20:30:14 CEST 2003


"Steve Holden" <sholden at holdenweb.com> wrote previously:
|Unfortunately the POP3 protocol doesn't allow you to download the headers of a
|message without downloading the whole thing.

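Nonsense.  The POP3 TOP command (the top() method in poplib) retrieves a
message's headers plus only as many body lines as you ask for--zero, if you
like.  This is part of the script I use to filter SPAM based only on headers
(so I don't download the large virus/spam message bodies).  There is a bit of
cruft, in that I display some progress information and log some actions, but
it shows a realistic program.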

    def check_pop_box(host, user, passwd, log, blist, wlist, screen_setup=""):
        freerows = ROWS-6   # 6 rows used by decorations
        try:
            mbox = poplib.POP3(host)
            say(screen_setup)
        except socket.gaierror:
            return None
        mbox.user(user)
        mbox.pass_(passwd)

        spams = {}
        for i in range(1, mbox.stat()[0]+1):
            # messages use one-based indexing
            headerlines = mbox.top(i, 0)[1]    # No body lines
            subject, from_ = '<NONE>','<NONE>'
            whitelisted = 0
            header = []
            for ln in headerlines:
                header.append(ln)
                if ln.upper().startswith('SUBJECT: '): subject = ln[9:]
                elif ln.upper().startswith('FROM: '):  from_ = ln[6:]
                for fld, pat in blist:  # blacklisted
                    if match_header(ln, fld, pat):
                        spams[i] = spams.get(i,'')+ln.strip()+'\n'
                for fld, pat in wlist:  # whitelisted
                    if match_header(ln, fld, pat):
                        whitelisted = 1
            # check the header as a whole for spamminess (& log headers)
            header = "\n".join(header)
            spam_prob = is_spam(header)
            summary = summary_line(from_, subject, spam_prob, whitelisted)
            if not whitelisted and spam_prob > 0.98:
                spams[i] = spams.get(i,'')+'+'+summary[:73].strip()+'\n'
            if spams.has_key(i):
                open('new-spam.headers','a').write("%s\n" % header)
                spams[i] += summary
            display(summary)
            freerows -= 1

        say(e('30;1m'),' '*80,'\n',e('36;1m'))  # gray line, then cyan
        for i,fld in spams.items():
            lines = fld.split('\n')
            summary, criteria = lines[-1], lines[:-1]
            freerows -= len(criteria)
            try:
                print "SPAM:", '\n'.join(criteria)
                print >> log, time.strftime("[%y-%m-%d]"), summary
                mbox.dele(i)
            except poplib.error_proto:
                print "Bad response on: 'mbox.del(%d)'" % i
        mbox.quit()
        log.flush()

        say(e('30;1m'),' '*80,'\n')  # gray line, then gray
        for ln in open('spam.log').readlines()[-freerows:]:
            print ln[:80].strip()


--
    _/_/_/ THIS MESSAGE WAS BROUGHT TO YOU BY: Postmodern Enterprises _/_/_/
   _/_/    ~~~~~~~~~~~~~~~~~~~~[mertz at gnosis.cx]~~~~~~~~~~~~~~~~~~~~~  _/_/
  _/_/  The opinions expressed here must be those of my employer...   _/_/
 _/_/_/_/_/_/_/_/_/_/ Surely you don't think that *I* believe them!  _/_/






More information about the Python-list mailing list