Toss your cookies (Mozilla cookie file filter)

Mon Jun 2 23:19:53 EDT 2003

I threw together a simple Python script to remove unwanted cookies
from my Mozilla cookie file.  Mozilla has some built-in cookie
filtering preferences but they're not that useful.  Some better
preferences will probably be in a later version (there's a very active
CR in Bugzilla on the subject) so this script will become obsolete
when that happens, but for now I find it handy enough that I thought
I'd post it here.  

================================================================

#!/usr/bin/python

# clean mozilla cookie file by removing unwanted cookies
#
# Copyright 2003 by Paul Rubin, written May 2003
# Copying permissions: GNU General Public License version 2, www.gnu.org
#
# Uses two files, "whitelist" and "blacklist"
# file format:
#  blank lines and lines starting with '#' are ignored
#  hostname.domain.tld
#        matches all cookies whose domain is exactly that host name
#  .domain.tld
#        matches all cookies from that domain or any subdomain
#  hostname.domain.tld:cookie1,cookie2,...
#        matches only cookies whose names appear in the list
#
# The script keeps a cookie only if it matches some line in the
# whitelist and does not match any line in the blacklist; a blacklist
# entry therefore overrides a whitelist entry for the same cookie.

class ruledict:
    """Collection of cookie-matching rules, keyed by domain.

    ``rules`` maps a domain string to either the integer 1 (every cookie
    for that domain matches) or a dict whose keys are the cookie names
    that match for that domain.
    """

    def __init__(self):
        self.rules = {}

    def add(self, rule):
        """Register one rule.

        rule is either "domain" (match every cookie for the domain) or
        "domain:name1,name2,..." (match only the listed cookie names).
        Once a domain accepts every cookie, later name-specific rules for
        it are ignored.
        """
        parts = rule.split(':')
        # Strip the domain the same way the cookie names are stripped, so
        # that "example.com : sid" is stored under "example.com".
        domain = parts[0].strip()
        if self.rules.get(domain, 0) == 1:
            # already accepting all cookies for this domain
            return
        if len(parts) == 1:
            # accept all cookies for this domain
            self.rules[domain] = 1
            return
        cookie_names = [name.strip() for name in parts[1].split(',')]
        self.rules.setdefault(domain, {}).update(dict.fromkeys(cookie_names, 1))

    def match(self, cookie):
        """Return 1 if cookie matches a rule, else 0.

        cookie is a sequence whose item 0 is the cookie's domain and whose
        item 5 is the cookie's name; an IndexError is raised if it has
        fewer than six items.

        The domain is tried as given, then with a leading '.', then with
        leading labels removed one at a time (".a.b.c" -> ".b.c" -> ".c").
        """
        domain = cookie[0]
        cookie_name = cookie[5]
        while 1:
            names = self.rules.get(domain)
            if names == 1 or (names is not None and cookie_name in names):
                return 1
            if not domain.startswith('.'):
                domain = '.' + domain
                continue
            # Drop the leading label: keep everything from the next '.'.
            next_dot = domain.find('.', 1)
            if next_dot < 0:
                # Nothing left to strip; no rule matched.
                return 0
            domain = domain[next_dot:]

def read_ruledict(filename):
    """Build a ruledict from the rule file at filename.

    Each line is stripped of surrounding whitespace; empty lines and lines
    whose first non-blank character is '#' are skipped, and every other
    line is passed to ruledict.add().  Raises whatever open() raises if
    the file cannot be read.
    """
    val = ruledict()
    with open(filename) as rule_file:
        for line in rule_file:
            entry = line.strip()
            # Strip before testing so whitespace-only lines, CRLF blank
            # lines and indented comments are not stored as rules.
            if not entry or entry.startswith('#'):
                continue
            val.add(entry)
    return val

# NOTE(review): `vv` is not referenced anywhere else in the visible file, and
# main() reads the whitelist again itself -- this looks like a leftover;
# confirm nothing depends on it before removing.
vv = read_ruledict('whitelist')

import re,tempfile,os

# Cookie file to filter; a relative path, so it is opened relative to the
# current working directory.
cookie_filename = "cookies.txt"
# NOTE: a `global` statement at module level has no effect; whitelist and
# blacklist are actually bound inside main().
global whitelist,blacklist

def backupfile(filename):
    """Return the backup name for filename: filename with "~" appended.

    A single fixed name is used on purpose (rather than a timestamped one)
    so that repeated runs do not pile up backup files.
    """
    return filename + "~"

def good_cookie(cookie):
    """Tell whether cookie should be kept.

    The result is truthy only when the module-level whitelist matches the
    cookie and the module-level blacklist does not.
    """
    allowed = whitelist.match(cookie)
    if not allowed:
        # Not whitelisted: hand back the whitelist's own falsy result
        # without consulting the blacklist.
        return allowed
    return not blacklist.match(cookie)

def main():
    """Filter the cookie file in place, keeping the original as a backup.

    Loads the "whitelist" and "blacklist" rule files into the module-level
    globals, copies every line of cookie_filename that should be kept into
    a temporary file, prints a per-domain count of the discarded cookies,
    then renames the original to its backup name and the temporary file to
    cookie_filename.
    """
    global whitelist, blacklist
    whitelist = read_ruledict("whitelist")
    blacklist = read_ruledict("blacklist")

    # Create the temporary file next to the cookie file: os.rename() cannot
    # move a file across filesystems, and mkstemp() avoids the filename race
    # of mktemp().  mkstemp() creates the file readable and writable only by
    # the owner.
    target_dir = os.path.dirname(os.path.abspath(cookie_filename))
    fd, tempname = tempfile.mkstemp(dir=target_dir)
    killed = {}
    try:
        with os.fdopen(fd, "w") as out:
            with open(cookie_filename) as f:
                for line in f:
                    cookie = line.split('\t')
                    # Comments, blank lines and records too short to carry
                    # a cookie name (field 5) are copied through untouched.
                    if (line[0] == '#' or len(cookie) < 6
                            or good_cookie(cookie)):
                        out.write(line)
                    else:
                        origin = cookie[0]
                        killed[origin] = 1 + killed.get(origin, 0)
    except BaseException:
        # Do not leave a half-written temporary file behind.
        os.remove(tempname)
        raise

    if killed:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Killed:')
        for k in sorted(killed):
            print('%s (%d)' % (k, killed[k]))

    # Both files are closed (and therefore flushed) before being renamed.
    os.rename(cookie_filename, backupfile(cookie_filename))
    os.rename(tempname, cookie_filename)

# Run the filter.  There is no `if __name__ == "__main__"` guard, so this
# also runs when the file is imported as a module.
main()