Toss your cookies (Mozilla cookie file filter)
Paul Rubin
http
Mon Jun 2 23:19:53 EDT 2003
I threw together a simple Python script to remove unwanted cookies
from my Mozilla cookie file. Mozilla has some built-in cookie
filtering preferences but they're not that useful. Some better
preferences will probably be in a later version (there's a very active
CR in Bugzilla on the subject) so this script will become obsolete
when that happens, but for now I find it handy enough that I thought
I'd post it here.
================================================================
#!/usr/bin/python
# clean mozilla cookie file by removing unwanted cookies
#
# Copyright 2003 by Paul Rubin, written May 2003
# Copying permissions: GNU General Public License version 2, www.gnu.org
#
# Uses two files, "whitelist" and "blacklist"
# file format:
#   blank lines and lines starting with '#' are ignored
#   hostname.domain.tld
#       matches all cookies from exactly that host
#   .domain.tld
#       matches all cookies from that domain or any subdomain
#   hostname.domain.tld:cookie1,cookie2,...
#       matches only cookies whose names appear in the list
#
# The script reads the whitelist and discards any cookies that don't
# match some line in it. Then it reads the blacklist and discards any
# remaining cookies (those that passed the whitelist) that match it.
class ruledict(object):
    """Collection of cookie-matching rules, keyed by domain.

    ``rules`` maps a domain string to either ``ACCEPT_ALL`` (every cookie
    for that domain matches) or a set of the cookie names that match.
    """

    # Marker stored in ``rules`` for a domain whose cookies all match.
    ACCEPT_ALL = 1

    def __init__(self):
        self.rules = {}

    def add(self, rule):
        """Register one rule line.

        ``rule`` is either ``domain`` (match every cookie of that domain)
        or ``domain:name1,name2,...`` (match only the listed cookie names).
        Anything after a second ':' is ignored.
        """
        parts = rule.split(':')
        # Strip the domain just like the names, so "host : name" still
        # produces a key that can equal a cookie's domain field.
        domain = parts[0].strip()
        if self.rules.get(domain) == self.ACCEPT_ALL:
            # already accepting all cookies for this domain
            return
        if len(parts) == 1:
            # accept all cookies for this domain (replaces any name list)
            self.rules[domain] = self.ACCEPT_ALL
            return
        names = self.rules.setdefault(domain, set())
        names.update(name.strip() for name in parts[1].split(','))

    def match(self, cookie):
        """Return True if ``cookie`` matches some rule, else False.

        ``cookie`` is a sequence of cookie-file fields; field 0 is used as
        the domain and field 5 as the cookie name. Raises IndexError when
        the sequence has fewer than six fields.

        The domain is tried as given, then with a leading '.', then with
        the leading label removed one at a time
        (``.a.b.c`` -> ``.b.c`` -> ``.c``) until nothing is left to remove.
        """
        domain = cookie[0]
        cookie_name = cookie[5]
        while True:
            entry = self.rules.get(domain)
            if entry is not None and (
                    entry == self.ACCEPT_ALL or cookie_name in entry):
                return True
            if not domain.startswith('.'):
                domain = '.' + domain
                continue
            # Drop the first label: keep everything from the next dot on.
            next_dot = domain.find('.', 1)
            if next_dot == -1:
                return False
            domain = domain[next_dot:]
def read_ruledict(filename):
    """Build a ruledict from the rule file at ``filename``.

    Each line is stripped of surrounding whitespace; empty lines and lines
    whose first non-blank character is '#' are skipped, and every other
    line is passed to ``ruledict.add``. Errors from ``open`` (for example
    a missing file) propagate to the caller.
    """
    val = ruledict()
    # The context manager closes the file even if add() raises.
    with open(filename) as rule_file:
        for line in rule_file:
            entry = line.strip()
            # Test the stripped text so "  \n", "\r\n" and indented
            # comments are skipped instead of becoming bogus rules.
            if not entry or entry.startswith('#'):
                continue
            val.add(entry)
    return val
import os
import re
import tempfile

# NOTE(review): this parses the whitelist at import time and binds it to
# `vv`, which nothing else visible in this file reads; main() loads the
# whitelist again itself. Looks like leftover debugging -- confirm before
# removing.
vv = read_ruledict('whitelist')

# Path of the cookie file to filter (relative, so resolved against the
# current working directory).
cookie_filename = "cookies.txt"

# `whitelist` and `blacklist` are module globals assigned by main(); a
# `global` statement at module level has no effect, so none is needed here.
def backupfile(filename):
    """Return the backup path for ``filename``: the same name plus "~".

    A single fixed backup name is used on purpose, rather than a
    timestamped one, so repeated runs replace one backup instead of
    piling up many.
    """
    return filename + "~"
def good_cookie(cookie):
    """Tell whether ``cookie`` should be kept.

    A cookie is kept when the module-level ``whitelist`` matches it and
    the module-level ``blacklist`` does not. The blacklist is consulted
    only for cookies that the whitelist matched.
    """
    allowed = whitelist.match(cookie)
    if not allowed:
        # Hand back the whitelist's own falsy result unchanged.
        return allowed
    return not blacklist.match(cookie)
def main():
    """Filter the cookie file in place, keeping a "~" backup.

    Loads the "whitelist" and "blacklist" rule files into the module
    globals, copies every line of ``cookie_filename`` that should survive
    into a temporary file, prints a per-domain count of removed cookies,
    then renames the original to its backup name and the temporary file
    into its place.

    Lines starting with '#' and lines with fewer than six tab-separated
    fields are copied through untouched, because there is no cookie name
    to test on them.
    """
    global whitelist, blacklist
    whitelist = read_ruledict("whitelist")
    blacklist = read_ruledict("blacklist")

    # Create the temporary file next to the cookie file: mkstemp avoids the
    # name race of mktemp, and staying in the same directory keeps the
    # final os.rename on a single filesystem.
    target_dir = os.path.dirname(os.path.abspath(cookie_filename))
    fd, tempname = tempfile.mkstemp(dir=target_dir)
    killed = {}
    try:
        # Both files are closed (and the output flushed) before renaming.
        with os.fdopen(fd, "w") as out, open(cookie_filename) as f:
            for line in f:
                cookie = line.split('\t')
                if (line.startswith('#') or len(cookie) < 6
                        or good_cookie(cookie)):
                    out.write(line)
                else:
                    origin = cookie[0]
                    killed[origin] = 1 + killed.get(origin, 0)
    except BaseException:
        # Do not leave a half-written temporary file behind.
        os.remove(tempname)
        raise

    if killed:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Killed:')
        for k in sorted(killed):
            print('%s (%d)' % (k, killed[k]))

    os.rename(cookie_filename, backupfile(cookie_filename))
    os.rename(tempname, cookie_filename)
# Run the filter as soon as this file is executed.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so importing
# this file also runs main() -- confirm that is intended.
main()
More information about the Python-list
mailing list