python/dist/src/Lib _LWPCookieJar.py, NONE, 1.1 _MozillaCookieJar.py, NONE, 1.1 cookielib.py, NONE, 1.1 urllib2.py, 1.66, 1.67

Update of /cvsroot/python/python/dist/src/Lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27715/Lib Modified Files: urllib2.py Added Files: _LWPCookieJar.py _MozillaCookieJar.py cookielib.py Log Message: Patch #963318: Add support for client-side cookie management. --- NEW FILE: _LWPCookieJar.py --- """Load / save to libwww-perl (LWP) format files. Actually, the format is slightly extended from that used by LWP's (libwww-perl's) HTTP::Cookies, to avoid losing some RFC 2965 information not recorded by LWP. It uses the version string "2.0", though really there isn't an LWP Cookies 2.0 format. This indicates that there is extra information in here (domain_dot and port_spec) while still being compatible with libwww-perl, I hope. """ import time, re, logging from cookielib import reraise_unmasked_exceptions, FileCookieJar, Cookie, \ MISSING_FILENAME_TEXT, join_header_words, split_header_words, \ iso2time, time2isoz def lwp_cookie_str(cookie): """Return string representation of Cookie in the LWP cookie file format. Actually, the format is extended a bit -- see module docstring. """ h = [(cookie.name, cookie.value), ("path", cookie.path), ("domain", cookie.domain)] if cookie.port is not None: h.append(("port", cookie.port)) if cookie.path_specified: h.append(("path_spec", None)) if cookie.port_specified: h.append(("port_spec", None)) if cookie.domain_initial_dot: h.append(("domain_dot", None)) if cookie.secure: h.append(("secure", None)) if cookie.expires: h.append(("expires", time2isoz(float(cookie.expires)))) if cookie.discard: h.append(("discard", None)) if cookie.comment: h.append(("comment", cookie.comment)) if cookie.comment_url: h.append(("commenturl", cookie.comment_url)) keys = cookie._rest.keys() keys.sort() for k in keys: h.append((k, str(cookie._rest[k]))) h.append(("version", str(cookie.version))) return join_header_words([h]) class LWPCookieJar(FileCookieJar): """ The LWPCookieJar saves a sequence of "Set-Cookie3" lines. 
"Set-Cookie3" is the format used by the libwww-perl libary, not known to be compatible with any browser, but which is easy to read and doesn't lose information about RFC 2965 cookies. Additional methods as_lwp_str(ignore_discard=True, ignore_expired=True) """ def as_lwp_str(self, ignore_discard=True, ignore_expires=True): """Return cookies as a string of "\n"-separated "Set-Cookie3" headers. ignore_discard and ignore_expires: see docstring for FileCookieJar.save """ now = time.time() r = [] for cookie in self: if not ignore_discard and cookie.discard: continue if not ignore_expires and cookie.is_expired(now): continue r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie)) return "\n".join(r+[""]) def save(self, filename=None, ignore_discard=False, ignore_expires=False): if filename is None: if self.filename is not None: filename = self.filename else: raise ValueError(MISSING_FILENAME_TEXT) f = open(filename, "w") try: # There really isn't an LWP Cookies 2.0 format, but this indicates # that there is extra information in here (domain_dot and # port_spec) while still being compatible with libwww-perl, I hope. f.write("#LWP-Cookies-2.0\n") f.write(self.as_lwp_str(ignore_discard, ignore_expires)) finally: f.close() def _really_load(self, f, filename, ignore_discard, ignore_expires): magic = f.readline() if not re.search(self.magic_re, magic): msg = "%s does not seem to contain cookies" % filename raise IOError(msg) now = time.time() header = "Set-Cookie3:" boolean_attrs = ("port_spec", "path_spec", "domain_dot", "secure", "discard") value_attrs = ("version", "port", "path", "domain", "expires", "comment", "commenturl") try: while 1: line = f.readline() if line == "": break if not line.startswith(header): continue line = line[len(header):].strip() for data in split_header_words([line]): name, value = data[0] # name and value are an exception here, since a plain "foo" # (with no "=", unlike "bar=foo") means a cookie with no # name and value "foo". 
With all other cookie-attributes, # the situation is reversed: "foo" means an attribute named # "foo" with no value! if value is None: name, value = value, name standard = {} rest = {} for k in boolean_attrs: standard[k] = False for k, v in data[1:]: if k is not None: lc = k.lower() else: lc = None # don't lose case distinction for unknown fields if (lc in value_attrs) or (lc in boolean_attrs): k = lc if k in boolean_attrs: if v is None: v = True standard[k] = v elif k in value_attrs: standard[k] = v else: rest[k] = v h = standard.get expires = h("expires") discard = h("discard") if expires is not None: expires = iso2time(expires) if expires is None: discard = True domain = h("domain") domain_specified = domain.startswith(".") c = Cookie(h("version"), name, value, h("port"), h("port_spec"), domain, domain_specified, h("domain_dot"), h("path"), h("path_spec"), h("secure"), expires, discard, h("comment"), h("commenturl"), rest) if not ignore_discard and c.discard: continue if not ignore_expires and c.is_expired(now): continue self.set_cookie(c) except: reraise_unmasked_exceptions((IOError,)) raise IOError("invalid Set-Cookie3 format file %s" % filename) --- NEW FILE: _MozillaCookieJar.py --- """Mozilla / Netscape cookie loading / saving.""" import re, time, logging from cookielib import reraise_unmasked_exceptions, FileCookieJar, Cookie, \ MISSING_FILENAME_TEXT class MozillaCookieJar(FileCookieJar): """ WARNING: you may want to backup your browser's cookies file if you use this class to save cookies. I *think* it works, but there have been bugs in the past! This class differs from CookieJar only in the format it uses to save and load cookies to and from a file. This class uses the Mozilla/Netscape `cookies.txt' format. lynx uses this file format, too. 
Don't expect cookies saved while the browser is running to be noticed by the browser (in fact, Mozilla on unix will overwrite your saved cookies if you change them on disk while it's running; on Windows, you probably can't save at all while the browser is running). Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to Netscape cookies on saving. In particular, the cookie version and port number information is lost, together with information about whether or not Path, Port and Discard were specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the domain as set in the HTTP header started with a dot (yes, I'm aware some domains in Netscape files start with a dot and some don't -- trust me, you really don't want to know any more about this). Note that though Mozilla and Netscape use the same format, they use slightly different headers. The class saves cookies using the Netscape header by default (Mozilla can cope with that). """ magic_re = "#( Netscape)? HTTP Cookie File" header = """\ # Netscape HTTP Cookie File # http://www.netscape.com/newsref/std/cookie_spec.html # This is a generated file! Do not edit. """ def _really_load(self, f, filename, ignore_discard, ignore_expires): now = time.time() magic = f.readline() if not re.search(self.magic_re, magic): f.close() raise IOError( "%s does not look like a Netscape format cookies file" % filename) try: while 1: line = f.readline() if line == "": break # last field may be absent, so keep any trailing tab if line.endswith("\n"): line = line[:-1] # skip comments and blank lines XXX what is $ for? 
if (line.strip().startswith("#") or line.strip().startswith("$") or line.strip() == ""): continue domain, domain_specified, path, secure, expires, name, value = \ line.split("\t") secure = (secure == "TRUE") domain_specified = (domain_specified == "TRUE") if name == "": name = value value = None initial_dot = domain.startswith(".") assert domain_specified == initial_dot discard = False if expires == "": expires = None discard = True # assume path_specified is false c = Cookie(0, name, value, None, False, domain, domain_specified, initial_dot, path, False, secure, expires, discard, None, None, {}) if not ignore_discard and c.discard: continue if not ignore_expires and c.is_expired(now): continue self.set_cookie(c) except: reraise_unmasked_exceptions((IOError,)) raise IOError("invalid Netscape format file %s: %s" % (filename, line)) def save(self, filename=None, ignore_discard=False, ignore_expires=False): if filename is None: if self.filename is not None: filename = self.filename else: raise ValueError(MISSING_FILENAME_TEXT) f = open(filename, "w") try: f.write(self.header) now = time.time() for cookie in self: if not ignore_discard and cookie.discard: continue if not ignore_expires and cookie.is_expired(now): continue if cookie.secure: secure = "TRUE" else: secure = "FALSE" if cookie.domain.startswith("."): initial_dot = "TRUE" else: initial_dot = "FALSE" if cookie.expires is not None: expires = str(cookie.expires) else: expires = "" if cookie.value is None: # cookies.txt regards 'Set-Cookie: foo' as a cookie # with no name, whereas cookielib regards it as a # cookie with no value. name = "" value = cookie.name else: name = cookie.name value = cookie.value f.write( "\t".join([cookie.domain, initial_dot, cookie.path, secure, expires, name, value])+ "\n") finally: f.close() --- NEW FILE: cookielib.py --- """HTTP cookie handling for web clients. This module has (now fairly distant) origins in Gisle Aas' Perl module HTTP::Cookies, from the libwww-perl library. 
Docstrings, comments and debug strings in this code refer to the attributes of the HTTP cookie system as cookie-attributes, to distinguish them clearly from Python attributes. Class diagram (note that the classes which do not derive from FileCookieJar are not distributed with the Python standard library, but are available from http://wwwsearch.sf.net/): CookieJar____ / \ \ FileCookieJar \ \ / | \ \ \ MozillaCookieJar | LWPCookieJar \ \ | | \ [...1717 lines suppressed...] """ if filename is None: if self.filename is not None: filename = self.filename else: raise ValueError(MISSING_FILENAME_TEXT) self._cookies_lock.acquire() old_state = copy.deepcopy(self._cookies) self._cookies = {} try: self.load(filename, ignore_discard, ignore_expires) except (LoadError, IOError): self._cookies = old_state raise self._cookies_lock.release() from _LWPCookieJar import LWPCookieJar, lwp_cookie_str from _MozillaCookieJar import MozillaCookieJar Index: urllib2.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/urllib2.py,v retrieving revision 1.66 retrieving revision 1.67 diff -C2 -d -r1.66 -r1.67 *** urllib2.py 10 May 2004 07:35:33 -0000 1.66 --- urllib2.py 31 May 2004 18:22:40 -0000 1.67 *************** *** 107,110 **** --- 107,111 ---- import urlparse import bisect + import cookielib try: *************** *** 177,181 **** class Request: ! def __init__(self, url, data=None, headers={}): # unwrap('<URL:type://host/path>') --> 'type://host/path' self.__original = unwrap(url) --- 178,183 ---- class Request: ! def __init__(self, url, data=None, headers={}, ! 
origin_req_host=None, unverifiable=False): # unwrap('<URL:type://host/path>') --> 'type://host/path' self.__original = unwrap(url) *************** *** 189,192 **** --- 191,198 ---- self.add_header(key, value) self.unredirected_hdrs = {} + if origin_req_host is None: + origin_req_host = cookielib.request_host(self) + self.origin_req_host = origin_req_host + self.unverifiable = unverifiable def __getattr__(self, attr): *************** *** 243,246 **** --- 249,258 ---- self.__r_host = self.__original + def get_origin_req_host(self): + return self.origin_req_host + + def is_unverifiable(self): + return self.unverifiable + def add_header(self, key, val): # useful for something like authentication *************** *** 255,258 **** --- 267,279 ---- header_name in self.unredirected_hdrs) + def get_header(self, header_name, default=None): + return self.headers.get( + header_name, + self.unredirected_hdrs.get(header_name, default)) + + def header_items(self): + hdrs = self.unredirected_hdrs.copy() + hdrs.update(self.headers) + return hdrs.items() class OpenerDirector: *************** *** 461,465 **** class HTTPRedirectHandler(BaseHandler): ! # maximum number of redirections before assuming we're in a loop max_redirections = 10 --- 482,490 ---- class HTTPRedirectHandler(BaseHandler): ! # maximum number of redirections to any single URL ! # this is needed because of the state that cookies introduce ! max_repeats = 4 ! # maximum total number of redirections (regardless of URL) before ! # assuming we're in a loop max_redirections = 10 *************** *** 482,486 **** # essentially all clients do redirect in this case, so we # do the same. ! return Request(newurl, headers=req.headers) else: raise HTTPError(req.get_full_url(), code, msg, headers, fp) --- 507,514 ---- # essentially all clients do redirect in this case, so we # do the same. ! return Request(newurl, ! headers=req.headers, ! origin_req_host=req.get_origin_req_host(), ! 
unverifiable=True) else: raise HTTPError(req.get_full_url(), code, msg, headers, fp) *************** *** 491,498 **** # attribute to the Request object. def http_error_302(self, req, fp, code, msg, headers): if 'location' in headers: ! newurl = headers['location'] elif 'uri' in headers: ! newurl = headers['uri'] else: return --- 519,528 ---- # attribute to the Request object. def http_error_302(self, req, fp, code, msg, headers): + # Some servers (incorrectly) return multiple Location headers + # (so probably same goes for URI). Use first header. if 'location' in headers: ! newurl = headers.getheaders('location')[0] elif 'uri' in headers: ! newurl = headers.getheaders('uri')[0] else: return *************** *** 507,524 **** # loop detection ! # .redirect_dict has a key (url, code) if url was previously ! # visited as a result of a redirection with that code. The ! # code is needed in addition to the URL because visiting a URL ! # twice isn't necessarily a loop: there is more than one way ! # to redirect (301, 302, 303, 307, refresh). ! key = (newurl, code) if hasattr(req, 'redirect_dict'): visited = new.redirect_dict = req.redirect_dict ! if key in visited or len(visited) >= self.max_redirections: raise HTTPError(req.get_full_url(), code, self.inf_msg + msg, headers, fp) else: visited = new.redirect_dict = req.redirect_dict = {} ! visited[key] = None # Don't close the fp until we are sure that we won't use it --- 537,550 ---- # loop detection ! # .redirect_dict has a key url if url was previously visited. if hasattr(req, 'redirect_dict'): visited = new.redirect_dict = req.redirect_dict ! if (visited.get(newurl, 0) >= self.max_repeats or ! len(visited) >= self.max_redirections): raise HTTPError(req.get_full_url(), code, self.inf_msg + msg, headers, fp) else: visited = new.redirect_dict = req.redirect_dict = {} ! 
visited[newurl] = visited.get(newurl, 0) + 1 # Don't close the fp until we are sure that we won't use it *************** *** 913,917 **** self._debuglevel = level ! def do_request(self, request): host = request.get_host() if not host: --- 939,943 ---- self._debuglevel = level ! def do_request_(self, request): host = request.get_host() if not host: *************** *** 988,992 **** return self.do_open(httplib.HTTPConnection, req) ! http_request = AbstractHTTPHandler.do_request if hasattr(httplib, 'HTTPS'): --- 1014,1018 ---- return self.do_open(httplib.HTTPConnection, req) ! http_request = AbstractHTTPHandler.do_request_ if hasattr(httplib, 'HTTPS'): *************** *** 996,1000 **** return self.do_open(httplib.HTTPSConnection, req) ! https_request = AbstractHTTPHandler.do_request class UnknownHandler(BaseHandler): --- 1022,1043 ---- return self.do_open(httplib.HTTPSConnection, req) ! https_request = AbstractHTTPHandler.do_request_ ! ! class HTTPCookieProcessor(BaseHandler): ! def __init__(self, cookiejar=None): ! if cookiejar is None: ! cookiejar = CookieJar() ! self.cookiejar = cookiejar ! ! def http_request(self, request): ! self.cookiejar.add_cookie_header(request) ! return request ! ! def http_response(self, request, response): ! self.cookiejar.extract_cookies(response, request) ! return response ! ! https_request = http_request ! https_response = http_response class UnknownHandler(BaseHandler):
participants (1)
-
loewis@users.sourceforge.net