Mailman 3 python/dist/src/Lib _LWPCookieJar.py, NONE, 1.1 _MozillaCookieJar.py, NONE, 1.1 cookielib.py, NONE, 1.1 urllib2.py, 1.66, 1.67 - Python-checkins

May 31, 2004

      Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27715/Lib

Modified Files:
	urllib2.py 
Added Files:
	_LWPCookieJar.py _MozillaCookieJar.py cookielib.py 
Log Message:
Patch #963318: Add support for client-side cookie management.

--- NEW FILE: _LWPCookieJar.py ---
"""Load / save to libwww-perl (LWP) format files.

Actually, the format is slightly extended from that used by LWP's
(libwww-perl's) HTTP::Cookies, to avoid losing some RFC 2965 information
not recorded by LWP.

It uses the version string "2.0", though really there isn't an LWP Cookies
2.0 format.  This indicates that there is extra information in here
(domain_dot and port_spec) while still being compatible with
libwww-perl, I hope.

"""

import time, re, logging
from cookielib import reraise_unmasked_exceptions, FileCookieJar, Cookie, \
     MISSING_FILENAME_TEXT, join_header_words, split_header_words, \
     iso2time, time2isoz

def lwp_cookie_str(cookie):
    """Return the string form of a Cookie in the LWP cookie file format.

    The format is in fact slightly extended -- see the module docstring.

    """
    pairs = [(cookie.name, cookie.value),
             ("path", cookie.path),
             ("domain", cookie.domain)]
    if cookie.port is not None:
        pairs.append(("port", cookie.port))

    # Valueless flag attributes; each one is emitted only when it is set.
    flags = (("path_spec", cookie.path_specified),
             ("port_spec", cookie.port_specified),
             ("domain_dot", cookie.domain_initial_dot),
             ("secure", cookie.secure))
    for flag_name, is_set in flags:
        if is_set:
            pairs.append((flag_name, None))

    if cookie.expires:
        pairs.append(("expires", time2isoz(float(cookie.expires))))
    if cookie.discard:
        pairs.append(("discard", None))
    if cookie.comment:
        pairs.append(("comment", cookie.comment))
    if cookie.comment_url:
        pairs.append(("commenturl", cookie.comment_url))

    # Remaining cookie-attributes, stringified and emitted in sorted key order.
    extras = cookie._rest
    for key in sorted(extras.keys()):
        pairs.append((key, str(extras[key])))

    pairs.append(("version", str(cookie.version)))

    return join_header_words([pairs])

class LWPCookieJar(FileCookieJar):
    """
    FileCookieJar that stores cookies as a sequence of "Set-Cookie3" lines.

    "Set-Cookie3" is the format used by the libwww-perl library.  It is not
    known to be compatible with any browser, but it is easy to read and
    doesn't lose information about RFC 2965 cookies.

    Additional methods

    as_lwp_str(ignore_discard=True, ignore_expires=True)

    """

    def as_lwp_str(self, ignore_discard=True, ignore_expires=True):
        """Return cookies as newline-terminated "Set-Cookie3" header lines.

        ignore_discard and ignore_expires: see docstring for FileCookieJar.save

        """
        now = time.time()
        # The discard test comes first so that is_expired() is only consulted
        # for cookies that survived it.
        lines = ["Set-Cookie3: %s" % lwp_cookie_str(cookie)
                 for cookie in self
                 if (ignore_discard or not cookie.discard)
                 and (ignore_expires or not cookie.is_expired(now))]
        # The trailing empty item makes join() terminate the last line too.
        lines.append("")
        return "\n".join(lines)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the jar to filename (default: self.filename) in LWP format.

        Raises ValueError when no filename is given and self.filename is
        None.  ignore_discard and ignore_expires are passed to as_lwp_str().

        """
        if filename is None:
            filename = self.filename
            if filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename, "w")
        try:
            # There really isn't an LWP Cookies 2.0 format, but this indicates
            # that there is extra information in here (domain_dot and
            # port_spec) while still being compatible with libwww-perl, I hope.
            f.write("#LWP-Cookies-2.0\n")
            f.write(self.as_lwp_str(ignore_discard, ignore_expires))
        finally:
            f.close()

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Parse "Set-Cookie3" lines from the open file f into this jar.

        filename is used only in error messages.  Cookies flagged discard
        are skipped unless ignore_discard is true; expired cookies are
        skipped unless ignore_expires is true.

        Raises IOError if the first line does not match self.magic_re, or
        if the remainder of the file cannot be parsed.

        """
        first_line = f.readline()
        if not re.search(self.magic_re, first_line):
            raise IOError("%s does not seem to contain cookies" % filename)

        now = time.time()

        prefix = "Set-Cookie3:"
        flag_names = ("port_spec", "path_spec", "domain_dot",
                      "secure", "discard")
        valued_names = ("version",
                        "port", "path", "domain",
                        "expires",
                        "comment", "commenturl")

        try:
            # readline() returns "" only at end of file.
            for line in iter(f.readline, ""):
                if not line.startswith(prefix):
                    continue
                line = line[len(prefix):].strip()

                for words in split_header_words([line]):
                    name, value = words[0]
                    # The first pair is special: a plain "foo" (no "=",
                    # unlike "bar=foo") is a cookie with no name and value
                    # "foo".  For every other cookie-attribute it is the
                    # other way round: "foo" is an attribute named "foo"
                    # with no value.
                    if value is None:
                        name, value = None, name

                    # Every flag defaults to False unless the line sets it.
                    standard = dict.fromkeys(flag_names, False)
                    rest = {}
                    for key, val in words[1:]:
                        lowered = key
                        if key is not None:
                            lowered = key.lower()
                        if lowered in flag_names:
                            if val is None:
                                val = True
                            standard[lowered] = val
                        elif lowered in valued_names:
                            standard[lowered] = val
                        else:
                            # don't lose case distinction for unknown fields
                            rest[key] = val

                    get = standard.get
                    expires = get("expires")
                    discard = get("discard")
                    if expires is not None:
                        expires = iso2time(expires)
                    # No usable expiry time: treat it as a session cookie.
                    if expires is None:
                        discard = True
                    domain = get("domain")
                    c = Cookie(get("version"), name, value,
                               get("port"), get("port_spec"),
                               domain, domain.startswith("."),
                               get("domain_dot"),
                               get("path"), get("path_spec"),
                               get("secure"),
                               expires,
                               discard,
                               get("comment"),
                               get("commenturl"),
                               rest)
                    unwanted = ((not ignore_discard and c.discard) or
                                (not ignore_expires and c.is_expired(now)))
                    if not unwanted:
                        self.set_cookie(c)
        except:
            # Deliberate catch-all: the helper decides which exceptions
            # propagate unchanged; anything else is reported as a bad file.
            reraise_unmasked_exceptions((IOError,))
            raise IOError("invalid Set-Cookie3 format file %s" % filename)

--- NEW FILE: _MozillaCookieJar.py ---
"""Mozilla / Netscape cookie loading / saving."""

import re, time, logging

from cookielib import reraise_unmasked_exceptions, FileCookieJar, Cookie, \
     MISSING_FILENAME_TEXT

class MozillaCookieJar(FileCookieJar):
    """
    FileCookieJar that reads and writes Mozilla/Netscape `cookies.txt' files.

    WARNING: you may want to backup your browser's cookies file if you use
    this class to save cookies.  I *think* it works, but there have been
    bugs in the past!

    This class differs from CookieJar only in the format it uses to save and
    load cookies to and from a file.  This class uses the Mozilla/Netscape
    `cookies.txt' format.  lynx uses this file format, too.

    Don't expect cookies saved while the browser is running to be noticed by
    the browser (in fact, Mozilla on unix will overwrite your saved cookies if
    you change them on disk while it's running; on Windows, you probably can't
    save at all while the browser is running).

    Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to
    Netscape cookies on saving.

    In particular, the cookie version and port number information is lost,
    together with information about whether or not Path, Port and Discard were
    specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
    domain as set in the HTTP header started with a dot (yes, I'm aware some
    domains in Netscape files start with a dot and some don't -- trust me, you
    really don't want to know any more about this).

    Note that though Mozilla and Netscape use the same format, they use
    slightly different headers.  The class saves cookies using the Netscape
    header by default (Mozilla can cope with that).

    """
    # Pattern the first line of a file must match for it to be loaded.
    magic_re = "#( Netscape)? HTTP Cookie File"
    # Written verbatim at the top of every saved file.  The text is kept
    # flush-left on purpose: indenting it to match the class body would put
    # leading spaces in front of every "#" in the saved file.
    header = """\
# Netscape HTTP Cookie File
# http://www.netscape.com/newsref/std/cookie_spec.html
# This is a generated file!  Do not edit.

"""

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Parse tab-separated cookie lines from the open file f into this jar.

        filename is used only in error messages.  Cookies flagged discard
        are skipped unless ignore_discard is true; expired cookies are
        skipped unless ignore_expires is true.

        Raises IOError if the first line does not match self.magic_re, or
        if a line cannot be parsed.

        """
        now = time.time()

        magic = f.readline()
        if not re.search(self.magic_re, magic):
            f.close()
            raise IOError(
                "%s does not look like a Netscape format cookies file" %
                filename)

        try:
            while 1:
                line = f.readline()
                if line == "": break

                # last field may be absent, so keep any trailing tab
                if line.endswith("\n"): line = line[:-1]

                # skip comments and blank lines XXX what is $ for?
                if (line.strip().startswith("#") or
                    line.strip().startswith("$") or
                    line.strip() == ""):
                    continue

                # Exactly seven fields are required; any other count makes
                # the unpacking fail and is reported as a bad file below.
                domain, domain_specified, path, secure, expires, name, value = \
                        line.split("\t")
                secure = (secure == "TRUE")
                domain_specified = (domain_specified == "TRUE")
                if name == "":
                    # cookies.txt stores 'Set-Cookie: foo' as a cookie with
                    # no name; cookielib wants it as a cookie with no value.
                    name = value
                    value = None

                initial_dot = domain.startswith(".")
                if domain_specified != initial_dot:
                    # Explicit check rather than an assert, so the
                    # validation still runs under -O.  Handled below like
                    # any other malformed line.
                    raise ValueError(
                        "domain flag does not match leading dot in %r" %
                        domain)

                discard = False
                if expires == "":
                    # No expiry time recorded: a session cookie.
                    expires = None
                    discard = True

                # assume path_specified is false
                c = Cookie(0, name, value,
                           None, False,
                           domain, domain_specified, initial_dot,
                           path, False,
                           secure,
                           expires,
                           discard,
                           None,
                           None,
                           {})
                if not ignore_discard and c.discard:
                    continue
                if not ignore_expires and c.is_expired(now):
                    continue
                self.set_cookie(c)

        except:
            # Deliberate catch-all: the helper decides which exceptions
            # propagate unchanged; anything else is reported as a bad file.
            reraise_unmasked_exceptions((IOError,))
            raise IOError("invalid Netscape format file %s: %s" %
                          (filename, line))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the jar to filename (default: self.filename) as cookies.txt.

        Cookies flagged discard are omitted unless ignore_discard is true;
        expired cookies are omitted unless ignore_expires is true.

        Raises ValueError when no filename is given and self.filename is
        None.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename, "w")
        try:
            f.write(self.header)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure: secure = "TRUE"
                else: secure = "FALSE"
                if cookie.domain.startswith("."): initial_dot = "TRUE"
                else: initial_dot = "FALSE"
                if cookie.expires is not None:
                    expires = str(cookie.expires)
                else:
                    expires = ""
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas cookielib regards it as a
                    # cookie with no value.
                    name = ""
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    "\t".join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value])+
                    "\n")
        finally:
            f.close()

--- NEW FILE: cookielib.py ---
"""HTTP cookie handling for web clients.

This module has (now fairly distant) origins in Gisle Aas' Perl module
HTTP::Cookies, from the libwww-perl library.

Docstrings, comments and debug strings in this code refer to the
attributes of the HTTP cookie system as cookie-attributes, to distinguish
them clearly from Python attributes.

Class diagram (note that the classes which do not derive from
FileCookieJar are not distributed with the Python standard library, but
are available from http://wwwsearch.sf.net/):

                        CookieJar____
                        /     \      \
            FileCookieJar      \      \
             /    |   \         \      \
 MozillaCookieJar | LWPCookieJar \      \
                  |               |      \
[...1717 lines suppressed...]

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        self._cookies_lock.acquire()

        old_state = copy.deepcopy(self._cookies)
        self._cookies = {}
        try:
            self.load(filename, ignore_discard, ignore_expires)
        except (LoadError, IOError):
            self._cookies = old_state
            raise

        self._cookies_lock.release()

from _LWPCookieJar import LWPCookieJar, lwp_cookie_str
from _MozillaCookieJar import MozillaCookieJar

Index: urllib2.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/urllib2.py,v
retrieving revision 1.66
retrieving revision 1.67
diff -C2 -d -r1.66 -r1.67
*** urllib2.py	10 May 2004 07:35:33 -0000	1.66
--- urllib2.py	31 May 2004 18:22:40 -0000	1.67
***************
*** 107,110 ****
--- 107,111 ----
  import urlparse
  import bisect
+ import cookielib

  try:
***************
*** 177,181 ****
  class Request:

!     def __init__(self, url, data=None, headers={}):
          # unwrap('<URL:type://host/path>') --> 'type://host/path'
          self.__original = unwrap(url)
--- 178,183 ----
  class Request:

!     def __init__(self, url, data=None, headers={},
!                  origin_req_host=None, unverifiable=False):
          # unwrap('<URL:type://host/path>') --> 'type://host/path'
          self.__original = unwrap(url)
***************
*** 189,192 ****
--- 191,198 ----
              self.add_header(key, value)
          self.unredirected_hdrs = {}
+         if origin_req_host is None:
+             origin_req_host = cookielib.request_host(self)
+         self.origin_req_host = origin_req_host
+         self.unverifiable = unverifiable

      def __getattr__(self, attr):
***************
*** 243,246 ****
--- 249,258 ----
          self.__r_host = self.__original

+     def get_origin_req_host(self):
+         return self.origin_req_host
+ 
+     def is_unverifiable(self):
+         return self.unverifiable
+ 
      def add_header(self, key, val):
          # useful for something like authentication
***************
*** 255,258 ****
--- 267,279 ----
                      header_name in self.unredirected_hdrs)

+     def get_header(self, header_name, default=None):
+         return self.headers.get(
+             header_name,
+             self.unredirected_hdrs.get(header_name, default))
+ 
+     def header_items(self):
+         hdrs = self.unredirected_hdrs.copy()
+         hdrs.update(self.headers)
+         return hdrs.items()

  class OpenerDirector:
***************
*** 461,465 ****

  class HTTPRedirectHandler(BaseHandler):
!     # maximum number of redirections before assuming we're in a loop
      max_redirections = 10

--- 482,490 ----

  class HTTPRedirectHandler(BaseHandler):
!     # maximum number of redirections to any single URL
!     # this is needed because of the state that cookies introduce
!     max_repeats = 4
!     # maximum total number of redirections (regardless of URL) before
!     # assuming we're in a loop
      max_redirections = 10

***************
*** 482,486 ****
              # essentially all clients do redirect in this case, so we
              # do the same.
!             return Request(newurl, headers=req.headers)
          else:
              raise HTTPError(req.get_full_url(), code, msg, headers, fp)
--- 507,514 ----
              # essentially all clients do redirect in this case, so we
              # do the same.
!             return Request(newurl,
!                            headers=req.headers,
!                            origin_req_host=req.get_origin_req_host(),
!                            unverifiable=True)
          else:
              raise HTTPError(req.get_full_url(), code, msg, headers, fp)
***************
*** 491,498 ****
      # attribute to the Request object.
      def http_error_302(self, req, fp, code, msg, headers):
          if 'location' in headers:
!             newurl = headers['location']
          elif 'uri' in headers:
!             newurl = headers['uri']
          else:
              return
--- 519,528 ----
      # attribute to the Request object.
      def http_error_302(self, req, fp, code, msg, headers):
+         # Some servers (incorrectly) return multiple Location headers
+         # (so probably same goes for URI).  Use first header.
          if 'location' in headers:
!             newurl = headers.getheaders('location')[0]
          elif 'uri' in headers:
!             newurl = headers.getheaders('uri')[0]
          else:
              return
***************
*** 507,524 ****

          # loop detection
!         # .redirect_dict has a key (url, code) if url was previously
!         # visited as a result of a redirection with that code.  The
!         # code is needed in addition to the URL because visiting a URL
!         # twice isn't necessarily a loop: there is more than one way
!         # to redirect (301, 302, 303, 307, refresh).
!         key = (newurl, code)
          if hasattr(req, 'redirect_dict'):
              visited = new.redirect_dict = req.redirect_dict
!             if key in visited or len(visited) >= self.max_redirections:
                  raise HTTPError(req.get_full_url(), code,
                                  self.inf_msg + msg, headers, fp)
          else:
              visited = new.redirect_dict = req.redirect_dict = {}
!         visited[key] = None

          # Don't close the fp until we are sure that we won't use it
--- 537,550 ----

          # loop detection
!         # .redirect_dict has a key url if url was previously visited.
          if hasattr(req, 'redirect_dict'):
              visited = new.redirect_dict = req.redirect_dict
!             if (visited.get(newurl, 0) >= self.max_repeats or
!                 len(visited) >= self.max_redirections):
                  raise HTTPError(req.get_full_url(), code,
                                  self.inf_msg + msg, headers, fp)
          else:
              visited = new.redirect_dict = req.redirect_dict = {}
!         visited[newurl] = visited.get(newurl, 0) + 1

          # Don't close the fp until we are sure that we won't use it
***************
*** 913,917 ****
          self._debuglevel = level

!     def do_request(self, request):
          host = request.get_host()
          if not host:
--- 939,943 ----
          self._debuglevel = level

!     def do_request_(self, request):
          host = request.get_host()
          if not host:
***************
*** 988,992 ****
          return self.do_open(httplib.HTTPConnection, req)

!     http_request = AbstractHTTPHandler.do_request

  if hasattr(httplib, 'HTTPS'):
--- 1014,1018 ----
          return self.do_open(httplib.HTTPConnection, req)

!     http_request = AbstractHTTPHandler.do_request_

  if hasattr(httplib, 'HTTPS'):
***************
*** 996,1000 ****
              return self.do_open(httplib.HTTPSConnection, req)

!         https_request = AbstractHTTPHandler.do_request

  class UnknownHandler(BaseHandler):
--- 1022,1043 ----
              return self.do_open(httplib.HTTPSConnection, req)

!         https_request = AbstractHTTPHandler.do_request_
! 
! class HTTPCookieProcessor(BaseHandler):
!     def __init__(self, cookiejar=None):
!         if cookiejar is None:
!             cookiejar = cookielib.CookieJar()  # only the module is imported
!         self.cookiejar = cookiejar
! 
!     def http_request(self, request):
!         self.cookiejar.add_cookie_header(request)
!         return request
! 
!     def http_response(self, request, response):
!         self.cookiejar.extract_cookies(response, request)
!         return response
! 
!     https_request = http_request
!     https_response = http_response

  class UnknownHandler(BaseHandler):

python/dist/src/Lib _LWPCookieJar.py, NONE, 1.1 _MozillaCookieJar.py, NONE, 1.1 cookielib.py, NONE, 1.1 urllib2.py, 1.66, 1.67

loewis＠users.sourceforge.net

tags

participants (1)