ClientCookie/urllib2 with persistent connections?

John J. Lee jjl at pobox.com
Sun May 9 07:31:58 EDT 2004


Chuck Bearden <cbearden at hal-pc.org> writes:

> My impression is that if I want to make several requests over a single,
> persistent HTTP 1.1 connection, I must use httplib rather than urllib2
> or ClientCookies.  Is that correct?

Yes.


> To put it another way, can I have cookie support over persistent
> connections without having to code the cookie support myself?

Yes.  You can use httplib directly, and use a couple of little functions
to link it up to ClientCookie.  The request and response interfaces
required are well-documented.  Here is a clumsy cobbled-together hack,
for HTTP only, which may even work ;-)  No automatic redirection
handling, proxy support, etc.  The cookies bit is simple (thanks to
ClientCookie :-), it's the rest that's messy.

IIUC, the only problem is that urllib.addbase expects a readline
method (as do some users of urllib2, no doubt), which
httplib.HTTPResponse doesn't provide.  So,
urllib2.AbstractHTTPHandler.do_open() passes urllib.addinfourl the
underlying socket file object instead (the fp attribute of HTTPResponse),
which does have a readline() method.  In turn, that means that
HTTPResponse's knowledge of the HTTP protocol is lost, and you can't
do more than one request in a single connection.  So, the code below
passes the HTTPResponse itself to addinfourl, rather than just its fp
attribute.  I suppose the solution is to add a readline method to
HTTPResponse (and whichever other methods HTTPResponse.fp has and
addbase passes through).

import httplib
import ClientCookie

class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object ``fp`` and re-exports its reading methods as
    attributes of the wrapper.  ``read`` is required; ``readline``,
    ``readlines``, ``fileno``, ``__iter__`` and ``next`` are forwarded only
    when ``fp`` actually provides them, so objects lacking some of these
    methods can still be wrapped.
    """

    def __init__(self, fp):
        self.fp = fp
        self.read = self.fp.read
        # Forward each optional method only if fp has that very method.
        # (readline must be guarded by a check for "readline", not
        # "readlines": the two are independent, and checking the wrong
        # name either drops readline or raises AttributeError here.)
        for name in ("readline", "readlines", "fileno"):
            if hasattr(self.fp, name):
                setattr(self, name, getattr(self.fp, name))
        if hasattr(self.fp, "__iter__"):
            self.__iter__ = self.fp.__iter__
            if hasattr(self.fp, "next"):
                self.next = self.fp.next

    def __repr__(self):
        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
                                             id(self), self.fp)

    def close(self):
        """Drop the forwarded methods and close the wrapped object.

        Safe to call more than once: after the first call ``fp`` is None
        and nothing further is closed.
        """
        self.read = None
        self.readline = None
        self.readlines = None
        self.fileno = None
        if self.fp: self.fp.close()
        self.fp = None

class addclosehook(addbase):
    """Wrapper around an open file that runs a callback when closed.

    ``closehook`` is called with ``hookargs`` after the wrapped file has
    been closed.  The hook fires at most once: it is cleared after the
    first successful call.
    """

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        """Close the wrapped file, then invoke and discard the hook."""
        addbase.close(self)
        hook = self.closehook
        if not hook:
            return
        hook(*self.hookargs)
        # Forget the hook and its arguments so a second close() is a no-op.
        self.closehook = None
        self.hookargs = None

class addinfo(addbase):
    """Wrapper around an open file that also carries a headers object."""

    def __init__(self, fp, headers):
        """Wrap ``fp`` and remember ``headers`` for later retrieval."""
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

class addinfourl(addbase):
    """Wrapper around an open file that carries headers and a URL."""

    def __init__(self, fp, headers, url):
        """Wrap ``fp`` and remember ``headers`` and ``url``."""
        addbase.__init__(self, fp)
        self.url = url
        self.headers = headers

    def geturl(self):
        """Return the URL given to the constructor."""
        return self.url

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers


class SimpleURLOpener:
    """Minimal HTTP-only opener that reuses a single connection object.

    Cookie handling is delegated to the cookie jar passed to the
    constructor.  There is no redirection handling or proxy support.

    Usage: call open_connection(host), then open() any number of times for
    URLs on that host, then close_connection().
    """

    def __init__(self, cookiejar, debuglevel=0):
        self.cj = cookiejar
        self._debuglevel = debuglevel
        # Both stay None until open_connection() is called.
        self.conn = None
        self.host = None

    def open_connection(self, host):
        """Create the HTTPConnection for ``host``.

        Only one connection may be held at a time; the assertion fails if
        a previous one has not been released with close_connection().
        """
        assert self.conn is self.host is None
        self.host = host
        self.conn = httplib.HTTPConnection(host)

    def close_connection(self):
        """Close the current connection, if any, and forget its host."""
        # Guarded so that calling this without an open connection (or
        # twice) is a no-op rather than an AttributeError on None.
        if self.conn is not None:
            self.conn.close()
        self.conn = self.host = None

    def set_http_debuglevel(self, level):
        """Set the debug level applied to the connection on each request."""
        self._debuglevel = level

    def add_cookie_header(self, request):
        """Ask the cookie jar to add its cookie header to ``request``."""
        self.cj.add_cookie_header(request)

    def extract_cookies(self, response, request):
        """Ask the cookie jar to extract cookies from ``response``."""
        self.cj.extract_cookies(response, request)

    def open(self, url_or_request):
        """Fetch a URL over the already-open connection.

        ``url_or_request`` is either a ClientCookie.Request or something
        ClientCookie.Request() accepts (a URL string).  Returns the
        addinfourl-wrapped response, with ``code`` and ``msg`` attributes
        set from the HTTP status and reason.
        """
        if isinstance(url_or_request, ClientCookie.Request):
            req = url_or_request
        else:
            req = ClientCookie.Request(url_or_request)
        self.add_cookie_header(req)
        self._do_request(req)
        r = self._do_open(req)
        self.extract_cookies(r, req)
        return r

    def _do_request(self, request):
        """Fill in the standard headers the caller did not supply.

        Adds Content-type and Content-length when the request carries
        data, and a Host header naming the connected host.  Headers that
        are already present on the request are left untouched.
        """
        if request.has_data():  # POST
            data = request.get_data()
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                request.add_unredirected_header(
                    'Content-length', '%d' % len(data))
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', self.host)

    def _do_open(self, req):
        """Send ``req`` on the current connection and wrap the response.

        Raises URLError if the request names no host or if a socket error
        occurs while sending or reading the response.  The assertion fails
        if the request targets a host other than the connected one.
        """
        # Imported locally so the class does not depend on module-level
        # imports beyond the ones the file already has.
        import socket
        import sys
        from urllib import splittype, splithost
        from urllib2 import URLError

        # bleah
        host = req.get_host()
        if not host:
            raise URLError('no host given')
        sel = splittype(req.get_selector())[1]
        sel_host = splithost(sel)[0]
        # The request must be for the host this connection was opened to.
        assert self.host == (sel_host or host)

        self.conn.set_debuglevel(self._debuglevel)

        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)
        try:
            self.conn.request(req.get_method(), req.get_selector(), req.data, headers)
            r = self.conn.getresponse()
        except socket.error: # XXX what error?
            # sys.exc_info() instead of naming the exception in the except
            # clause: that spelling is accepted by every Python version.
            raise URLError(sys.exc_info()[1])

        # Pick apart the HTTPResponse object to get the addinfourl
        # object initialized properly.  The HTTPResponse itself (not its
        # fp attribute) is wrapped.
        resp = addinfourl(r, r.msg, req.get_full_url())
        resp.code = r.status
        resp.msg = r.reason
        return resp

opener = SimpleURLOpener(ClientCookie.CookieJar())

opener.open_connection("python.org")
r1 = opener.open("http://python.org/index.html")
print "***********************************************************"
print r1.read()
req = ClientCookie.Request("http://python.org/download",
                           headers={"Foo-Bar": "baz"})
r2 = opener.open(req)
print "***********************************************************"
print r2.read()
opener.close_connection()


John



More information about the Python-list mailing list