[pypy-commit] pypy default: Close underlaying sockets in few places. I'm sure this list is not-exhaustive,

fijal noreply at buildbot.pypy.org
Sat Oct 8 18:51:04 CEST 2011


Author: Maciej Fijalkowski <fijall at gmail.com>
Branch: 
Changeset: r47880:ab84e543fe0d
Date: 2011-10-08 18:50 +0200
http://bitbucket.org/pypy/pypy/changeset/ab84e543fe0d/

Log:	Close underlaying sockets in few places. I'm sure this list is not-
	exhaustive, but I'm a bit unsure what to do :/

diff --git a/lib-python/modified-2.7/httplib.py b/lib-python/modified-2.7/httplib.py
new file mode 100644
--- /dev/null
+++ b/lib-python/modified-2.7/httplib.py
@@ -0,0 +1,1377 @@
+"""HTTP/1.1 client library
+
+<intro stuff goes here>
+<other stuff, too>
+
+HTTPConnection goes through a number of "states", which define when a client
+may legally make another request or fetch the response for a particular
+request. This diagram details these state transitions:
+
+    (null)
+      |
+      | HTTPConnection()
+      v
+    Idle
+      |
+      | putrequest()
+      v
+    Request-started
+      |
+      | ( putheader() )*  endheaders()
+      v
+    Request-sent
+      |
+      | response = getresponse()
+      v
+    Unread-response   [Response-headers-read]
+      |\____________________
+      |                     |
+      | response.read()     | putrequest()
+      v                     v
+    Idle                  Req-started-unread-response
+                     ______/|
+                   /        |
+   response.read() |        | ( putheader() )*  endheaders()
+                   v        v
+       Request-started    Req-sent-unread-response
+                            |
+                            | response.read()
+                            v
+                          Request-sent
+
+This diagram presents the following rules:
+  -- a second request may not be started until {response-headers-read}
+  -- a response [object] cannot be retrieved until {request-sent}
+  -- there is no differentiation between an unread response body and a
+     partially read response body
+
+Note: this enforcement is applied by the HTTPConnection class. The
+      HTTPResponse class does not enforce this state machine, which
+      implies sophisticated clients may accelerate the request/response
+      pipeline. Caution should be taken, though: accelerating the states
+      beyond the above pattern may imply knowledge of the server's
+      connection-close behavior for certain requests. For example, it
+      is impossible to tell whether the server will close the connection
+      UNTIL the response headers have been read; this means that further
+      requests cannot be placed into the pipeline until it is known that
+      the server will NOT be closing the connection.
+
+Logical State                  __state            __response
+-------------                  -------            ----------
+Idle                           _CS_IDLE           None
+Request-started                _CS_REQ_STARTED    None
+Request-sent                   _CS_REQ_SENT       None
+Unread-response                _CS_IDLE           <response_class>
+Req-started-unread-response    _CS_REQ_STARTED    <response_class>
+Req-sent-unread-response       _CS_REQ_SENT       <response_class>
+"""
+
+from array import array
+import os
+import socket
+from sys import py3kwarning
+from urlparse import urlsplit
+import warnings
+with warnings.catch_warnings():
+    if py3kwarning:
+        warnings.filterwarnings("ignore", ".*mimetools has been removed",
+                                DeprecationWarning)
+    import mimetools
+
+try:
+    from cStringIO import StringIO
+except ImportError:
+    from StringIO import StringIO
+
+__all__ = ["HTTP", "HTTPResponse", "HTTPConnection",
+           "HTTPException", "NotConnected", "UnknownProtocol",
+           "UnknownTransferEncoding", "UnimplementedFileMode",
+           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
+           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
+           "BadStatusLine", "error", "responses"]
+
+HTTP_PORT = 80
+HTTPS_PORT = 443
+
+_UNKNOWN = 'UNKNOWN'
+
+# connection states
+_CS_IDLE = 'Idle'
+_CS_REQ_STARTED = 'Request-started'
+_CS_REQ_SENT = 'Request-sent'
+
+# status codes
+# informational
+CONTINUE = 100
+SWITCHING_PROTOCOLS = 101
+PROCESSING = 102
+
+# successful
+OK = 200
+CREATED = 201
+ACCEPTED = 202
+NON_AUTHORITATIVE_INFORMATION = 203
+NO_CONTENT = 204
+RESET_CONTENT = 205
+PARTIAL_CONTENT = 206
+MULTI_STATUS = 207
+IM_USED = 226
+
+# redirection
+MULTIPLE_CHOICES = 300
+MOVED_PERMANENTLY = 301
+FOUND = 302
+SEE_OTHER = 303
+NOT_MODIFIED = 304
+USE_PROXY = 305
+TEMPORARY_REDIRECT = 307
+
+# client error
+BAD_REQUEST = 400
+UNAUTHORIZED = 401
+PAYMENT_REQUIRED = 402
+FORBIDDEN = 403
+NOT_FOUND = 404
+METHOD_NOT_ALLOWED = 405
+NOT_ACCEPTABLE = 406
+PROXY_AUTHENTICATION_REQUIRED = 407
+REQUEST_TIMEOUT = 408
+CONFLICT = 409
+GONE = 410
+LENGTH_REQUIRED = 411
+PRECONDITION_FAILED = 412
+REQUEST_ENTITY_TOO_LARGE = 413
+REQUEST_URI_TOO_LONG = 414
+UNSUPPORTED_MEDIA_TYPE = 415
+REQUESTED_RANGE_NOT_SATISFIABLE = 416
+EXPECTATION_FAILED = 417
+UNPROCESSABLE_ENTITY = 422
+LOCKED = 423
+FAILED_DEPENDENCY = 424
+UPGRADE_REQUIRED = 426
+
+# server error
+INTERNAL_SERVER_ERROR = 500
+NOT_IMPLEMENTED = 501
+BAD_GATEWAY = 502
+SERVICE_UNAVAILABLE = 503
+GATEWAY_TIMEOUT = 504
+HTTP_VERSION_NOT_SUPPORTED = 505
+INSUFFICIENT_STORAGE = 507
+NOT_EXTENDED = 510
+
+# Mapping status codes to official W3C names
+responses = {
+    100: 'Continue',
+    101: 'Switching Protocols',
+
+    200: 'OK',
+    201: 'Created',
+    202: 'Accepted',
+    203: 'Non-Authoritative Information',
+    204: 'No Content',
+    205: 'Reset Content',
+    206: 'Partial Content',
+
+    300: 'Multiple Choices',
+    301: 'Moved Permanently',
+    302: 'Found',
+    303: 'See Other',
+    304: 'Not Modified',
+    305: 'Use Proxy',
+    306: '(Unused)',
+    307: 'Temporary Redirect',
+
+    400: 'Bad Request',
+    401: 'Unauthorized',
+    402: 'Payment Required',
+    403: 'Forbidden',
+    404: 'Not Found',
+    405: 'Method Not Allowed',
+    406: 'Not Acceptable',
+    407: 'Proxy Authentication Required',
+    408: 'Request Timeout',
+    409: 'Conflict',
+    410: 'Gone',
+    411: 'Length Required',
+    412: 'Precondition Failed',
+    413: 'Request Entity Too Large',
+    414: 'Request-URI Too Long',
+    415: 'Unsupported Media Type',
+    416: 'Requested Range Not Satisfiable',
+    417: 'Expectation Failed',
+
+    500: 'Internal Server Error',
+    501: 'Not Implemented',
+    502: 'Bad Gateway',
+    503: 'Service Unavailable',
+    504: 'Gateway Timeout',
+    505: 'HTTP Version Not Supported',
+}
+
+# maximal amount of data to read at one time in _safe_read
+MAXAMOUNT = 1048576
+
+class HTTPMessage(mimetools.Message):
+
+    def addheader(self, key, value):
+        """Add header for field key handling repeats."""
+        prev = self.dict.get(key)
+        if prev is None:
+            self.dict[key] = value
+        else:
+            combined = ", ".join((prev, value))
+            self.dict[key] = combined
+
+    def addcontinue(self, key, more):
+        """Add more field data from a continuation line."""
+        prev = self.dict[key]
+        self.dict[key] = prev + "\n " + more
+
+    def readheaders(self):
+        """Read header lines.
+
+        Read header lines up to the entirely blank line that terminates them.
+        The (normally blank) line that ends the headers is skipped, but not
+        included in the returned list.  If a non-header line ends the headers,
+        (which is an error), an attempt is made to backspace over it; it is
+        never included in the returned list.
+
+        The variable self.status is set to the empty string if all went well,
+        otherwise it is an error message.  The variable self.headers is a
+        completely uninterpreted list of lines contained in the header (so
+        printing them will reproduce the header exactly as it appears in the
+        file).
+
+        If multiple header fields with the same name occur, they are combined
+        according to the rules in RFC 2616 sec 4.2:
+
+        Appending each subsequent field-value to the first, each separated
+        by a comma. The order in which header fields with the same field-name
+        are received is significant to the interpretation of the combined
+        field value.
+        """
+        # XXX The implementation overrides the readheaders() method of
+        # rfc822.Message.  The base class design isn't amenable to
+        # customized behavior here so the method here is a copy of the
+        # base class code with a few small changes.
+
+        self.dict = {}
+        self.unixfrom = ''
+        self.headers = hlist = []
+        self.status = ''
+        headerseen = ""
+        firstline = 1
+        startofline = unread = tell = None
+        if hasattr(self.fp, 'unread'):
+            unread = self.fp.unread
+        elif self.seekable:
+            tell = self.fp.tell
+        while True:
+            if tell:
+                try:
+                    startofline = tell()
+                except IOError:
+                    startofline = tell = None
+                    self.seekable = 0
+            line = self.fp.readline()
+            if not line:
+                self.status = 'EOF in headers'
+                break
+            # Skip unix From name time lines
+            if firstline and line.startswith('From '):
+                self.unixfrom = self.unixfrom + line
+                continue
+            firstline = 0
+            if headerseen and line[0] in ' \t':
+                # XXX Not sure if continuation lines are handled properly
+                # for http and/or for repeating headers
+                # It's a continuation line.
+                hlist.append(line)
+                self.addcontinue(headerseen, line.strip())
+                continue
+            elif self.iscomment(line):
+                # It's a comment.  Ignore it.
+                continue
+            elif self.islast(line):
+                # Note! No pushback here!  The delimiter line gets eaten.
+                break
+            headerseen = self.isheader(line)
+            if headerseen:
+                # It's a legal header line, save it.
+                hlist.append(line)
+                self.addheader(headerseen, line[len(headerseen)+1:].strip())
+                continue
+            else:
+                # It's not a header line; throw it back and stop here.
+                if not self.dict:
+                    self.status = 'No headers'
+                else:
+                    self.status = 'Non-header line where header expected'
+                # Try to undo the read.
+                if unread:
+                    unread(line)
+                elif tell:
+                    self.fp.seek(startofline)
+                else:
+                    self.status = self.status + '; bad seek'
+                break
+
+class HTTPResponse:
+
+    # strict: If true, raise BadStatusLine if the status line can't be
+    # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
+    # false because it prevents clients from talking to HTTP/0.9
+    # servers.  Note that a response with a sufficiently corrupted
+    # status line will look like an HTTP/0.9 response.
+
+    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
+
+    def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False):
+        if buffering:
+            # The caller won't be using any sock.recv() calls, so buffering
+            # is fine and recommended for performance.
+            self.fp = sock.makefile('rb')
+        else:
+            # The buffer size is specified as zero, because the headers of
+            # the response are read with readline().  If the reads were
+            # buffered the readline() calls could consume some of the
+            # response, which make be read via a recv() on the underlying
+            # socket.
+            self.fp = sock.makefile('rb', 0)
+        self.debuglevel = debuglevel
+        self.strict = strict
+        self._method = method
+
+        self.msg = None
+
+        # from the Status-Line of the response
+        self.version = _UNKNOWN # HTTP-Version
+        self.status = _UNKNOWN  # Status-Code
+        self.reason = _UNKNOWN  # Reason-Phrase
+
+        self.chunked = _UNKNOWN         # is "chunked" being used?
+        self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
+        self.length = _UNKNOWN          # number of bytes left in response
+        self.will_close = _UNKNOWN      # conn will close at end of response
+
+    def _read_status(self):
+        # Initialize with Simple-Response defaults
+        line = self.fp.readline()
+        if self.debuglevel > 0:
+            print "reply:", repr(line)
+        if not line:
+            # Presumably, the server closed the connection before
+            # sending a valid response.
+            raise BadStatusLine(line)
+        try:
+            [version, status, reason] = line.split(None, 2)
+        except ValueError:
+            try:
+                [version, status] = line.split(None, 1)
+                reason = ""
+            except ValueError:
+                # empty version will cause next test to fail and status
+                # will be treated as 0.9 response.
+                version = ""
+        if not version.startswith('HTTP/'):
+            if self.strict:
+                self.close()
+                raise BadStatusLine(line)
+            else:
+                # assume it's a Simple-Response from an 0.9 server
+                self.fp = LineAndFileWrapper(line, self.fp)
+                return "HTTP/0.9", 200, ""
+
+        # The status code is a three-digit number
+        try:
+            status = int(status)
+            if status < 100 or status > 999:
+                raise BadStatusLine(line)
+        except ValueError:
+            raise BadStatusLine(line)
+        return version, status, reason
+
+    def begin(self):
+        if self.msg is not None:
+            # we've already started reading the response
+            return
+
+        # read until we get a non-100 response
+        while True:
+            version, status, reason = self._read_status()
+            if status != CONTINUE:
+                break
+            # skip the header from the 100 response
+            while True:
+                skip = self.fp.readline().strip()
+                if not skip:
+                    break
+                if self.debuglevel > 0:
+                    print "header:", skip
+
+        self.status = status
+        self.reason = reason.strip()
+        if version == 'HTTP/1.0':
+            self.version = 10
+        elif version.startswith('HTTP/1.'):
+            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
+        elif version == 'HTTP/0.9':
+            self.version = 9
+        else:
+            raise UnknownProtocol(version)
+
+        if self.version == 9:
+            self.length = None
+            self.chunked = 0
+            self.will_close = 1
+            self.msg = HTTPMessage(StringIO())
+            return
+
+        self.msg = HTTPMessage(self.fp, 0)
+        if self.debuglevel > 0:
+            for hdr in self.msg.headers:
+                print "header:", hdr,
+
+        # don't let the msg keep an fp
+        self.msg.fp = None
+
+        # are we using the chunked-style of transfer encoding?
+        tr_enc = self.msg.getheader('transfer-encoding')
+        if tr_enc and tr_enc.lower() == "chunked":
+            self.chunked = 1
+            self.chunk_left = None
+        else:
+            self.chunked = 0
+
+        # will the connection close at the end of the response?
+        self.will_close = self._check_close()
+
+        # do we have a Content-Length?
+        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
+        length = self.msg.getheader('content-length')
+        if length and not self.chunked:
+            try:
+                self.length = int(length)
+            except ValueError:
+                self.length = None
+            else:
+                if self.length < 0:  # ignore nonsensical negative lengths
+                    self.length = None
+        else:
+            self.length = None
+
+        # does the body have a fixed length? (of zero)
+        if (status == NO_CONTENT or status == NOT_MODIFIED or
+            100 <= status < 200 or      # 1xx codes
+            self._method == 'HEAD'):
+            self.length = 0
+
+        # if the connection remains open, and we aren't using chunked, and
+        # a content-length was not provided, then assume that the connection
+        # WILL close.
+        if not self.will_close and \
+           not self.chunked and \
+           self.length is None:
+            self.will_close = 1
+
+    def _check_close(self):
+        conn = self.msg.getheader('connection')
+        if self.version == 11:
+            # An HTTP/1.1 proxy is assumed to stay open unless
+            # explicitly closed.
+            conn = self.msg.getheader('connection')
+            if conn and "close" in conn.lower():
+                return True
+            return False
+
+        # Some HTTP/1.0 implementations have support for persistent
+        # connections, using rules different than HTTP/1.1.
+
+        # For older HTTP, Keep-Alive indicates persistent connection.
+        if self.msg.getheader('keep-alive'):
+            return False
+
+        # At least Akamai returns a "Connection: Keep-Alive" header,
+        # which was supposed to be sent by the client.
+        if conn and "keep-alive" in conn.lower():
+            return False
+
+        # Proxy-Connection is a netscape hack.
+        pconn = self.msg.getheader('proxy-connection')
+        if pconn and "keep-alive" in pconn.lower():
+            return False
+
+        # otherwise, assume it will close
+        return True
+
+    def close(self):
+        if self.fp:
+            self.fp.close()
+            self.fp = None
+
+    def isclosed(self):
+        # NOTE: it is possible that we will not ever call self.close(). This
+        #       case occurs when will_close is TRUE, length is None, and we
+        #       read up to the last byte, but NOT past it.
+        #
+        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
+        #          called, meaning self.isclosed() is meaningful.
+        return self.fp is None
+
+    # XXX It would be nice to have readline and __iter__ for this, too.
+
+    def read(self, amt=None):
+        if self.fp is None:
+            return ''
+
+        if self._method == 'HEAD':
+            self.close()
+            return ''
+
+        if self.chunked:
+            return self._read_chunked(amt)
+
+        if amt is None:
+            # unbounded read
+            if self.length is None:
+                s = self.fp.read()
+            else:
+                s = self._safe_read(self.length)
+                self.length = 0
+            self.close()        # we read everything
+            return s
+
+        if self.length is not None:
+            if amt > self.length:
+                # clip the read to the "end of response"
+                amt = self.length
+
+        # we do not use _safe_read() here because this may be a .will_close
+        # connection, and the user is reading more bytes than will be provided
+        # (for example, reading in 1k chunks)
+        s = self.fp.read(amt)
+        if self.length is not None:
+            self.length -= len(s)
+            if not self.length:
+                self.close()
+        return s
+
+    def _read_chunked(self, amt):
+        assert self.chunked != _UNKNOWN
+        chunk_left = self.chunk_left
+        value = []
+        while True:
+            if chunk_left is None:
+                line = self.fp.readline()
+                i = line.find(';')
+                if i >= 0:
+                    line = line[:i] # strip chunk-extensions
+                try:
+                    chunk_left = int(line, 16)
+                except ValueError:
+                    # close the connection as protocol synchronisation is
+                    # probably lost
+                    self.close()
+                    raise IncompleteRead(''.join(value))
+                if chunk_left == 0:
+                    break
+            if amt is None:
+                value.append(self._safe_read(chunk_left))
+            elif amt < chunk_left:
+                value.append(self._safe_read(amt))
+                self.chunk_left = chunk_left - amt
+                return ''.join(value)
+            elif amt == chunk_left:
+                value.append(self._safe_read(amt))
+                self._safe_read(2)  # toss the CRLF at the end of the chunk
+                self.chunk_left = None
+                return ''.join(value)
+            else:
+                value.append(self._safe_read(chunk_left))
+                amt -= chunk_left
+
+            # we read the whole chunk, get another
+            self._safe_read(2)      # toss the CRLF at the end of the chunk
+            chunk_left = None
+
+        # read and discard trailer up to the CRLF terminator
+        ### note: we shouldn't have any trailers!
+        while True:
+            line = self.fp.readline()
+            if not line:
+                # a vanishingly small number of sites EOF without
+                # sending the trailer
+                break
+            if line == '\r\n':
+                break
+
+        # we read everything; close the "file"
+        self.close()
+
+        return ''.join(value)
+
+    def _safe_read(self, amt):
+        """Read the number of bytes requested, compensating for partial reads.
+
+        Normally, we have a blocking socket, but a read() can be interrupted
+        by a signal (resulting in a partial read).
+
+        Note that we cannot distinguish between EOF and an interrupt when zero
+        bytes have been read. IncompleteRead() will be raised in this
+        situation.
+
+        This function should be used when <amt> bytes "should" be present for
+        reading. If the bytes are truly not available (due to EOF), then the
+        IncompleteRead exception can be used to detect the problem.
+        """
+        # NOTE(gps): As of svn r74426 socket._fileobject.read(x) will never
+        # return less than x bytes unless EOF is encountered.  It now handles
+        # signal interruptions (socket.error EINTR) internally.  This code
+        # never caught that exception anyways.  It seems largely pointless.
+        # self.fp.read(amt) will work fine.
+        s = []
+        while amt > 0:
+            chunk = self.fp.read(min(amt, MAXAMOUNT))
+            if not chunk:
+                raise IncompleteRead(''.join(s), amt)
+            s.append(chunk)
+            amt -= len(chunk)
+        return ''.join(s)
+
+    def fileno(self):
+        return self.fp.fileno()
+
+    def getheader(self, name, default=None):
+        if self.msg is None:
+            raise ResponseNotReady()
+        return self.msg.getheader(name, default)
+
+    def getheaders(self):
+        """Return list of (header, value) tuples."""
+        if self.msg is None:
+            raise ResponseNotReady()
+        return self.msg.items()
+
+
+class HTTPConnection:
+
+    _http_vsn = 11
+    _http_vsn_str = 'HTTP/1.1'
+
+    response_class = HTTPResponse
+    default_port = HTTP_PORT
+    auto_open = 1
+    debuglevel = 0
+    strict = 0
+
+    def __init__(self, host, port=None, strict=None,
+                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
+        self.timeout = timeout
+        self.source_address = source_address
+        self.sock = None
+        self._buffer = []
+        self.__response = None
+        self.__state = _CS_IDLE
+        self._method = None
+        self._tunnel_host = None
+        self._tunnel_port = None
+        self._tunnel_headers = {}
+
+        self._set_hostport(host, port)
+        if strict is not None:
+            self.strict = strict
+
+    def set_tunnel(self, host, port=None, headers=None):
+        """ Sets up the host and the port for the HTTP CONNECT Tunnelling.
+
+        The headers argument should be a mapping of extra HTTP headers
+        to send with the CONNECT request.
+        """
+        self._tunnel_host = host
+        self._tunnel_port = port
+        if headers:
+            self._tunnel_headers = headers
+        else:
+            self._tunnel_headers.clear()
+
+    def _set_hostport(self, host, port):
+        if port is None:
+            i = host.rfind(':')
+            j = host.rfind(']')         # ipv6 addresses have [...]
+            if i > j:
+                try:
+                    port = int(host[i+1:])
+                except ValueError:
+                    raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
+                host = host[:i]
+            else:
+                port = self.default_port
+            if host and host[0] == '[' and host[-1] == ']':
+                host = host[1:-1]
+        self.host = host
+        self.port = port
+
+    def set_debuglevel(self, level):
+        self.debuglevel = level
+
+    def _tunnel(self):
+        self._set_hostport(self._tunnel_host, self._tunnel_port)
+        self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port))
+        for header, value in self._tunnel_headers.iteritems():
+            self.send("%s: %s\r\n" % (header, value))
+        self.send("\r\n")
+        response = self.response_class(self.sock, strict = self.strict,
+                                       method = self._method)
+        (version, code, message) = response._read_status()
+
+        if code != 200:
+            self.close()
+            raise socket.error("Tunnel connection failed: %d %s" % (code,
+                                                                    message.strip()))
+        while True:
+            line = response.fp.readline()
+            if line == '\r\n': break
+
+
+    def connect(self):
+        """Connect to the host and port specified in __init__."""
+        self.sock = socket.create_connection((self.host,self.port),
+                                             self.timeout, self.source_address)
+
+        if self._tunnel_host:
+            self._tunnel()
+
+    def close(self):
+        """Close the connection to the HTTP server."""
+        if self.sock:
+            self.sock.close()   # close it manually... there may be other refs
+            self.sock = None
+        if self.__response:
+            self.__response.close()
+            self.__response = None
+        self.__state = _CS_IDLE
+
+    def send(self, data):
+        """Send `data' to the server."""
+        if self.sock is None:
+            if self.auto_open:
+                self.connect()
+            else:
+                raise NotConnected()
+
+        if self.debuglevel > 0:
+            print "send:", repr(data)
+        blocksize = 8192
+        if hasattr(data,'read') and not isinstance(data, array):
+            if self.debuglevel > 0: print "sendIng a read()able"
+            datablock = data.read(blocksize)
+            while datablock:
+                self.sock.sendall(datablock)
+                datablock = data.read(blocksize)
+        else:
+            self.sock.sendall(data)
+
+    def _output(self, s):
+        """Add a line of output to the current request buffer.
+
+        Assumes that the line does *not* end with \\r\\n.
+        """
+        self._buffer.append(s)
+
+    def _send_output(self, message_body=None):
+        """Send the currently buffered request and clear the buffer.
+
+        Appends an extra \\r\\n to the buffer.
+        A message_body may be specified, to be appended to the request.
+        """
+        self._buffer.extend(("", ""))
+        msg = "\r\n".join(self._buffer)
+        del self._buffer[:]
+        # If msg and message_body are sent in a single send() call,
+        # it will avoid performance problems caused by the interaction
+        # between delayed ack and the Nagle algorithim.
+        if isinstance(message_body, str):
+            msg += message_body
+            message_body = None
+        self.send(msg)
+        if message_body is not None:
+            #message_body was not a string (i.e. it is a file) and
+            #we must run the risk of Nagle
+            self.send(message_body)
+
+    def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
+        """Send a request to the server.
+
+        `method' specifies an HTTP request method, e.g. 'GET'.
+        `url' specifies the object being requested, e.g. '/index.html'.
+        `skip_host' if True does not add automatically a 'Host:' header
+        `skip_accept_encoding' if True does not add automatically an
+           'Accept-Encoding:' header
+        """
+
+        # if a prior response has been completed, then forget about it.
+        if self.__response and self.__response.isclosed():
+            self.__response = None
+
+
+        # in certain cases, we cannot issue another request on this connection.
+        # this occurs when:
+        #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
+        #   2) a response to a previous request has signalled that it is going
+        #      to close the connection upon completion.
+        #   3) the headers for the previous response have not been read, thus
+        #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
+        #
+        # if there is no prior response, then we can request at will.
+        #
+        # if point (2) is true, then we will have passed the socket to the
+        # response (effectively meaning, "there is no prior response"), and
+        # will open a new one when a new request is made.
+        #
+        # Note: if a prior response exists, then we *can* start a new request.
+        #       We are not allowed to begin fetching the response to this new
+        #       request, however, until that prior response is complete.
+        #
+        if self.__state == _CS_IDLE:
+            self.__state = _CS_REQ_STARTED
+        else:
+            raise CannotSendRequest()
+
+        # Save the method we use, we need it later in the response phase
+        self._method = method
+        if not url:
+            url = '/'
+        hdr = '%s %s %s' % (method, url, self._http_vsn_str)
+
+        self._output(hdr)
+
+        if self._http_vsn == 11:
+            # Issue some standard headers for better HTTP/1.1 compliance
+
+            if not skip_host:
+                # this header is issued *only* for HTTP/1.1
+                # connections. more specifically, this means it is
+                # only issued when the client uses the new
+                # HTTPConnection() class. backwards-compat clients
+                # will be using HTTP/1.0 and those clients may be
+                # issuing this header themselves. we should NOT issue
+                # it twice; some web servers (such as Apache) barf
+                # when they see two Host: headers
+
+                # If we need a non-standard port,include it in the
+                # header.  If the request is going through a proxy,
+                # but the host of the actual URL, not the host of the
+                # proxy.
+
+                netloc = ''
+                if url.startswith('http'):
+                    nil, netloc, nil, nil, nil = urlsplit(url)
+
+                if netloc:
+                    try:
+                        netloc_enc = netloc.encode("ascii")
+                    except UnicodeEncodeError:
+                        netloc_enc = netloc.encode("idna")
+                    self.putheader('Host', netloc_enc)
+                else:
+                    try:
+                        host_enc = self.host.encode("ascii")
+                    except UnicodeEncodeError:
+                        host_enc = self.host.encode("idna")
+                    # Wrap the IPv6 Host Header with [] (RFC 2732)
+                    if host_enc.find(':') >= 0:
+                        host_enc = "[" + host_enc + "]"
+                    if self.port == self.default_port:
+                        self.putheader('Host', host_enc)
+                    else:
+                        self.putheader('Host', "%s:%s" % (host_enc, self.port))
+
+            # note: we are assuming that clients will not attempt to set these
+            #       headers since *this* library must deal with the
+            #       consequences. this also means that when the supporting
+            #       libraries are updated to recognize other forms, then this
+            #       code should be changed (removed or updated).
+
+            # we only want a Content-Encoding of "identity" since we don't
+            # support encodings such as x-gzip or x-deflate.
+            if not skip_accept_encoding:
+                self.putheader('Accept-Encoding', 'identity')
+
+            # we can accept "chunked" Transfer-Encodings, but no others
+            # NOTE: no TE header implies *only* "chunked"
+            #self.putheader('TE', 'chunked')
+
+            # if TE is supplied in the header, then it must appear in a
+            # Connection header.
+            #self.putheader('Connection', 'TE')
+
+        else:
+            # For HTTP/1.0, the server will assume "not chunked"
+            pass
+
+    def putheader(self, header, *values):
+        """Send a request header line to the server.
+
+        For example: h.putheader('Accept', 'text/html')
+        """
+        if self.__state != _CS_REQ_STARTED:
+            raise CannotSendHeader()
+
+        hdr = '%s: %s' % (header, '\r\n\t'.join([str(v) for v in values]))
+        self._output(hdr)
+
+    def endheaders(self, message_body=None):
+        """Indicate that the last header line has been sent to the server.
+
+        This method sends the request to the server.  The optional
+        message_body argument can be used to pass message body
+        associated with the request.  The message body will be sent in
+        the same packet as the message headers if possible.  The
+        message_body should be a string.
+        """
+        if self.__state == _CS_REQ_STARTED:
+            self.__state = _CS_REQ_SENT
+        else:
+            raise CannotSendHeader()
+        self._send_output(message_body)
+
+    def request(self, method, url, body=None, headers={}):
+        """Send a complete request to the server."""
+        self._send_request(method, url, body, headers)
+
+    def _set_content_length(self, body):
+        # Set the content-length based on the body.
+        thelen = None
+        try:
+            thelen = str(len(body))
+        except TypeError, te:
+            # If this is a file-like object, try to
+            # fstat its file descriptor
+            try:
+                thelen = str(os.fstat(body.fileno()).st_size)
+            except (AttributeError, OSError):
+                # Don't send a length if this failed
+                if self.debuglevel > 0: print "Cannot stat!!"
+
+        if thelen is not None:
+            self.putheader('Content-Length', thelen)
+
+    def _send_request(self, method, url, body, headers):
+        # Honor explicitly requested Host: and Accept-Encoding: headers.
+        header_names = dict.fromkeys([k.lower() for k in headers])
+        skips = {}
+        if 'host' in header_names:
+            skips['skip_host'] = 1
+        if 'accept-encoding' in header_names:
+            skips['skip_accept_encoding'] = 1
+
+        self.putrequest(method, url, **skips)
+
+        if body and ('content-length' not in header_names):
+            self._set_content_length(body)
+        for hdr, value in headers.iteritems():
+            self.putheader(hdr, value)
+        self.endheaders(body)
+
+    def getresponse(self, buffering=False):
+        "Get the response from the server."
+
+        # if a prior response has been completed, then forget about it.
+        if self.__response and self.__response.isclosed():
+            self.__response = None
+
+        #
+        # if a prior response exists, then it must be completed (otherwise, we
+        # cannot read this response's header to determine the connection-close
+        # behavior)
+        #
+        # note: if a prior response existed, but was connection-close, then the
+        # socket and response were made independent of this HTTPConnection
+        # object since a new request requires that we open a whole new
+        # connection
+        #
+        # this means the prior response had one of two states:
+        #   1) will_close: this connection was reset and the prior socket and
+        #                  response operate independently
+        #   2) persistent: the response was retained and we await its
+        #                  isclosed() status to become true.
+        #
+        if self.__state != _CS_REQ_SENT or self.__response:
+            raise ResponseNotReady()
+
+        args = (self.sock,)
+        kwds = {"strict":self.strict, "method":self._method}
+        if self.debuglevel > 0:
+            args += (self.debuglevel,)
+        if buffering:
+            #only add this keyword if non-default, for compatibility with
+            #other response_classes.
+            kwds["buffering"] = True;
+        response = self.response_class(*args, **kwds)
+
+        try:
+            response.begin()
+        except:
+            response.close()
+            raise
+        assert response.will_close != _UNKNOWN
+        self.__state = _CS_IDLE
+
+        if response.will_close:
+            # this effectively passes the connection to the response
+            self.close()
+        else:
+            # remember this, so we can tell when it is complete
+            self.__response = response
+
+        return response
+
+
+class HTTP:
+    "Compatibility class with httplib.py from 1.5."
+
+    _http_vsn = 10
+    _http_vsn_str = 'HTTP/1.0'
+
+    debuglevel = 0
+
+    _connection_class = HTTPConnection
+
+    def __init__(self, host='', port=None, strict=None):
+        "Provide a default host, since the superclass requires one."
+
+        # some joker passed 0 explicitly, meaning default port
+        if port == 0:
+            port = None
+
+        # Note that we may pass an empty string as the host; this will throw
+        # an error when we attempt to connect. Presumably, the client code
+        # will call connect before then, with a proper host.
+        self._setup(self._connection_class(host, port, strict))
+
+    def _setup(self, conn):
+        self._conn = conn
+
+        # set up delegation to flesh out interface
+        self.send = conn.send
+        self.putrequest = conn.putrequest
+        self.putheader = conn.putheader
+        self.endheaders = conn.endheaders
+        self.set_debuglevel = conn.set_debuglevel
+
+        conn._http_vsn = self._http_vsn
+        conn._http_vsn_str = self._http_vsn_str
+
+        self.file = None
+
+    def connect(self, host=None, port=None):
+        "Accept arguments to set the host/port, since the superclass doesn't."
+
+        if host is not None:
+            self._conn._set_hostport(host, port)
+        self._conn.connect()
+
+    def getfile(self):
+        "Provide a getfile, since the superclass' does not use this concept."
+        return self.file
+
+    def getreply(self, buffering=False):
+        """Compat definition since superclass does not define it.
+
+        Returns a tuple consisting of:
+        - server status code (e.g. '200' if all goes well)
+        - server "reason" corresponding to status code
+        - any RFC822 headers in the response from the server
+        """
+        try:
+            if not buffering:
+                response = self._conn.getresponse()
+            else:
+                #only add this keyword if non-default for compatibility
+                #with other connection classes
+                response = self._conn.getresponse(buffering)
+        except BadStatusLine, e:
+            ### hmm. if getresponse() ever closes the socket on a bad request,
+            ### then we are going to have problems with self.sock
+
+            ### should we keep this behavior? do people use it?
+            # keep the socket open (as a file), and return it
+            self.file = self._conn.sock.makefile('rb', 0)
+
+            # close our socket -- we want to restart after any protocol error
+            self.close()
+
+            self.headers = None
+            return -1, e.line, None
+
+        self.headers = response.msg
+        self.file = response.fp
+        return response.status, response.reason, response.msg
+
+    def close(self):
+        self._conn.close()
+
+        # note that self.file == response.fp, which gets closed by the
+        # superclass. just clear the object ref here.
+        ### hmm. messy. if status==-1, then self.file is owned by us.
+        ### well... we aren't explicitly closing, but losing this ref will
+        ### do it
+        self.file = None
+
+try:
+    import ssl
+except ImportError:
+    pass
+else:
+    class HTTPSConnection(HTTPConnection):
+        "This class allows communication via SSL."
+
+        default_port = HTTPS_PORT
+
+        def __init__(self, host, port=None, key_file=None, cert_file=None,
+                     strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+                     source_address=None):
+            HTTPConnection.__init__(self, host, port, strict, timeout,
+                                    source_address)
+            self.key_file = key_file
+            self.cert_file = cert_file
+
+        def connect(self):
+            "Connect to a host on a given (SSL) port."
+
+            sock = socket.create_connection((self.host, self.port),
+                                            self.timeout, self.source_address)
+            if self._tunnel_host:
+                self.sock = sock
+                self._tunnel()
+            self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)
+
+    __all__.append("HTTPSConnection")
+
+    class HTTPS(HTTP):
+        """Compatibility with 1.5 httplib interface
+
+        Python 1.5.2 did not have an HTTPS class, but it defined an
+        interface for sending http requests that is also useful for
+        https.
+        """
+
+        _connection_class = HTTPSConnection
+
+        def __init__(self, host='', port=None, key_file=None, cert_file=None,
+                     strict=None):
+            # provide a default host, pass the X509 cert info
+
+            # urf. compensate for bad input.
+            if port == 0:
+                port = None
+            self._setup(self._connection_class(host, port, key_file,
+                                               cert_file, strict))
+
+            # we never actually use these for anything, but we keep them
+            # here for compatibility with post-1.5.2 CVS.
+            self.key_file = key_file
+            self.cert_file = cert_file
+
+
+    def FakeSocket (sock, sslobj):
+        warnings.warn("FakeSocket is deprecated, and won't be in 3.x.  " +
+                      "Use the result of ssl.wrap_socket() directly instead.",
+                      DeprecationWarning, stacklevel=2)
+        return sslobj
+
+
+class HTTPException(Exception):
+    # Subclasses that define an __init__ must call Exception.__init__
+    # or define self.args.  Otherwise, str() will fail.
+    pass
+
+class NotConnected(HTTPException):
+    pass
+
+class InvalidURL(HTTPException):
+    pass
+
+class UnknownProtocol(HTTPException):
+    def __init__(self, version):
+        self.args = version,
+        self.version = version
+
+class UnknownTransferEncoding(HTTPException):
+    pass
+
+class UnimplementedFileMode(HTTPException):
+    pass
+
+class IncompleteRead(HTTPException):
+    def __init__(self, partial, expected=None):
+        self.args = partial,
+        self.partial = partial
+        self.expected = expected
+    def __repr__(self):
+        if self.expected is not None:
+            e = ', %i more expected' % self.expected
+        else:
+            e = ''
+        return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), e)
+    def __str__(self):
+        return repr(self)
+
+class ImproperConnectionState(HTTPException):
+    pass
+
+class CannotSendRequest(ImproperConnectionState):
+    pass
+
+class CannotSendHeader(ImproperConnectionState):
+    pass
+
+class ResponseNotReady(ImproperConnectionState):
+    pass
+
+class BadStatusLine(HTTPException):
+    def __init__(self, line):
+        if not line:
+            line = repr(line)
+        self.args = line,
+        self.line = line
+
+# for backwards compatibility
+error = HTTPException
+
+class LineAndFileWrapper:
+    """A limited file-like object for HTTP/0.9 responses."""
+
+    # The status-line parsing code calls readline(), which normally
+    # get the HTTP status line.  For a 0.9 response, however, this is
+    # actually the first line of the body!  Clients need to get a
+    # readable file object that contains that line.
+
+    def __init__(self, line, file):
+        self._line = line
+        self._file = file
+        self._line_consumed = 0
+        self._line_offset = 0
+        self._line_left = len(line)
+
+    def __getattr__(self, attr):
+        return getattr(self._file, attr)
+
+    def _done(self):
+        # called when the last byte is read from the line.  After the
+        # call, all read methods are delegated to the underlying file
+        # object.
+        self._line_consumed = 1
+        self.read = self._file.read
+        self.readline = self._file.readline
+        self.readlines = self._file.readlines
+
+    def read(self, amt=None):
+        if self._line_consumed:
+            return self._file.read(amt)
+        assert self._line_left
+        if amt is None or amt > self._line_left:
+            s = self._line[self._line_offset:]
+            self._done()
+            if amt is None:
+                return s + self._file.read()
+            else:
+                return s + self._file.read(amt - len(s))
+        else:
+            assert amt <= self._line_left
+            i = self._line_offset
+            j = i + amt
+            s = self._line[i:j]
+            self._line_offset = j
+            self._line_left -= amt
+            if self._line_left == 0:
+                self._done()
+            return s
+
+    def readline(self):
+        if self._line_consumed:
+            return self._file.readline()
+        assert self._line_left
+        s = self._line[self._line_offset:]
+        self._done()
+        return s
+
+    def readlines(self, size=None):
+        if self._line_consumed:
+            return self._file.readlines(size)
+        assert self._line_left
+        L = [self._line[self._line_offset:]]
+        self._done()
+        if size is None:
+            return L + self._file.readlines()
+        else:
+            return L + self._file.readlines(size)
+
+def test():
+    """Test this module.
+
+    A hodge podge of tests collected here, because they have too many
+    external dependencies for the regular test suite.
+    """
+
+    import sys
+    import getopt
+    opts, args = getopt.getopt(sys.argv[1:], 'd')
+    dl = 0
+    for o, a in opts:
+        if o == '-d': dl = dl + 1
+    host = 'www.python.org'
+    selector = '/'
+    if args[0:]: host = args[0]
+    if args[1:]: selector = args[1]
+    h = HTTP()
+    h.set_debuglevel(dl)
+    h.connect(host)
+    h.putrequest('GET', selector)
+    h.endheaders()
+    status, reason, headers = h.getreply()
+    print 'status =', status
+    print 'reason =', reason
+    print "read", len(h.getfile().read())
+    print
+    if headers:
+        for header in headers.headers: print header.strip()
+    print
+
+    # minimal test that code to extract host from url works
+    class HTTP11(HTTP):
+        _http_vsn = 11
+        _http_vsn_str = 'HTTP/1.1'
+
+    h = HTTP11('www.python.org')
+    h.putrequest('GET', 'http://www.python.org/~jeremy/')
+    h.endheaders()
+    h.getreply()
+    h.close()
+
+    try:
+        import ssl
+    except ImportError:
+        pass
+    else:
+
+        for host, selector in (('sourceforge.net', '/projects/python'),
+                               ):
+            print "https://%s%s" % (host, selector)
+            hs = HTTPS()
+            hs.set_debuglevel(dl)
+            hs.connect(host)
+            hs.putrequest('GET', selector)
+            hs.endheaders()
+            status, reason, headers = hs.getreply()
+            print 'status =', status
+            print 'reason =', reason
+            print "read", len(hs.getfile().read())
+            print
+            if headers:
+                for header in headers.headers: print header.strip()
+            print
+
+if __name__ == '__main__':
+    test()
diff --git a/lib-python/modified-2.7/test/test_urllib2.py b/lib-python/modified-2.7/test/test_urllib2.py
--- a/lib-python/modified-2.7/test/test_urllib2.py
+++ b/lib-python/modified-2.7/test/test_urllib2.py
@@ -307,6 +307,9 @@
     def getresponse(self):
         return MockHTTPResponse(MockFile(), {}, 200, "OK")
 
+    def close(self):
+        pass
+
 class MockHandler:
     # useful for testing handler machinery
     # see add_ordered_mock_handlers() docstring
diff --git a/lib-python/modified-2.7/urllib2.py b/lib-python/modified-2.7/urllib2.py
new file mode 100644
--- /dev/null
+++ b/lib-python/modified-2.7/urllib2.py
@@ -0,0 +1,1440 @@
+"""An extensible library for opening URLs using a variety of protocols
+
+The simplest way to use this module is to call the urlopen function,
+which accepts a string containing a URL or a Request object (described
+below).  It opens the URL and returns the results as file-like
+object; the returned object has some extra methods described below.
+
+The OpenerDirector manages a collection of Handler objects that do
+all the actual work.  Each Handler implements a particular protocol or
+option.  The OpenerDirector is a composite object that invokes the
+Handlers needed to open the requested URL.  For example, the
+HTTPHandler performs HTTP GET and POST requests and deals with
+non-error returns.  The HTTPRedirectHandler automatically deals with
+HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
+deals with digest authentication.
+
+urlopen(url, data=None) -- Basic usage is the same as original
+urllib.  pass the url and optionally data to post to an HTTP URL, and
+get a file-like object back.  One difference is that you can also pass
+a Request instance instead of URL.  Raises a URLError (subclass of
+IOError); for HTTP errors, raises an HTTPError, which can also be
+treated as a valid response.
+
+build_opener -- Function that creates a new OpenerDirector instance.
+Will install the default handlers.  Accepts one or more Handlers as
+arguments, either instances or Handler classes that it will
+instantiate.  If one of the argument is a subclass of the default
+handler, the argument will be installed instead of the default.
+
+install_opener -- Installs a new opener as the default opener.
+
+objects of interest:
+
+OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
+the Handler classes, while dealing with requests and responses.
+
+Request -- An object that encapsulates the state of a request.  The
+state can be as simple as the URL.  It can also include extra HTTP
+headers, e.g. a User-Agent.
+
+BaseHandler --
+
+exceptions:
+URLError -- A subclass of IOError, individual protocols have their own
+specific subclass.
+
+HTTPError -- Also a valid HTTP response, so you can treat an HTTP error
+as an exceptional event or valid response.
+
+internals:
+BaseHandler and parent
+_call_chain conventions
+
+Example usage:
+
+import urllib2
+
+# set up authentication info
+authinfo = urllib2.HTTPBasicAuthHandler()
+authinfo.add_password(realm='PDQ Application',
+                      uri='https://mahler:8092/site-updates.py',
+                      user='klem',
+                      passwd='geheim$parole')
+
+proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})
+
+# build a new opener that adds authentication and caching FTP handlers
+opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)
+
+# install it
+urllib2.install_opener(opener)
+
+f = urllib2.urlopen('http://www.python.org/')
+
+
+"""
+
+# XXX issues:
+# If an authentication error handler that tries to perform
+# authentication for some reason but fails, how should the error be
+# signalled?  The client needs to know the HTTP error code.  But if
+# the handler knows that the problem was, e.g., that it didn't know
+# that hash algo that requested in the challenge, it would be good to
+# pass that information along to the client, too.
+# ftp errors aren't handled cleanly
+# check digest against correct (i.e. non-apache) implementation
+
+# Possible extensions:
+# complex proxies  XXX not sure what exactly was meant by this
+# abstract factory for opener
+
+import base64
+import hashlib
+import httplib
+import mimetools
+import os
+import posixpath
+import random
+import re
+import socket
+import sys
+import time
+import urlparse
+import bisect
+
+try:
+    from cStringIO import StringIO
+except ImportError:
+    from StringIO import StringIO
+
+from urllib import (unwrap, unquote, splittype, splithost, quote,
+     addinfourl, splitport, splittag,
+     splitattr, ftpwrapper, splituser, splitpasswd, splitvalue)
+
+# support for FileHandler, proxies via environment variables
+from urllib import localhost, url2pathname, getproxies, proxy_bypass
+
+# used in User-Agent header sent
+__version__ = sys.version[:3]
+
+_opener = None
+def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
+    global _opener
+    if _opener is None:
+        _opener = build_opener()
+    return _opener.open(url, data, timeout)
+
+def install_opener(opener):
+    global _opener
+    _opener = opener
+
+# do these error classes make sense?
+# make sure all of the IOError stuff is overridden.  we just want to be
+# subtypes.
+
+class URLError(IOError):
+    # URLError is a sub-type of IOError, but it doesn't share any of
+    # the implementation.  need to override __init__ and __str__.
+    # It sets self.args for compatibility with other EnvironmentError
+    # subclasses, but args doesn't have the typical format with errno in
+    # slot 0 and strerror in slot 1.  This may be better than nothing.
+    def __init__(self, reason):
+        self.args = reason,
+        self.reason = reason
+
+    def __str__(self):
+        return '<urlopen error %s>' % self.reason
+
+class HTTPError(URLError, addinfourl):
+    """Raised when HTTP error occurs, but also acts like non-error return"""
+    __super_init = addinfourl.__init__
+
+    def __init__(self, url, code, msg, hdrs, fp):
+        self.code = code
+        self.msg = msg
+        self.hdrs = hdrs
+        self.fp = fp
+        self.filename = url
+        # The addinfourl classes depend on fp being a valid file
+        # object.  In some cases, the HTTPError may not have a valid
+        # file object.  If this happens, the simplest workaround is to
+        # not initialize the base classes.
+        if fp is not None:
+            self.__super_init(fp, hdrs, url, code)
+
+    def __str__(self):
+        return 'HTTP Error %s: %s' % (self.code, self.msg)
+
+# copied from cookielib.py
+_cut_port_re = re.compile(r":\d+$")
+def request_host(request):
+    """Return request-host, as defined by RFC 2965.
+
+    Variation from RFC: returned value is lowercased, for convenient
+    comparison.
+
+    """
+    url = request.get_full_url()
+    host = urlparse.urlparse(url)[1]
+    if host == "":
+        host = request.get_header("Host", "")
+
+    # remove port, if present
+    host = _cut_port_re.sub("", host, 1)
+    return host.lower()
+
+class Request:
+
+    def __init__(self, url, data=None, headers={},
+                 origin_req_host=None, unverifiable=False):
+        # unwrap('<URL:type://host/path>') --> 'type://host/path'
+        self.__original = unwrap(url)
+        self.__original, fragment = splittag(self.__original)
+        self.type = None
+        # self.__r_type is what's left after doing the splittype
+        self.host = None
+        self.port = None
+        self._tunnel_host = None
+        self.data = data
+        self.headers = {}
+        for key, value in headers.items():
+            self.add_header(key, value)
+        self.unredirected_hdrs = {}
+        if origin_req_host is None:
+            origin_req_host = request_host(self)
+        self.origin_req_host = origin_req_host
+        self.unverifiable = unverifiable
+
+    def __getattr__(self, attr):
+        # XXX this is a fallback mechanism to guard against these
+        # methods getting called in a non-standard order.  this may be
+        # too complicated and/or unnecessary.
+        # XXX should the __r_XXX attributes be public?
+        if attr[:12] == '_Request__r_':
+            name = attr[12:]
+            if hasattr(Request, 'get_' + name):
+                getattr(self, 'get_' + name)()
+                return getattr(self, attr)
+        raise AttributeError, attr
+
+    def get_method(self):
+        if self.has_data():
+            return "POST"
+        else:
+            return "GET"
+
+    # XXX these helper methods are lame
+
+    def add_data(self, data):
+        self.data = data
+
+    def has_data(self):
+        return self.data is not None
+
+    def get_data(self):
+        return self.data
+
+    def get_full_url(self):
+        return self.__original
+
+    def get_type(self):
+        if self.type is None:
+            self.type, self.__r_type = splittype(self.__original)
+            if self.type is None:
+                raise ValueError, "unknown url type: %s" % self.__original
+        return self.type
+
+    def get_host(self):
+        if self.host is None:
+            self.host, self.__r_host = splithost(self.__r_type)
+            if self.host:
+                self.host = unquote(self.host)
+        return self.host
+
+    def get_selector(self):
+        return self.__r_host
+
+    def set_proxy(self, host, type):
+        if self.type == 'https' and not self._tunnel_host:
+            self._tunnel_host = self.host
+        else:
+            self.type = type
+            self.__r_host = self.__original
+
+        self.host = host
+
+    def has_proxy(self):
+        return self.__r_host == self.__original
+
+    def get_origin_req_host(self):
+        return self.origin_req_host
+
+    def is_unverifiable(self):
+        return self.unverifiable
+
+    def add_header(self, key, val):
+        # useful for something like authentication
+        self.headers[key.capitalize()] = val
+
+    def add_unredirected_header(self, key, val):
+        # will not be added to a redirected request
+        self.unredirected_hdrs[key.capitalize()] = val
+
+    def has_header(self, header_name):
+        return (header_name in self.headers or
+                header_name in self.unredirected_hdrs)
+
+    def get_header(self, header_name, default=None):
+        return self.headers.get(
+            header_name,
+            self.unredirected_hdrs.get(header_name, default))
+
+    def header_items(self):
+        hdrs = self.unredirected_hdrs.copy()
+        hdrs.update(self.headers)
+        return hdrs.items()
+
+class OpenerDirector:
+    def __init__(self):
+        client_version = "Python-urllib/%s" % __version__
+        self.addheaders = [('User-agent', client_version)]
+        # manage the individual handlers
+        self.handlers = []
+        self.handle_open = {}
+        self.handle_error = {}
+        self.process_response = {}
+        self.process_request = {}
+
+    def add_handler(self, handler):
+        if not hasattr(handler, "add_parent"):
+            raise TypeError("expected BaseHandler instance, got %r" %
+                            type(handler))
+
+        added = False
+        for meth in dir(handler):
+            if meth in ["redirect_request", "do_open", "proxy_open"]:
+                # oops, coincidental match
+                continue
+
+            i = meth.find("_")
+            protocol = meth[:i]
+            condition = meth[i+1:]
+
+            if condition.startswith("error"):
+                j = condition.find("_") + i + 1
+                kind = meth[j+1:]
+                try:
+                    kind = int(kind)
+                except ValueError:
+                    pass
+                lookup = self.handle_error.get(protocol, {})
+                self.handle_error[protocol] = lookup
+            elif condition == "open":
+                kind = protocol
+                lookup = self.handle_open
+            elif condition == "response":
+                kind = protocol
+                lookup = self.process_response
+            elif condition == "request":
+                kind = protocol
+                lookup = self.process_request
+            else:
+                continue
+
+            handlers = lookup.setdefault(kind, [])
+            if handlers:
+                bisect.insort(handlers, handler)
+            else:
+                handlers.append(handler)
+            added = True
+
+        if added:
+            # the handlers must work in an specific order, the order
+            # is specified in a Handler attribute
+            bisect.insort(self.handlers, handler)
+            handler.add_parent(self)
+
+    def close(self):
+        # Only exists for backwards compatibility.
+        pass
+
+    def _call_chain(self, chain, kind, meth_name, *args):
+        # Handlers raise an exception if no one else should try to handle
+        # the request, or return None if they can't but another handler
+        # could.  Otherwise, they return the response.
+        handlers = chain.get(kind, ())
+        for handler in handlers:
+            func = getattr(handler, meth_name)
+
+            result = func(*args)
+            if result is not None:
+                return result
+
+    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
+        # accept a URL or a Request object
+        if isinstance(fullurl, basestring):
+            req = Request(fullurl, data)
+        else:
+            req = fullurl
+            if data is not None:
+                req.add_data(data)
+
+        req.timeout = timeout
+        protocol = req.get_type()
+
+        # pre-process request
+        meth_name = protocol+"_request"
+        for processor in self.process_request.get(protocol, []):
+            meth = getattr(processor, meth_name)
+            req = meth(req)
+
+        response = self._open(req, data)
+
+        # post-process response
+        meth_name = protocol+"_response"
+        for processor in self.process_response.get(protocol, []):
+            meth = getattr(processor, meth_name)
+            try:
+                response = meth(req, response)
+            except:
+                response.close()
+                raise
+
+        return response
+
+    def _open(self, req, data=None):
+        result = self._call_chain(self.handle_open, 'default',
+                                  'default_open', req)
+        if result:
+            return result
+
+        protocol = req.get_type()
+        result = self._call_chain(self.handle_open, protocol, protocol +
+                                  '_open', req)
+        if result:
+            return result
+
+        return self._call_chain(self.handle_open, 'unknown',
+                                'unknown_open', req)
+
+    def error(self, proto, *args):
+        if proto in ('http', 'https'):
+            # XXX http[s] protocols are special-cased
+            dict = self.handle_error['http'] # https is not different than http
+            proto = args[2]  # YUCK!
+            meth_name = 'http_error_%s' % proto
+            http_err = 1
+            orig_args = args
+        else:
+            dict = self.handle_error
+            meth_name = proto + '_error'
+            http_err = 0
+        args = (dict, proto, meth_name) + args
+        result = self._call_chain(*args)
+        if result:
+            return result
+
+        if http_err:
+            args = (dict, 'default', 'http_error_default') + orig_args
+            return self._call_chain(*args)
+
+# XXX probably also want an abstract factory that knows when it makes
+# sense to skip a superclass in favor of a subclass and when it might
+# make sense to include both
+
+def build_opener(*handlers):
+    """Create an opener object from a list of handlers.
+
+    The opener will use several default handlers, including support
+    for HTTP, FTP and when applicable, HTTPS.
+
+    If any of the handlers passed as arguments are subclasses of the
+    default handlers, the default handlers will not be used.
+    """
+    import types
+    def isclass(obj):
+        return isinstance(obj, (types.ClassType, type))
+
+    opener = OpenerDirector()
+    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
+                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
+                       FTPHandler, FileHandler, HTTPErrorProcessor]
+    if hasattr(httplib, 'HTTPS'):
+        default_classes.append(HTTPSHandler)
+    skip = set()
+    for klass in default_classes:
+        for check in handlers:
+            if isclass(check):
+                if issubclass(check, klass):
+                    skip.add(klass)
+            elif isinstance(check, klass):
+                skip.add(klass)
+    for klass in skip:
+        default_classes.remove(klass)
+
+    for klass in default_classes:
+        opener.add_handler(klass())
+
+    for h in handlers:
+        if isclass(h):
+            h = h()
+        opener.add_handler(h)
+    return opener
+
+class BaseHandler:
+    handler_order = 500
+
+    def add_parent(self, parent):
+        self.parent = parent
+
+    def close(self):
+        # Only exists for backwards compatibility
+        pass
+
+    def __lt__(self, other):
+        if not hasattr(other, "handler_order"):
+            # Try to preserve the old behavior of having custom classes
+            # inserted after default ones (works only for custom user
+            # classes which are not aware of handler_order).
+            return True
+        return self.handler_order < other.handler_order
+
+
+class HTTPErrorProcessor(BaseHandler):
+    """Process HTTP error responses."""
+    handler_order = 1000  # after all other processing
+
+    def http_response(self, request, response):
+        code, msg, hdrs = response.code, response.msg, response.info()
+
+        # According to RFC 2616, "2xx" code indicates that the client's
+        # request was successfully received, understood, and accepted.
+        if not (200 <= code < 300):
+            response = self.parent.error(
+                'http', request, response, code, msg, hdrs)
+
+        return response
+
+    https_response = http_response
+
+class HTTPDefaultErrorHandler(BaseHandler):
+    def http_error_default(self, req, fp, code, msg, hdrs):
+        raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
+
+class HTTPRedirectHandler(BaseHandler):
+    # maximum number of redirections to any single URL
+    # this is needed because of the state that cookies introduce
+    max_repeats = 4
+    # maximum total number of redirections (regardless of URL) before
+    # assuming we're in a loop
+    max_redirections = 10
+
+    def redirect_request(self, req, fp, code, msg, headers, newurl):
+        """Return a Request or None in response to a redirect.
+
+        This is called by the http_error_30x methods when a
+        redirection response is received.  If a redirection should
+        take place, return a new Request to allow http_error_30x to
+        perform the redirect.  Otherwise, raise HTTPError if no-one
+        else should try to handle this url.  Return None if you can't
+        but another Handler might.
+        """
+        m = req.get_method()
+        if (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
+            or code in (301, 302, 303) and m == "POST"):
+            # Strictly (according to RFC 2616), 301 or 302 in response
+            # to a POST MUST NOT cause a redirection without confirmation
+            # from the user (of urllib2, in this case).  In practice,
+            # essentially all clients do redirect in this case, so we
+            # do the same.
+            # be conciliant with URIs containing a space
+            newurl = newurl.replace(' ', '%20')
+            newheaders = dict((k,v) for k,v in req.headers.items()
+                              if k.lower() not in ("content-length", "content-type")
+                             )
+            return Request(newurl,
+                           headers=newheaders,
+                           origin_req_host=req.get_origin_req_host(),
+                           unverifiable=True)
+        else:
+            raise HTTPError(req.get_full_url(), code, msg, headers, fp)
+
+    # Implementation note: To avoid the server sending us into an
+    # infinite loop, the request object needs to track what URLs we
+    # have already seen.  Do this by adding a handler-specific
+    # attribute to the Request object.
+    def http_error_302(self, req, fp, code, msg, headers):
+        # Some servers (incorrectly) return multiple Location headers
+        # (so probably same goes for URI).  Use first header.
+        if 'location' in headers:
+            newurl = headers.getheaders('location')[0]
+        elif 'uri' in headers:
+            newurl = headers.getheaders('uri')[0]
+        else:
+            return
+
+        # fix a possible malformed URL
+        urlparts = urlparse.urlparse(newurl)
+        if not urlparts.path:
+            urlparts = list(urlparts)
+            urlparts[2] = "/"
+        newurl = urlparse.urlunparse(urlparts)
+
+        newurl = urlparse.urljoin(req.get_full_url(), newurl)
+
+        # XXX Probably want to forget about the state of the current
+        # request, although that might interact poorly with other
+        # handlers that also use handler-specific request attributes
+        new = self.redirect_request(req, fp, code, msg, headers, newurl)
+        if new is None:
+            return
+
+        # loop detection
+        # .redirect_dict has a key url if url was previously visited.
+        if hasattr(req, 'redirect_dict'):
+            visited = new.redirect_dict = req.redirect_dict
+            if (visited.get(newurl, 0) >= self.max_repeats or
+                len(visited) >= self.max_redirections):
+                raise HTTPError(req.get_full_url(), code,
+                                self.inf_msg + msg, headers, fp)
+        else:
+            visited = new.redirect_dict = req.redirect_dict = {}
+        visited[newurl] = visited.get(newurl, 0) + 1
+
+        # Don't close the fp until we are sure that we won't use it
+        # with HTTPError.
+        fp.read()
+        fp.close()
+
+        return self.parent.open(new, timeout=req.timeout)
+
+    http_error_301 = http_error_303 = http_error_307 = http_error_302
+
+    inf_msg = "The HTTP server returned a redirect error that would " \
+              "lead to an infinite loop.\n" \
+              "The last 30x error message was:\n"
+
+
+def _parse_proxy(proxy):
+    """Return (scheme, user, password, host/port) given a URL or an authority.
+
+    If a URL is supplied, it must have an authority (host:port) component.
+    According to RFC 3986, having an authority component means the URL must
+    have two slashes after the scheme:
+
+    >>> _parse_proxy('file:/ftp.example.com/')
+    Traceback (most recent call last):
+    ValueError: proxy URL with no authority: 'file:/ftp.example.com/'
+
+    The first three items of the returned tuple may be None.
+
+    Examples of authority parsing:
+
+    >>> _parse_proxy('proxy.example.com')
+    (None, None, None, 'proxy.example.com')
+    >>> _parse_proxy('proxy.example.com:3128')
+    (None, None, None, 'proxy.example.com:3128')
+
+    The authority component may optionally include userinfo (assumed to be
+    username:password):
+
+    >>> _parse_proxy('joe:password at proxy.example.com')
+    (None, 'joe', 'password', 'proxy.example.com')
+    >>> _parse_proxy('joe:password at proxy.example.com:3128')
+    (None, 'joe', 'password', 'proxy.example.com:3128')
+
+    Same examples, but with URLs instead:
+
+    >>> _parse_proxy('http://proxy.example.com/')
+    ('http', None, None, 'proxy.example.com')
+    >>> _parse_proxy('http://proxy.example.com:3128/')
+    ('http', None, None, 'proxy.example.com:3128')
+    >>> _parse_proxy('http://joe:password@proxy.example.com/')
+    ('http', 'joe', 'password', 'proxy.example.com')
+    >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
+    ('http', 'joe', 'password', 'proxy.example.com:3128')
+
+    Everything after the authority is ignored:
+
+    >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
+    ('ftp', 'joe', 'password', 'proxy.example.com')
+
+    Test for no trailing '/' case:
+
+    >>> _parse_proxy('http://joe:password@proxy.example.com')
+    ('http', 'joe', 'password', 'proxy.example.com')
+
+    """
+    scheme, r_scheme = splittype(proxy)
+    if not r_scheme.startswith("/"):
+        # authority
+        scheme = None
+        authority = proxy
+    else:
+        # URL
+        if not r_scheme.startswith("//"):
+            raise ValueError("proxy URL with no authority: %r" % proxy)
+        # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
+        # and 3.3.), path is empty or starts with '/'
+        end = r_scheme.find("/", 2)
+        if end == -1:
+            end = None
+        authority = r_scheme[2:end]
+    userinfo, hostport = splituser(authority)
+    if userinfo is not None:
+        user, password = splitpasswd(userinfo)
+    else:
+        user = password = None
+    return scheme, user, password, hostport
+
+class ProxyHandler(BaseHandler):
+    # Proxies must be in front
+    handler_order = 100
+
+    def __init__(self, proxies=None):
+        if proxies is None:
+            proxies = getproxies()
+        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
+        self.proxies = proxies
+        for type, url in proxies.items():
+            setattr(self, '%s_open' % type,
+                    lambda r, proxy=url, type=type, meth=self.proxy_open: \
+                    meth(r, proxy, type))
+
+    def proxy_open(self, req, proxy, type):
+        orig_type = req.get_type()
+        proxy_type, user, password, hostport = _parse_proxy(proxy)
+
+        if proxy_type is None:
+            proxy_type = orig_type
+
+        if req.host and proxy_bypass(req.host):
+            return None
+
+        if user and password:
+            user_pass = '%s:%s' % (unquote(user), unquote(password))
+            creds = base64.b64encode(user_pass).strip()
+            req.add_header('Proxy-authorization', 'Basic ' + creds)
+        hostport = unquote(hostport)
+        req.set_proxy(hostport, proxy_type)
+
+        if orig_type == proxy_type or orig_type == 'https':
+            # let other handlers take care of it
+            return None
+        else:
+            # need to start over, because the other handlers don't
+            # grok the proxy's URL type
+            # e.g. if we have a constructor arg proxies like so:
+            # {'http': 'ftp://proxy.example.com'}, we may end up turning
+            # a request for http://acme.example.com/a into one for
+            # ftp://proxy.example.com/a
+            return self.parent.open(req, timeout=req.timeout)
+
+class HTTPPasswordMgr:
+
+    def __init__(self):
+        self.passwd = {}
+
+    def add_password(self, realm, uri, user, passwd):
+        # uri could be a single URI or a sequence
+        if isinstance(uri, basestring):
+            uri = [uri]
+        if not realm in self.passwd:
+            self.passwd[realm] = {}
+        for default_port in True, False:
+            reduced_uri = tuple(
+                [self.reduce_uri(u, default_port) for u in uri])
+            self.passwd[realm][reduced_uri] = (user, passwd)
+
+    def find_user_password(self, realm, authuri):
+        domains = self.passwd.get(realm, {})
+        for default_port in True, False:
+            reduced_authuri = self.reduce_uri(authuri, default_port)
+            for uris, authinfo in domains.iteritems():
+                for uri in uris:
+                    if self.is_suburi(uri, reduced_authuri):
+                        return authinfo
+        return None, None
+
+    def reduce_uri(self, uri, default_port=True):
+        """Accept authority or URI and extract only the authority and path."""
+        # note HTTP URLs do not have a userinfo component
+        parts = urlparse.urlsplit(uri)
+        if parts[1]:
+            # URI
+            scheme = parts[0]
+            authority = parts[1]
+            path = parts[2] or '/'
+        else:
+            # host or host:port
+            scheme = None
+            authority = uri
+            path = '/'
+        host, port = splitport(authority)
+        if default_port and port is None and scheme is not None:
+            dport = {"http": 80,
+                     "https": 443,
+                     }.get(scheme)
+            if dport is not None:
+                authority = "%s:%d" % (host, dport)
+        return authority, path
+
+    def is_suburi(self, base, test):
+        """Check if test is below base in a URI tree
+
+        Both args must be URIs in reduced form.
+        """
+        if base == test:
+            return True
+        if base[0] != test[0]:
+            return False
+        common = posixpath.commonprefix((base[1], test[1]))
+        if len(common) == len(base[1]):
+            return True
+        return False
+
+
+class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
+
+    def find_user_password(self, realm, authuri):
+        user, password = HTTPPasswordMgr.find_user_password(self, realm,
+                                                            authuri)
+        if user is not None:
+            return user, password
+        return HTTPPasswordMgr.find_user_password(self, None, authuri)
+
+
+class AbstractBasicAuthHandler:
+
+    # XXX this allows for multiple auth-schemes, but will stupidly pick
+    # the last one with a realm specified.
+
+    # allow for double- and single-quoted realm values
+    # (single quotes are a violation of the RFC, but appear in the wild)
+    rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
+                    'realm=(["\'])(.*?)\\2', re.I)
+
+    # XXX could pre-emptively send auth info already accepted (RFC 2617,
+    # end of section 2, and section 1.2 immediately after "credentials"
+    # production).
+
+    def __init__(self, password_mgr=None):
+        if password_mgr is None:
+            password_mgr = HTTPPasswordMgr()
+        self.passwd = password_mgr
+        self.add_password = self.passwd.add_password
+        self.retried = 0
+
+    def reset_retry_count(self):
+        self.retried = 0
+
+    def http_error_auth_reqed(self, authreq, host, req, headers):
+        # host may be an authority (without userinfo) or a URL with an
+        # authority
+        # XXX could be multiple headers
+        authreq = headers.get(authreq, None)
+
+        if self.retried > 5:
+            # retry sending the username:password 5 times before failing.
+            raise HTTPError(req.get_full_url(), 401, "basic auth failed",
+                            headers, None)
+        else:
+            self.retried += 1
+
+        if authreq:
+            mo = AbstractBasicAuthHandler.rx.search(authreq)
+            if mo:
+                scheme, quote, realm = mo.groups()
+                if scheme.lower() == 'basic':
+                    response = self.retry_http_basic_auth(host, req, realm)
+                    if response and response.code != 401:
+                        self.retried = 0
+                    return response
+
+    def retry_http_basic_auth(self, host, req, realm):
+        user, pw = self.passwd.find_user_password(realm, host)
+        if pw is not None:
+            raw = "%s:%s" % (user, pw)
+            auth = 'Basic %s' % base64.b64encode(raw).strip()
+            if req.headers.get(self.auth_header, None) == auth:
+                return None
+            req.add_unredirected_header(self.auth_header, auth)
+            return self.parent.open(req, timeout=req.timeout)
+        else:
+            return None
+
+
+class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
+
+    auth_header = 'Authorization'
+
+    def http_error_401(self, req, fp, code, msg, headers):
+        url = req.get_full_url()
+        response = self.http_error_auth_reqed('www-authenticate',
+                                              url, req, headers)
+        self.reset_retry_count()
+        return response
+
+
+class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
+
+    auth_header = 'Proxy-authorization'
+
+    def http_error_407(self, req, fp, code, msg, headers):
+        # http_error_auth_reqed requires that there is no userinfo component in
+        # authority.  Assume there isn't one, since urllib2 does not (and
+        # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
+        # userinfo.
+        authority = req.get_host()
+        response = self.http_error_auth_reqed('proxy-authenticate',
+                                          authority, req, headers)
+        self.reset_retry_count()
+        return response
+
+
+def randombytes(n):
+    """Return n random bytes."""
+    # Use /dev/urandom if it is available.  Fall back to random module
+    # if not.  It might be worthwhile to extend this function to use
+    # other platform-specific mechanisms for getting random bytes.
+    if os.path.exists("/dev/urandom"):
+        f = open("/dev/urandom")
+        s = f.read(n)
+        f.close()
+        return s
+    else:
+        L = [chr(random.randrange(0, 256)) for i in range(n)]
+        return "".join(L)
+
+class AbstractDigestAuthHandler:
+    # Digest authentication is specified in RFC 2617.
+
+    # XXX The client does not inspect the Authentication-Info header
+    # in a successful response.
+
+    # XXX It should be possible to test this implementation against
+    # a mock server that just generates a static set of challenges.
+
+    # XXX qop="auth-int" supports is shaky
+
+    def __init__(self, passwd=None):
+        if passwd is None:
+            passwd = HTTPPasswordMgr()
+        self.passwd = passwd
+        self.add_password = self.passwd.add_password
+        self.retried = 0
+        self.nonce_count = 0
+        self.last_nonce = None
+
+    def reset_retry_count(self):
+        self.retried = 0
+
+    def http_error_auth_reqed(self, auth_header, host, req, headers):
+        authreq = headers.get(auth_header, None)
+        if self.retried > 5:
+            # Don't fail endlessly - if we failed once, we'll probably
+            # fail a second time. Hm. Unless the Password Manager is
+            # prompting for the information. Crap. This isn't great
+            # but it's better than the current 'repeat until recursion
+            # depth exceeded' approach <wink>
+            raise HTTPError(req.get_full_url(), 401, "digest auth failed",
+                            headers, None)
+        else:
+            self.retried += 1
+        if authreq:
+            scheme = authreq.split()[0]
+            if scheme.lower() == 'digest':
+                return self.retry_http_digest_auth(req, authreq)
+
+    def retry_http_digest_auth(self, req, auth):
+        token, challenge = auth.split(' ', 1)
+        chal = parse_keqv_list(parse_http_list(challenge))
+        auth = self.get_authorization(req, chal)
+        if auth:
+            auth_val = 'Digest %s' % auth
+            if req.headers.get(self.auth_header, None) == auth_val:
+                return None
+            req.add_unredirected_header(self.auth_header, auth_val)
+            resp = self.parent.open(req, timeout=req.timeout)
+            return resp
+
+    def get_cnonce(self, nonce):
+        # The cnonce-value is an opaque
+        # quoted string value provided by the client and used by both client
+        # and server to avoid chosen plaintext attacks, to provide mutual
+        # authentication, and to provide some message integrity protection.
+        # This isn't a fabulous effort, but it's probably Good Enough.
+        dig = hashlib.sha1("%s:%s:%s:%s" % (self.nonce_count, nonce, time.ctime(),
+                                            randombytes(8))).hexdigest()
+        return dig[:16]
+
+    def get_authorization(self, req, chal):
+        try:
+            realm = chal['realm']
+            nonce = chal['nonce']
+            qop = chal.get('qop')
+            algorithm = chal.get('algorithm', 'MD5')
+            # mod_digest doesn't send an opaque, even though it isn't
+            # supposed to be optional
+            opaque = chal.get('opaque', None)
+        except KeyError:
+            return None
+
+        H, KD = self.get_algorithm_impls(algorithm)
+        if H is None:
+            return None
+
+        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
+        if user is None:
+            return None
+
+        # XXX not implemented yet
+        if req.has_data():
+            entdig = self.get_entity_digest(req.get_data(), chal)
+        else:
+            entdig = None
+
+        A1 = "%s:%s:%s" % (user, realm, pw)
+        A2 = "%s:%s" % (req.get_method(),
+                        # XXX selector: what about proxies and full urls
+                        req.get_selector())
+        if qop == 'auth':
+            if nonce == self.last_nonce:
+                self.nonce_count += 1
+            else:
+                self.nonce_count = 1
+                self.last_nonce = nonce
+
+            ncvalue = '%08x' % self.nonce_count
+            cnonce = self.get_cnonce(nonce)
+            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
+            respdig = KD(H(A1), noncebit)
+        elif qop is None:
+            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
+        else:
+            # XXX handle auth-int.
+            raise URLError("qop '%s' is not supported." % qop)
+
+        # XXX should the partial digests be encoded too?
+
+        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
+               'response="%s"' % (user, realm, nonce, req.get_selector(),
+                                  respdig)
+        if opaque:
+            base += ', opaque="%s"' % opaque
+        if entdig:
+            base += ', digest="%s"' % entdig
+        base += ', algorithm="%s"' % algorithm
+        if qop:
+            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
+        return base
+
+    def get_algorithm_impls(self, algorithm):
+        # algorithm should be case-insensitive according to RFC2617
+        algorithm = algorithm.upper()
+        # lambdas assume digest modules are imported at the top level
+        if algorithm == 'MD5':
+            H = lambda x: hashlib.md5(x).hexdigest()
+        elif algorithm == 'SHA':
+            H = lambda x: hashlib.sha1(x).hexdigest()
+        # XXX MD5-sess
+        KD = lambda s, d: H("%s:%s" % (s, d))
+        return H, KD
+
+    def get_entity_digest(self, data, chal):
+        # XXX not implemented yet
+        return None
+
+
+class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
+    """An authentication protocol defined by RFC 2069
+
+    Digest authentication improves on basic authentication because it
+    does not transmit passwords in the clear.
+    """
+
+    auth_header = 'Authorization'
+    handler_order = 490  # before Basic auth
+
+    def http_error_401(self, req, fp, code, msg, headers):
+        host = urlparse.urlparse(req.get_full_url())[1]
+        retry = self.http_error_auth_reqed('www-authenticate',
+                                           host, req, headers)
+        self.reset_retry_count()
+        return retry
+
+
+class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
+
+    auth_header = 'Proxy-Authorization'
+    handler_order = 490  # before Basic auth
+
+    def http_error_407(self, req, fp, code, msg, headers):
+        host = req.get_host()
+        retry = self.http_error_auth_reqed('proxy-authenticate',
+                                           host, req, headers)
+        self.reset_retry_count()
+        return retry
+
+class AbstractHTTPHandler(BaseHandler):
+
+    def __init__(self, debuglevel=0):
+        self._debuglevel = debuglevel
+
+    def set_http_debuglevel(self, level):
+        self._debuglevel = level
+
+    def do_request_(self, request):
+        host = request.get_host()
+        if not host:
+            raise URLError('no host given')
+
+        if request.has_data():  # POST
+            data = request.get_data()
+            if not request.has_header('Content-type'):
+                request.add_unredirected_header(
+                    'Content-type',
+                    'application/x-www-form-urlencoded')
+            if not request.has_header('Content-length'):
+                request.add_unredirected_header(
+                    'Content-length', '%d' % len(data))
+
+        sel_host = host
+        if request.has_proxy():
+            scheme, sel = splittype(request.get_selector())
+            sel_host, sel_path = splithost(sel)
+
+        if not request.has_header('Host'):
+            request.add_unredirected_header('Host', sel_host)
+        for name, value in self.parent.addheaders:
+            name = name.capitalize()
+            if not request.has_header(name):
+                request.add_unredirected_header(name, value)
+
+        return request
+
+    def do_open(self, http_class, req):
+        """Return an addinfourl object for the request, using http_class.
+
+        http_class must implement the HTTPConnection API from httplib.
+        The addinfourl return value is a file-like object.  It also
+        has methods and attributes including:
+            - info(): return a mimetools.Message object for the headers
+            - geturl(): return the original request URL
+            - code: HTTP status code
+        """
+        host = req.get_host()
+        if not host:
+            raise URLError('no host given')
+
+        h = http_class(host, timeout=req.timeout) # will parse host:port
+        h.set_debuglevel(self._debuglevel)
+
+        headers = dict(req.unredirected_hdrs)
+        headers.update(dict((k, v) for k, v in req.headers.items()
+                            if k not in headers))
+
+        # We want to make an HTTP/1.1 request, but the addinfourl
+        # class isn't prepared to deal with a persistent connection.
+        # It will try to read all remaining data from the socket,
+        # which will block while the server waits for the next request.
+        # So make sure the connection gets closed after the (only)
+        # request.
+        headers["Connection"] = "close"
+        headers = dict(
+            (name.title(), val) for name, val in headers.items())
+
+        if req._tunnel_host:
+            tunnel_headers = {}
+            proxy_auth_hdr = "Proxy-Authorization"
+            if proxy_auth_hdr in headers:
+                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
+                # Proxy-Authorization should not be sent to origin
+                # server.
+                del headers[proxy_auth_hdr]
+            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)
+
+        try:
+            h.request(req.get_method(), req.get_selector(), req.data, headers)
+            try:
+                r = h.getresponse(buffering=True)
+            except TypeError: #buffering kw not supported
+                r = h.getresponse()
+        except socket.error, err: # XXX what error?
+            h.close()
+            raise URLError(err)
+
+        # Pick apart the HTTPResponse object to get the addinfourl
+        # object initialized properly.
+
+        # Wrap the HTTPResponse object in socket's file object adapter
+        # for Windows.  That adapter calls recv(), so delegate recv()
+        # to read().  This weird wrapping allows the returned object to
+        # have readline() and readlines() methods.
+
+        # XXX It might be better to extract the read buffering code
+        # out of socket._fileobject() and into a base class.
+
+        r.recv = r.read
+        fp = socket._fileobject(r, close=True)
+
+        resp = addinfourl(fp, r.msg, req.get_full_url())
+        resp.code = r.status
+        resp.msg = r.reason
+        return resp
+
+
+class HTTPHandler(AbstractHTTPHandler):
+
+    def http_open(self, req):
+        return self.do_open(httplib.HTTPConnection, req)
+
+    http_request = AbstractHTTPHandler.do_request_
+
+if hasattr(httplib, 'HTTPS'):
+    class HTTPSHandler(AbstractHTTPHandler):
+
+        def https_open(self, req):
+            return self.do_open(httplib.HTTPSConnection, req)
+
+        https_request = AbstractHTTPHandler.do_request_
+
+class HTTPCookieProcessor(BaseHandler):
+    def __init__(self, cookiejar=None):
+        import cookielib
+        if cookiejar is None:
+            cookiejar = cookielib.CookieJar()
+        self.cookiejar = cookiejar
+
+    def http_request(self, request):
+        self.cookiejar.add_cookie_header(request)
+        return request
+
+    def http_response(self, request, response):
+        self.cookiejar.extract_cookies(response, request)
+        return response
+
+    https_request = http_request
+    https_response = http_response
+
+class UnknownHandler(BaseHandler):
+    def unknown_open(self, req):
+        type = req.get_type()
+        raise URLError('unknown url type: %s' % type)
+
+def parse_keqv_list(l):
+    """Parse list of key=value strings where keys are not duplicated."""
+    parsed = {}
+    for elt in l:
+        k, v = elt.split('=', 1)
+        if v[0] == '"' and v[-1] == '"':
+            v = v[1:-1]
+        parsed[k] = v
+    return parsed
+
+def parse_http_list(s):
+    """Parse lists as described by RFC 2068 Section 2.
+
+    In particular, parse comma-separated lists where the elements of
+    the list may include quoted-strings.  A quoted-string could
+    contain a comma.  A non-quoted string could have quotes in the
+    middle.  Neither commas nor quotes count if they are escaped.
+    Only double-quotes count, not single-quotes.
+    """
+    res = []
+    part = ''
+
+    escape = quote = False
+    for cur in s:
+        if escape:
+            part += cur
+            escape = False
+            continue
+        if quote:
+            if cur == '\\':
+                escape = True
+                continue
+            elif cur == '"':
+                quote = False
+            part += cur
+            continue
+
+        if cur == ',':
+            res.append(part)
+            part = ''
+            continue
+
+        if cur == '"':
+            quote = True
+
+        part += cur
+
+    # append last part
+    if part:
+        res.append(part)
+
+    return [part.strip() for part in res]
+
+def _safe_gethostbyname(host):
+    try:
+        return socket.gethostbyname(host)
+    except socket.gaierror:
+        return None
+
+class FileHandler(BaseHandler):
+    # Use local file or FTP depending on form of URL
+    def file_open(self, req):
+        url = req.get_selector()
+        if url[:2] == '//' and url[2:3] != '/' and (req.host and
+                req.host != 'localhost'):
+            req.type = 'ftp'
+            return self.parent.open(req)
+        else:
+            return self.open_local_file(req)
+
+    # names for the localhost
+    names = None
+    def get_names(self):
+        if FileHandler.names is None:
+            try:
+                FileHandler.names = tuple(
+                    socket.gethostbyname_ex('localhost')[2] +
+                    socket.gethostbyname_ex(socket.gethostname())[2])
+            except socket.gaierror:
+                FileHandler.names = (socket.gethostbyname('localhost'),)
+        return FileHandler.names
+
+    # not entirely sure what the rules are here
+    def open_local_file(self, req):
+        import email.utils
+        import mimetypes
+        host = req.get_host()
+        filename = req.get_selector()
+        localfile = url2pathname(filename)
+        try:
+            stats = os.stat(localfile)
+            size = stats.st_size
+            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
+            mtype = mimetypes.guess_type(filename)[0]
+            headers = mimetools.Message(StringIO(
+                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
+                (mtype or 'text/plain', size, modified)))
+            if host:
+                host, port = splitport(host)
+            if not host or \
+                (not port and _safe_gethostbyname(host) in self.get_names()):
+                if host:
+                    origurl = 'file://' + host + filename
+                else:
+                    origurl = 'file://' + filename
+                return addinfourl(open(localfile, 'rb'), headers, origurl)
+        except OSError, msg:
+            # urllib2 users shouldn't expect OSErrors coming from urlopen()
+            raise URLError(msg)
+        raise URLError('file not on local host')
+
+class FTPHandler(BaseHandler):
+    def ftp_open(self, req):
+        import ftplib
+        import mimetypes
+        host = req.get_host()
+        if not host:
+            raise URLError('ftp error: no host given')
+        host, port = splitport(host)
+        if port is None:
+            port = ftplib.FTP_PORT
+        else:
+            port = int(port)
+
+        # username/password handling
+        user, host = splituser(host)
+        if user:
+            user, passwd = splitpasswd(user)
+        else:
+            passwd = None
+        host = unquote(host)
+        user = user or ''
+        passwd = passwd or ''
+
+        try:
+            host = socket.gethostbyname(host)
+        except socket.error, msg:
+            raise URLError(msg)
+        path, attrs = splitattr(req.get_selector())
+        dirs = path.split('/')
+        dirs = map(unquote, dirs)
+        dirs, file = dirs[:-1], dirs[-1]
+        if dirs and not dirs[0]:
+            dirs = dirs[1:]
+        try:
+            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
+            type = file and 'I' or 'D'
+            for attr in attrs:
+                attr, value = splitvalue(attr)
+                if attr.lower() == 'type' and \
+                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
+                    type = value.upper()
+            fp, retrlen = fw.retrfile(file, type)
+            headers = ""
+            mtype = mimetypes.guess_type(req.get_full_url())[0]
+            if mtype:
+                headers += "Content-type: %s\n" % mtype
+            if retrlen is not None and retrlen >= 0:
+                headers += "Content-length: %d\n" % retrlen
+            sf = StringIO(headers)
+            headers = mimetools.Message(sf)
+            return addinfourl(fp, headers, req.get_full_url())
+        except ftplib.all_errors, msg:
+            raise URLError, ('ftp error: %s' % msg), sys.exc_info()[2]
+
+    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
+        fw = ftpwrapper(user, passwd, host, port, dirs, timeout)
+##        fw.ftp.set_debuglevel(1)
+        return fw
+
+class CacheFTPHandler(FTPHandler):
+    # XXX would be nice to have pluggable cache strategies
+    # XXX this stuff is definitely not thread safe
+    def __init__(self):
+        self.cache = {}
+        self.timeout = {}
+        self.soonest = 0
+        self.delay = 60
+        self.max_conns = 16
+
+    def setTimeout(self, t):
+        self.delay = t
+
+    def setMaxConns(self, m):
+        self.max_conns = m
+
+    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
+        key = user, host, port, '/'.join(dirs), timeout
+        if key in self.cache:
+            self.timeout[key] = time.time() + self.delay
+        else:
+            self.cache[key] = ftpwrapper(user, passwd, host, port, dirs, timeout)
+            self.timeout[key] = time.time() + self.delay
+        self.check_cache()
+        return self.cache[key]
+
+    def check_cache(self):
+        # first check for old ones
+        t = time.time()
+        if self.soonest <= t:
+            for k, v in self.timeout.items():
+                if v < t:
+                    self.cache[k].close()
+                    del self.cache[k]
+                    del self.timeout[k]
+        self.soonest = min(self.timeout.values())
+
+        # then check the size
+        if len(self.cache) == self.max_conns:
+            for k, v in self.timeout.items():
+                if v == self.soonest:
+                    del self.cache[k]
+                    del self.timeout[k]
+                    break
+            self.soonest = min(self.timeout.values())


More information about the pypy-commit mailing list