[Python-checkins] python/dist/src/Lib httplib.py,1.42.10.6,1.42.10.7

jhylton@users.sourceforge.net jhylton@users.sourceforge.net
Fri, 12 Jul 2002 07:23:45 -0700


Update of /cvsroot/python/python/dist/src/Lib
In directory usw-pr-cvs1:/tmp/cvs-serv18737

Modified Files:
      Tag: release22-maint
	httplib.py 
Log Message:
Backport changes.

Change _begin() back to begin().
Fix for SF bug 579107.
Fix for SF bug #432621: httplib: multiple Set-Cookie headers
Fix SF bug #575360
Handle HTTP/0.9 responses.


Index: httplib.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/httplib.py,v
retrieving revision 1.42.10.6
retrieving revision 1.42.10.7
diff -C2 -d -r1.42.10.6 -r1.42.10.7
*** httplib.py	2 Jul 2002 17:19:47 -0000	1.42.10.6
--- httplib.py	12 Jul 2002 14:23:38 -0000	1.42.10.7
***************
*** 94,102 ****
  _CS_REQ_SENT = 'Request-sent'
  
  
  class HTTPResponse:
!     def __init__(self, sock, debuglevel=0):
          self.fp = sock.makefile('rb', 0)
          self.debuglevel = debuglevel
  
          self.msg = None
--- 94,217 ----
  _CS_REQ_SENT = 'Request-sent'
  
+ class HTTPMessage(mimetools.Message):
+ 
+     def addheader(self, key, value):
+         """Add header for field key handling repeats."""
+         prev = self.dict.get(key)
+         if prev is None:
+             self.dict[key] = value
+         else:
+             combined = ", ".join((prev, value))
+             self.dict[key] = combined
+ 
+     def addcontinue(self, key, more):
+         """Add more field data from a continuation line."""
+         prev = self.dict[key]
+         self.dict[key] = prev + "\n " + more
+ 
+     def readheaders(self):
+         """Read header lines.
+ 
+         Read header lines up to the entirely blank line that terminates them.
+         The (normally blank) line that ends the headers is skipped, but not
+         included in the returned list.  If a non-header line ends the headers,
+         (which is an error), an attempt is made to backspace over it; it is
+         never included in the returned list.
+ 
+         The variable self.status is set to the empty string if all went well,
+         otherwise it is an error message.  The variable self.headers is a
+         completely uninterpreted list of lines contained in the header (so
+         printing them will reproduce the header exactly as it appears in the
+         file).
+ 
+         If multiple header fields with the same name occur, they are combined
+         according to the rules in RFC 2616 sec 4.2:
+ 
+         Appending each subsequent field-value to the first, each separated
+         by a comma. The order in which header fields with the same field-name
+         are received is significant to the interpretation of the combined
+         field value.
+         """
+         # XXX The implementation overrides the readheaders() method of
+         # rfc822.Message.  The base class design isn't amenable to
+         # customized behavior here so the method here is a copy of the
+         # base class code with a few small changes.
+ 
+         self.dict = {}
+         self.unixfrom = ''
+         self.headers = list = []
+         self.status = ''
+         headerseen = ""
+         firstline = 1
+         startofline = unread = tell = None
+         if hasattr(self.fp, 'unread'):
+             unread = self.fp.unread
+         elif self.seekable:
+             tell = self.fp.tell
+         while 1:
+             if tell:
+                 try:
+                     startofline = tell()
+                 except IOError:
+                     startofline = tell = None
+                     self.seekable = 0
+             line = self.fp.readline()
+             if not line:
+                 self.status = 'EOF in headers'
+                 break
+             # Skip unix From name time lines
+             if firstline and line.startswith('From '):
+                 self.unixfrom = self.unixfrom + line
+                 continue
+             firstline = 0
+             if headerseen and line[0] in ' \t':
+                 # XXX Not sure if continuation lines are handled properly
+                 # for http and/or for repeating headers
+                 # It's a continuation line.
+                 list.append(line)
+                 x = self.dict[headerseen] + "\n " + line.strip()
+                 self.addcontinue(headerseen, line.strip())
+                 continue
+             elif self.iscomment(line):
+                 # It's a comment.  Ignore it.
+                 continue
+             elif self.islast(line):
+                 # Note! No pushback here!  The delimiter line gets eaten.
+                 break
+             headerseen = self.isheader(line)
+             if headerseen:
+                 # It's a legal header line, save it.
+                 list.append(line)
+                 self.addheader(headerseen, line[len(headerseen)+1:].strip())
+                 continue
+             else:
+                 # It's not a header line; throw it back and stop here.
+                 if not self.dict:
+                     self.status = 'No headers'
+                 else:
+                     self.status = 'Non-header line where header expected'
+                 # Try to undo the read.
+                 if unread:
+                     unread(line)
+                 elif tell:
+                     self.fp.seek(startofline)
+                 else:
+                     self.status = self.status + '; bad seek'
+                 break
  
  class HTTPResponse:
! 
!     # strict: If true, raise BadStatusLine if the status line can't be
!     # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
!     # false because it prevents clients from talking to HTTP/0.9
!     # servers.  Note that a response with a sufficiently corrupted
!     # status line will look like an HTTP/0.9 response.
! 
!     # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
! 
!     def __init__(self, sock, debuglevel=0, strict=0):
          self.fp = sock.makefile('rb', 0)
          self.debuglevel = debuglevel
+         self.strict = strict
  
          self.msg = None
***************
*** 113,116 ****
--- 228,232 ----
  
      def _read_status(self):
+         # Initialize with Simple-Response defaults
          line = self.fp.readline()
          if self.debuglevel > 0:
***************
*** 123,132 ****
                  reason = ""
              except ValueError:
!                 version = "HTTP/0.9"
!                 status = "200"
!                 reason = ""
!         if version[:5] != 'HTTP/':
!             self.close()
!             raise BadStatusLine(line)
  
          # The status code is a three-digit number
--- 239,253 ----
                  reason = ""
              except ValueError:
!                 # empty version will cause next test to fail and status
!                 # will be treated as 0.9 response.
!                 version = ""
!         if not version.startswith('HTTP/'):
!             if self.strict:
!                 self.close()
!                 raise BadStatusLine(line)
!             else:
!                 # assume it's a Simple-Response from an 0.9 server
!                 self.fp = LineAndFileWrapper(line, self.fp)
!                 return "HTTP/0.9", 200, ""
  
          # The status code is a three-digit number
***************
*** 139,143 ****
          return version, status, reason
  
!     def _begin(self):
          if self.msg is not None:
              # we've already started reading the response
--- 260,264 ----
          return version, status, reason
  
!     def begin(self):
          if self.msg is not None:
              # we've already started reading the response
***************
*** 170,177 ****
          if self.version == 9:
              self.chunked = 0
!             self.msg = mimetools.Message(StringIO())
              return
  
!         self.msg = mimetools.Message(self.fp, 0)
          if self.debuglevel > 0:
              for hdr in self.msg.headers:
--- 291,299 ----
          if self.version == 9:
              self.chunked = 0
!             self.will_close = 1
!             self.msg = HTTPMessage(StringIO())
              return
  
!         self.msg = HTTPMessage(self.fp, 0)
          if self.debuglevel > 0:
              for hdr in self.msg.headers:
***************
*** 354,364 ****
      auto_open = 1
      debuglevel = 0
  
!     def __init__(self, host, port=None):
          self.sock = None
          self.__response = None
          self.__state = _CS_IDLE
! 
          self._set_hostport(host, port)
  
      def _set_hostport(self, host, port):
--- 476,489 ----
      auto_open = 1
      debuglevel = 0
+     strict = 0
  
!     def __init__(self, host, port=None, strict=None):
          self.sock = None
          self.__response = None
          self.__state = _CS_IDLE
!         
          self._set_hostport(host, port)
+         if strict is not None:
+             self.strict = strict
  
      def _set_hostport(self, host, port):
***************
*** 369,373 ****
                      port = int(host[i+1:])
                  except ValueError:
!                     raise InvalidURL, "nonnumeric port: '%s'"%host[i+1:]
                  host = host[:i]
              else:
--- 494,498 ----
                      port = int(host[i+1:])
                  except ValueError:
!                     raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
                  host = host[:i]
              else:
***************
*** 611,619 ****
  
          if self.debuglevel > 0:
!             response = self.response_class(self.sock, self.debuglevel)
          else:
!             response = self.response_class(self.sock)
  
!         response._begin()
          assert response.will_close != _UNKNOWN
          self.__state = _CS_IDLE
--- 736,745 ----
  
          if self.debuglevel > 0:
!             response = self.response_class(self.sock, self.debuglevel,
!                                            strict=self.strict)
          else:
!             response = self.response_class(self.sock, strict=self.strict)
  
!         response.begin()
          assert response.will_close != _UNKNOWN
          self.__state = _CS_IDLE
***************
*** 628,632 ****
          return response
  
! class SSLFile:
      """File-like object wrapping an SSL socket."""
  
--- 754,804 ----
          return response
  
! # The next several classes are used to define FakeSocket, a socket-like
! # interface to an SSL connection.
! 
! # The primary complexity comes from faking a makefile() method.  The
! # standard socket makefile() implementation calls dup() on the socket
! # file descriptor.  As a consequence, clients can call close() on the
! # parent socket and its makefile children in any order.  The underlying
! # socket isn't closed until they are all closed.
! 
! # The implementation uses reference counting to keep the socket open
! # until the last client calls close().  SharedSocket keeps track of
! # the reference counting and SharedSocketClient provides a constructor
! # and close() method that call incref() and decref() correctly.
! 
! class SharedSocket:
! 
!     def __init__(self, sock):
!         self.sock = sock
!         self._refcnt = 0
! 
!     def incref(self):
!         self._refcnt += 1
! 
!     def decref(self):
!         self._refcnt -= 1
!         assert self._refcnt >= 0
!         if self._refcnt == 0:
!             self.sock.close()
! 
!     def __del__(self):
!         self.sock.close()
! 
! class SharedSocketClient:
! 
!     def __init__(self, shared):
!         self._closed = 0
!         self._shared = shared
!         self._shared.incref()
!         self._sock = shared.sock
! 
!     def close(self):
!         if not self._closed:
!             self._shared.decref()
!             self._closed = 1
!             self._shared = None
! 
! class SSLFile(SharedSocketClient):
      """File-like object wrapping an SSL socket."""
  
***************
*** 634,638 ****
      
      def __init__(self, sock, ssl, bufsize=None):
!         self._sock = sock
          self._ssl = ssl
          self._buf = ''
--- 806,810 ----
      
      def __init__(self, sock, ssl, bufsize=None):
!         SharedSocketClient.__init__(self, sock)
          self._ssl = ssl
          self._buf = ''
***************
*** 703,730 ****
              return line
  
!     def close(self):
!         self._sock.close()
  
- class FakeSocket:
      def __init__(self, sock, ssl):
!         self.__sock = sock
!         self.__ssl = ssl
  
      def makefile(self, mode, bufsize=None):
          if mode != 'r' and mode != 'rb':
              raise UnimplementedFileMode()
!         return SSLFile(self.__sock, self.__ssl, bufsize)
  
      def send(self, stuff, flags = 0):
!         return self.__ssl.write(stuff)
  
!     def sendall(self, stuff, flags = 0):
!         return self.__ssl.write(stuff)
  
      def recv(self, len = 1024, flags = 0):
!         return self.__ssl.read(len)
  
      def __getattr__(self, attr):
!         return getattr(self.__sock, attr)
  
  
--- 875,908 ----
              return line
  
! class FakeSocket(SharedSocketClient):
! 
!     class _closedsocket:
!         def __getattr__(self, name):
!             raise error(9, 'Bad file descriptor')
  
      def __init__(self, sock, ssl):
!         sock = SharedSocket(sock)
!         SharedSocketClient.__init__(self, sock)
!         self._ssl = ssl
! 
!     def close(self):
!         SharedSocketClient.close(self)
!         self._sock = self.__class__._closedsocket()
  
      def makefile(self, mode, bufsize=None):
          if mode != 'r' and mode != 'rb':
              raise UnimplementedFileMode()
!         return SSLFile(self._shared, self._ssl, bufsize)
  
      def send(self, stuff, flags = 0):
!         return self._ssl.write(stuff)
  
!     sendall = send
  
      def recv(self, len = 1024, flags = 0):
!         return self._ssl.read(len)
  
      def __getattr__(self, attr):
!         return getattr(self._sock, attr)
  
  
***************
*** 734,739 ****
      default_port = HTTPS_PORT
  
!     def __init__(self, host, port=None, key_file=None, cert_file=None):
!         HTTPConnection.__init__(self, host, port)
          self.key_file = key_file
          self.cert_file = cert_file
--- 912,918 ----
      default_port = HTTPS_PORT
  
!     def __init__(self, host, port=None, key_file=None, cert_file=None,
!                  strict=None):
!         HTTPConnection.__init__(self, host, port, strict)
          self.key_file = key_file
          self.cert_file = cert_file
***************
*** 761,765 ****
      _connection_class = HTTPConnection
  
!     def __init__(self, host='', port=None):
          "Provide a default host, since the superclass requires one."
  
--- 940,944 ----
      _connection_class = HTTPConnection
  
!     def __init__(self, host='', port=None, strict=None):
          "Provide a default host, since the superclass requires one."
  
***************
*** 771,775 ****
          # an error when we attempt to connect. Presumably, the client code
          # will call connect before then, with a proper host.
!         self._setup(self._connection_class(host, port))
  
      def _setup(self, conn):
--- 950,954 ----
          # an error when we attempt to connect. Presumably, the client code
          # will call connect before then, with a proper host.
!         self._setup(self._connection_class(host, port, strict))
  
      def _setup(self, conn):
***************
*** 851,855 ****
          _connection_class = HTTPSConnection
  
!         def __init__(self, host='', port=None, **x509):
              # provide a default host, pass the X509 cert info
  
--- 1030,1035 ----
          _connection_class = HTTPSConnection
  
!         def __init__(self, host='', port=None, key_file=None, cert_file=None,
!                      strict=None):
              # provide a default host, pass the X509 cert info
  
***************
*** 857,869 ****
              if port == 0:
                  port = None
!             self._setup(self._connection_class(host, port, **x509))
  
              # we never actually use these for anything, but we keep them
              # here for compatibility with post-1.5.2 CVS.
!             self.key_file = x509.get('key_file')
!             self.cert_file = x509.get('cert_file')
  
  
  class HTTPException(Exception):
      pass
  
--- 1037,1052 ----
              if port == 0:
                  port = None
!             self._setup(self._connection_class(host, port, key_file,
!                                                cert_file, strict))
  
              # we never actually use these for anything, but we keep them
              # here for compatibility with post-1.5.2 CVS.
!             self.key_file = key_file
!             self.cert_file = cert_file
  
  
  class HTTPException(Exception):
+     # Subclasses that define an __init__ must call Exception.__init__
+     # or define self.args.  Otherwise, str() will fail.
      pass
  
***************
*** 876,879 ****
--- 1059,1063 ----
  class UnknownProtocol(HTTPException):
      def __init__(self, version):
+         self.args = version,
          self.version = version
  
***************
*** 886,889 ****
--- 1070,1074 ----
  class IncompleteRead(HTTPException):
      def __init__(self, partial):
+         self.args = partial,
          self.partial = partial
  
***************
*** 902,905 ****
--- 1087,1091 ----
  class BadStatusLine(HTTPException):
      def __init__(self, line):
+         self.args = line,
          self.line = line
  
***************
*** 907,920 ****
  error = HTTPException
  
  
- #
- # snarfed from httplib.py for now...
- #
  def test():
      """Test this module.
  
!     The test consists of retrieving and displaying the Python
!     home page, along with the error code and error string returned
!     by the www.python.org server.
      """
  
--- 1093,1161 ----
  error = HTTPException
  
+ class LineAndFileWrapper:
+     """A limited file-like object for HTTP/0.9 responses."""
+ 
+     # The status-line parsing code calls readline(), which normally
+     # gets the HTTP status line.  For a 0.9 response, however, this is
+     # actually the first line of the body!  Clients need to get a
+     # readable file object that contains that line.
+ 
+     def __init__(self, line, file):
+         self._line = line
+         self._file = file
+         self._line_consumed = 0
+         self._line_offset = 0
+         self._line_left = len(line)
+ 
+     def __getattr__(self, attr):
+         return getattr(self._file, attr)
+ 
+     def _done(self):
+         # called when the last byte is read from the line.  After the
+         # call, all read methods are delegated to the underlying file
+         # obhect.
+         self._line_consumed = 1
+         self.read = self._file.read
+         self.readline = self._file.readline
+         self.readlines = self._file.readlines
+ 
+     def read(self, amt=None):
+         assert not self._line_consumed and self._line_left
+         if amt is None or amt > self._line_left:
+             s = self._line[self._line_offset:]
+             self._done()
+             if amt is None:
+                 return s + self._file.read()
+             else:
+                 return s + self._file.read(amt - len(s))                
+         else:
+             assert amt <= self._line_left
+             i = self._line_offset
+             j = i + amt
+             s = self._line[i:j]
+             self._line_offset = j
+             self._line_left -= amt
+             if self._line_left == 0:
+                 self._done()
+             return s
+         
+     def readline(self):
+         s = self._line[self._line_offset:]
+         self._done()
+         return s
+ 
+     def readlines(self, size=None):
+         L = [self._line[self._line_offset:]]
+         self._done()
+         if size is None:
+             return L + self._file.readlines()
+         else:
+             return L + self._file.readlines(size)
  
  def test():
      """Test this module.
  
!     A hodge podge of tests collected here, because they have too many
!     external dependencies for the regular test suite.
      """
  
***************
*** 937,945 ****
      print 'status =', status
      print 'reason =', reason
      print
      if headers:
          for header in headers.headers: print header.strip()
      print
-     print "read", len(h.getfile().read())
  
      # minimal test that code to extract host from url works
--- 1178,1186 ----
      print 'status =', status
      print 'reason =', reason
+     print "read", len(h.getfile().read())
      print
      if headers:
          for header in headers.headers: print header.strip()
      print
  
      # minimal test that code to extract host from url works
***************
*** 955,974 ****
  
      if hasattr(socket, 'ssl'):
!         host = 'sourceforge.net'
!         selector = '/projects/python'
!         hs = HTTPS()
!         hs.connect(host)
!         hs.putrequest('GET', selector)
!         hs.endheaders()
!         status, reason, headers = hs.getreply()
!         # XXX why does this give a 302 response?
!         print 'status =', status
!         print 'reason =', reason
!         print
!         if headers:
!             for header in headers.headers: print header.strip()
!         print
!         print "read", len(hs.getfile().read())
  
  
  if __name__ == '__main__':
--- 1196,1250 ----
  
      if hasattr(socket, 'ssl'):
!         
!         for host, selector in (('sourceforge.net', '/projects/python'),
!                                ('dbserv2.theopalgroup.com', '/mediumfile'),
!                                ('dbserv2.theopalgroup.com', '/smallfile'),
!                                ):
!             print "https://%s%s" % (host, selector)
!             hs = HTTPS()
!             hs.connect(host)
!             hs.putrequest('GET', selector)
!             hs.endheaders()
!             status, reason, headers = hs.getreply()
!             print 'status =', status
!             print 'reason =', reason
!             print "read", len(hs.getfile().read())
!             print
!             if headers:
!                 for header in headers.headers: print header.strip()
!             print
! 
!     return
  
+     # Test a buggy server -- returns garbled status line.
+     # http://www.yahoo.com/promotions/mom_com97/supermom.html
+     c = HTTPConnection("promotions.yahoo.com")
+     c.set_debuglevel(1)
+     c.connect()
+     c.request("GET", "/promotions/mom_com97/supermom.html")
+     r = c.getresponse()
+     print r.status, r.version
+     lines = r.read().split("\n")
+     print "\n".join(lines[:5])
+ 
+     c = HTTPConnection("promotions.yahoo.com", strict=1)
+     c.set_debuglevel(1)
+     c.connect()
+     c.request("GET", "/promotions/mom_com97/supermom.html")
+     try:
+         r = c.getresponse()
+     except BadStatusLine, err:
+         print "strict mode failed as expected"
+         print err
+     else:
+         print "XXX strict mode should have failed"
+ 
+     for strict in 0, 1:
+         h = HTTP(strict=strict)
+         h.connect("promotions.yahoo.com")
+         h.putrequest('GET', "/promotions/mom_com97/supermom.html")
+         h.endheaders()
+         status, reason, headers = h.getreply()
+         assert (strict and status == -1) or status == 200, (strict, status)
  
  if __name__ == '__main__':