[Python-checkins] python/dist/src/Lib httplib.py,1.54,1.55

jhylton@users.sourceforge.net jhylton@users.sourceforge.net
Sat, 06 Jul 2002 11:48:10 -0700


Update of /cvsroot/python/python/dist/src/Lib
In directory usw-pr-cvs1:/tmp/cvs-serv2953

Modified Files:
	httplib.py 
Log Message:
Handle HTTP/0.9 responses.

Section 19.6 of RFC 2616 (HTTP/1.1):

   It is beyond the scope of a protocol specification to mandate
   compliance with previous versions. HTTP/1.1 was deliberately
   designed, however, to make supporting previous versions easy....

   And we would expect HTTP/1.1 clients to:

      - recognize the format of the Status-Line for HTTP/1.0 and 1.1
        responses;

      - understand any valid response in the format of HTTP/0.9, 1.0, or
        1.1.

The changes to the code do handle responses in the format of HTTP/0.9.
Some users may consider this a bug because all responses with a
sufficiently corrupted status line will look like an HTTP/0.9
response.  These users can pass strict=1 to the HTTP constructors to
get a BadStatusLine exception instead.

While this is a new feature of sorts, it enhances the robustness of
the code (be tolerant in what you accept).  Thus, I consider it a bug
fix candidate.

XXX strict needs to be documented.


Index: httplib.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/httplib.py,v
retrieving revision 1.54
retrieving revision 1.55
diff -C2 -d -r1.54 -r1.55
*** httplib.py	2 Jul 2002 20:19:08 -0000	1.54
--- httplib.py	6 Jul 2002 18:48:07 -0000	1.55
***************
*** 96,102 ****
  
  class HTTPResponse:
!     def __init__(self, sock, debuglevel=0):
          self.fp = sock.makefile('rb', 0)
          self.debuglevel = debuglevel
  
          self.msg = None
--- 96,112 ----
  
  class HTTPResponse:
! 
!     # strict: If true, raise BadStatusLine if the status line can't be
!     # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
!     # false because it prevents clients from talking to HTTP/0.9
!     # servers.  Note that a response with a sufficiently corrupted
!     # status line will look like an HTTP/0.9 response.
! 
!     # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
! 
!     def __init__(self, sock, debuglevel=0, strict=0):
          self.fp = sock.makefile('rb', 0)
          self.debuglevel = debuglevel
+         self.strict = strict
  
          self.msg = None
***************
*** 113,116 ****
--- 123,127 ----
  
      def _read_status(self):
+         # Initialize with Simple-Response defaults
          line = self.fp.readline()
          if self.debuglevel > 0:
***************
*** 123,132 ****
                  reason = ""
              except ValueError:
!                 version = "HTTP/0.9"
!                 status = "200"
!                 reason = ""
!         if version[:5] != 'HTTP/':
!             self.close()
!             raise BadStatusLine(line)
  
          # The status code is a three-digit number
--- 134,148 ----
                  reason = ""
              except ValueError:
!                 # empty version will cause next test to fail and status
!                 # will be treated as 0.9 response.
!                 version = ""
!         if not version.startswith('HTTP/'):
!             if self.strict:
!                 self.close()
!                 raise BadStatusLine(line)
!             else:
!                 # assume it's a Simple-Response from an 0.9 server
!                 self.fp = LineAndFileWrapper(line, self.fp)
!                 return "HTTP/0.9", 200, ""
  
          # The status code is a three-digit number
***************
*** 170,173 ****
--- 186,190 ----
          if self.version == 9:
              self.chunked = 0
+             self.will_close = 1
              self.msg = mimetools.Message(StringIO())
              return
***************
*** 354,364 ****
      auto_open = 1
      debuglevel = 0
  
!     def __init__(self, host, port=None):
          self.sock = None
          self.__response = None
          self.__state = _CS_IDLE
! 
          self._set_hostport(host, port)
  
      def _set_hostport(self, host, port):
--- 371,384 ----
      auto_open = 1
      debuglevel = 0
+     strict = 0
  
!     def __init__(self, host, port=None, strict=None):
          self.sock = None
          self.__response = None
          self.__state = _CS_IDLE
!         
          self._set_hostport(host, port)
+         if strict is not None:
+             self.strict = strict
  
      def _set_hostport(self, host, port):
***************
*** 611,617 ****
  
          if self.debuglevel > 0:
!             response = self.response_class(self.sock, self.debuglevel)
          else:
!             response = self.response_class(self.sock)
  
          response._begin()
--- 631,638 ----
  
          if self.debuglevel > 0:
!             response = self.response_class(self.sock, self.debuglevel,
!                                            strict=self.strict)
          else:
!             response = self.response_class(self.sock, strict=self.strict)
  
          response._begin()
***************
*** 734,739 ****
      default_port = HTTPS_PORT
  
!     def __init__(self, host, port=None, key_file=None, cert_file=None):
!         HTTPConnection.__init__(self, host, port)
          self.key_file = key_file
          self.cert_file = cert_file
--- 755,761 ----
      default_port = HTTPS_PORT
  
!     def __init__(self, host, port=None, key_file=None, cert_file=None,
!                  strict=None):
!         HTTPConnection.__init__(self, host, port, strict)
          self.key_file = key_file
          self.cert_file = cert_file
***************
*** 761,765 ****
      _connection_class = HTTPConnection
  
!     def __init__(self, host='', port=None):
          "Provide a default host, since the superclass requires one."
  
--- 783,787 ----
      _connection_class = HTTPConnection
  
!     def __init__(self, host='', port=None, strict=None):
          "Provide a default host, since the superclass requires one."
  
***************
*** 771,775 ****
          # an error when we attempt to connect. Presumably, the client code
          # will call connect before then, with a proper host.
!         self._setup(self._connection_class(host, port))
  
      def _setup(self, conn):
--- 793,797 ----
          # an error when we attempt to connect. Presumably, the client code
          # will call connect before then, with a proper host.
!         self._setup(self._connection_class(host, port, strict))
  
      def _setup(self, conn):
***************
*** 851,855 ****
          _connection_class = HTTPSConnection
  
!         def __init__(self, host='', port=None, **x509):
              # provide a default host, pass the X509 cert info
  
--- 873,878 ----
          _connection_class = HTTPSConnection
  
!         def __init__(self, host='', port=None, key_file=None, cert_file=None,
!                      strict=None):
              # provide a default host, pass the X509 cert info
  
***************
*** 857,866 ****
              if port == 0:
                  port = None
!             self._setup(self._connection_class(host, port, **x509))
  
              # we never actually use these for anything, but we keep them
              # here for compatibility with post-1.5.2 CVS.
!             self.key_file = x509.get('key_file')
!             self.cert_file = x509.get('cert_file')
  
  
--- 880,890 ----
              if port == 0:
                  port = None
!             self._setup(self._connection_class(host, port, key_file,
!                                                cert_file, strict))
  
              # we never actually use these for anything, but we keep them
              # here for compatibility with post-1.5.2 CVS.
!             self.key_file = key_file
!             self.cert_file = cert_file
  
  
***************
*** 907,910 ****
--- 931,993 ----
  error = HTTPException
  
+ class LineAndFileWrapper:
+     """A limited file-like object for HTTP/0.9 responses."""
+ 
+     # The status-line parsing code calls readline(), which normally
+     # gets the HTTP status line.  For a 0.9 response, however, this is
+     # actually the first line of the body!  Clients need to get a
+     # readable file object that contains that line.
+ 
+     def __init__(self, line, file):
+         self._line = line
+         self._file = file
+         self._line_consumed = 0
+         self._line_offset = 0
+         self._line_left = len(line)
+ 
+     def __getattr__(self, attr):
+         return getattr(self._file, attr)
+ 
+     def _done(self):
+         # called when the last byte is read from the line.  After the
+         # call, all read methods are delegated to the underlying file
+         # object.
+         self._line_consumed = 1
+         self.read = self._file.read
+         self.readline = self._file.readline
+         self.readlines = self._file.readlines
+ 
+     def read(self, amt=None):
+         assert not self._line_consumed and self._line_left
+         if amt is None or amt > self._line_left:
+             s = self._line[self._line_offset:]
+             self._done()
+             if amt is None:
+                 return s + self._file.read()
+             else:
+                 return s + self._file.read(amt - len(s))                
+         else:
+             assert amt <= self._line_left
+             i = self._line_offset
+             j = i + amt
+             s = self._line[i:j]
+             self._line_offset = j
+             self._line_left -= amt
+             if self._line_left == 0:
+                 self._done()
+             return s
+         
+     def readline(self):
+         s = self._line[self._line_offset:]
+         self._done()
+         return s
+ 
+     def readlines(self, size=None):
+         L = [self._line[self._line_offset:]]
+         self._done()
+         if size is None:
+             return L + self._file.readlines()
+         else:
+             return L + self._file.readlines(size)
  
  #
***************
*** 971,974 ****
--- 1054,1087 ----
          print "read", len(hs.getfile().read())
  
+ 
+     # Test a buggy server -- returns garbled status line.
+     # http://www.yahoo.com/promotions/mom_com97/supermom.html
+     c = HTTPConnection("promotions.yahoo.com")
+     c.set_debuglevel(1)
+     c.connect()
+     c.request("GET", "/promotions/mom_com97/supermom.html")
+     r = c.getresponse()
+     print r.status, r.version
+     lines = r.read().split("\n")
+     print "\n".join(lines[:5])
+ 
+     c = HTTPConnection("promotions.yahoo.com", strict=1)
+     c.set_debuglevel(1)
+     c.connect()
+     c.request("GET", "/promotions/mom_com97/supermom.html")
+     try:
+         r = c.getresponse()
+     except BadStatusLine, err:
+         print "strict mode failed as expected"
+     else:
+         print "XXX strict mode should have failed"
+ 
+     for strict in 0, 1:
+         h = HTTP(strict=strict)
+         h.connect("promotions.yahoo.com")
+         h.putrequest('GET', "/promotions/mom_com97/supermom.html")
+         h.endheaders()
+         status, reason, headers = h.getreply()
+         assert (strict and status == -1) or status == 200, (strict, status)
  
  if __name__ == '__main__':