[Python-checkins] python/dist/src/Lib urllib2.py,1.56,1.57

jhylton at users.sourceforge.net jhylton at users.sourceforge.net
Sun Dec 14 00:27:36 EST 2003


Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1:/tmp/cvs-serv20853/Lib

Modified Files:
	urllib2.py 
Log Message:
SF patch 852995: add processors feature to urllib2

John J. Lee writes: "the patch makes it possible to implement
functionality like HTTP cookie handling, Refresh handling,
etc. etc. using handler objects. At the moment urllib2's handler
objects aren't quite up to the job, which results in a lot of
cut-n-paste and subclassing. I believe the changes are
backwards-compatible, with the exception of people who've
reimplemented build_opener()'s functionality -- those people would
need to call opener.add_handler(HTTPErrorProcessor()).

The main change is allowing handlers to implement
methods like:

http_request(request)
http_response(request, response)

in addition to the usual:

http_open(request)
http_error{_*}(...) 
"

Note that the change isn't well documented, at least in part because
handlers aren't well documented at all.  This needs to be fixed.

Add a bunch of new tests.  It appears that none of these tests
actually use the network, so they don't need to be guarded by a
resource flag.


Index: urllib2.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/urllib2.py,v
retrieving revision 1.56
retrieving revision 1.57
diff -C2 -d -r1.56 -r1.57
*** urllib2.py	21 Oct 2003 18:07:07 -0000	1.56
--- urllib2.py	14 Dec 2003 05:27:34 -0000	1.57
***************
*** 106,109 ****
--- 106,110 ----
  import time
  import urlparse
+ import bisect
  
  try:
***************
*** 193,196 ****
--- 194,198 ----
          for key, value in headers.items():
              self.add_header(key, value)
+         self.unredirected_hdrs = {}
  
      def __getattr__(self, attr):
***************
*** 249,252 ****
--- 251,263 ----
          self.headers[key.capitalize()] = val
  
+     def add_unredirected_header(self, key, val):
+         # will not be added to a redirected request
+         self.unredirected_hdrs[key.capitalize()] = val
+ 
+     def has_header(self, header_name):
+         return bool(header_name in self.headers or
+                     header_name in self.unredirected_hdrs)
+ 
+ 
  class OpenerDirector:
      def __init__(self):
***************
*** 257,277 ****
          self.handle_open = {}
          self.handle_error = {}
  
      def add_handler(self, handler):
!         added = 0
          for meth in dir(handler):
!             if meth[-5:] == '_open':
!                 protocol = meth[:-5]
!                 if protocol in self.handle_open:
!                     self.handle_open[protocol].append(handler)
!                     self.handle_open[protocol].sort()
!                 else:
!                     self.handle_open[protocol] = [handler]
!                 added = 1
!                 continue
!             i = meth.find('_')
!             j = meth[i+1:].find('_') + i + 1
!             if j != -1 and meth[i+1:j] == 'error':
!                 proto = meth[:i]
                  kind = meth[j+1:]
                  try:
--- 268,283 ----
          self.handle_open = {}
          self.handle_error = {}
+         self.process_response = {}
+         self.process_request = {}
  
      def add_handler(self, handler):
!         added = False
          for meth in dir(handler):
!             i = meth.find("_")
!             protocol = meth[:i]
!             condition = meth[i+1:]
! 
!             if condition.startswith("error"):
!                 j = meth[i+1:].find("_") + i + 1
                  kind = meth[j+1:]
                  try:
***************
*** 279,294 ****
                  except ValueError:
                      pass
!                 dict = self.handle_error.get(proto, {})
!                 if kind in dict:
!                     dict[kind].append(handler)
!                     dict[kind].sort()
!                 else:
!                     dict[kind] = [handler]
!                 self.handle_error[proto] = dict
!                 added = 1
                  continue
          if added:
!             self.handlers.append(handler)
!             self.handlers.sort()
              handler.add_parent(self)
  
--- 285,309 ----
                  except ValueError:
                      pass
!                 lookup = self.handle_error.get(protocol, {})
!                 self.handle_error[protocol] = lookup
!             elif condition == "open":
!                 kind = protocol
!                 lookup = getattr(self, "handle_"+condition)
!             elif condition in ["response", "request"]:
!                 kind = protocol
!                 lookup = getattr(self, "process_"+condition)
!             else:
                  continue
+ 
+             handlers = lookup.setdefault(kind, [])
+             if handlers:
+                 bisect.insort(handlers, handler)
+             else:
+                 handlers.append(handler)
+             added = True
+ 
          if added:
!             # XXX why does self.handlers need to be sorted?
!             bisect.insort(self.handlers, handler)
              handler.add_parent(self)
  
***************
*** 321,324 ****
--- 336,358 ----
                  req.add_data(data)
  
+         protocol = req.get_type()
+ 
+         # pre-process request
+         meth_name = protocol+"_request"
+         for processor in self.process_request.get(protocol, []):
+             meth = getattr(processor, meth_name)
+             req = meth(req)
+ 
+         response = self._open(req, data)
+ 
+         # post-process response
+         meth_name = protocol+"_response"
+         for processor in self.process_response.get(protocol, []):
+             meth = getattr(processor, meth_name)
+             response = meth(req, response)
+ 
+         return response
+ 
+     def _open(self, req, data=None):
          result = self._call_chain(self.handle_open, 'default',
                                    'default_open', req)
***************
*** 326,331 ****
              return result
  
!         type_ = req.get_type()
!         result = self._call_chain(self.handle_open, type_, type_ + \
                                    '_open', req)
          if result:
--- 360,365 ----
              return result
  
!         protocol = req.get_type()
!         result = self._call_chain(self.handle_open, protocol, protocol +
                                    '_open', req)
          if result:
***************
*** 340,344 ****
              dict = self.handle_error['http'] # https is not different than http
              proto = args[2]  # YUCK!
!             meth_name = 'http_error_%d' % proto
              http_err = 1
              orig_args = args
--- 374,378 ----
              dict = self.handle_error['http'] # https is not different than http
              proto = args[2]  # YUCK!
!             meth_name = 'http_error_%s' % proto
              http_err = 1
              orig_args = args
***************
*** 373,377 ****
      default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                         HTTPDefaultErrorHandler, HTTPRedirectHandler,
!                        FTPHandler, FileHandler]
      if hasattr(httplib, 'HTTPS'):
          default_classes.append(HTTPSHandler)
--- 407,411 ----
      default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                         HTTPDefaultErrorHandler, HTTPRedirectHandler,
!                        FTPHandler, FileHandler, HTTPErrorProcessor]
      if hasattr(httplib, 'HTTPS'):
          default_classes.append(HTTPSHandler)
***************
*** 401,406 ****
--- 435,442 ----
      def add_parent(self, parent):
          self.parent = parent
+         
      def close(self):
          self.parent = None
+         
      def __lt__(self, other):
          if not hasattr(other, "handler_order"):
***************
*** 412,415 ****
--- 448,466 ----
  
  
+ class HTTPErrorProcessor(BaseHandler):
+     """Process HTTP error responses."""
+     handler_order = 1000  # after all other processing
+ 
+     def http_response(self, request, response):
+         code, msg, hdrs = response.code, response.msg, response.info()
+ 
+         if code != 200:
+             response = self.parent.error(
+                 'http', request, response, code, msg, hdrs)
+ 
+         return response
+ 
+     https_response = http_response
+ 
  class HTTPDefaultErrorHandler(BaseHandler):
      def http_error_default(self, req, fp, code, msg, hdrs):
***************
*** 417,420 ****
--- 468,474 ----
  
  class HTTPRedirectHandler(BaseHandler):
+     # maximum number of redirections before assuming we're in a loop
+     max_redirections = 10
+ 
      def redirect_request(self, req, fp, code, msg, headers, newurl):
          """Return a Request or None in response to a redirect.
***************
*** 460,471 ****
  
          # loop detection
!         new.error_302_dict = {}
!         if hasattr(req, 'error_302_dict'):
!             if len(req.error_302_dict)>10 or \
!                newurl in req.error_302_dict:
                  raise HTTPError(req.get_full_url(), code,
                                  self.inf_msg + msg, headers, fp)
!             new.error_302_dict.update(req.error_302_dict)
!         new.error_302_dict[newurl] = newurl
  
          # Don't close the fp until we are sure that we won't use it
--- 514,531 ----
  
          # loop detection
!         # .redirect_dict has a key (url, code) if url was previously
!         # visited as a result of a redirection with that code.  The
!         # code is needed in addition to the URL because visiting a URL
!         # twice isn't necessarily a loop: there is more than one way
!         # to redirect (301, 302, 303, 307, refresh).
!         key = (newurl, code)
!         if hasattr(req, 'redirect_dict'):
!             visited = new.redirect_dict = req.redirect_dict
!             if key in visited or len(visited) >= self.max_redirections:
                  raise HTTPError(req.get_full_url(), code,
                                  self.inf_msg + msg, headers, fp)
!         else:
!             visited = new.redirect_dict = req.redirect_dict = {}
!         visited[key] = None
  
          # Don't close the fp until we are sure that we won't use it
***************
*** 854,857 ****
--- 914,949 ----
  class AbstractHTTPHandler(BaseHandler):
  
+     def __init__(self, debuglevel=0):
+         self._debuglevel = debuglevel
+ 
+     def set_http_debuglevel(self, level):
+         self._debuglevel = level
+ 
+     def do_request(self, request):
+         host = request.get_host()
+         if not host:
+             raise URLError('no host given')
+ 
+         if request.has_data():  # POST
+             data = request.get_data()
+             if not request.has_header('Content-type'):
+                 request.add_unredirected_header(
+                     'Content-type',
+                     'application/x-www-form-urlencoded')
+             if not request.has_header('Content-length'):
+                 request.add_unredirected_header(
+                     'Content-length', '%d' % len(data))
+ 
+         scheme, sel = splittype(request.get_selector())
+         sel_host, sel_path = splithost(sel)
+         if not request.has_header('Host'):
+             request.add_unredirected_header('Host', sel_host or host)
+         for name, value in self.parent.addheaders:
+             name = name.capitalize()
+             if not request.has_header(name):
+                 request.add_unredirected_header(name, value)
+ 
+         return request
+ 
      # XXX Should rewrite do_open() to use the new httplib interface,
      # would be a little simpler.
***************
*** 863,886 ****
  
          h = http_class(host) # will parse host:port
!         if req.has_data():
!             data = req.get_data()
!             h.putrequest('POST', req.get_selector())
!             if not 'Content-type' in req.headers:
!                 h.putheader('Content-type',
!                             'application/x-www-form-urlencoded')
!             if not 'Content-length' in req.headers:
!                 h.putheader('Content-length', '%d' % len(data))
!         else:
!             h.putrequest('GET', req.get_selector())
  
!         scheme, sel = splittype(req.get_selector())
!         sel_host, sel_path = splithost(sel)
!         h.putheader('Host', sel_host or host)
!         for name, value in self.parent.addheaders:
!             name = name.capitalize()
!             if name not in req.headers:
!                 h.putheader(name, value)
          for k, v in req.headers.items():
              h.putheader(k, v)
          # httplib will attempt to connect() here.  be prepared
          # to convert a socket error to a URLError.
--- 955,965 ----
  
          h = http_class(host) # will parse host:port
!         h.set_debuglevel(self._debuglevel)
  
!         h.putrequest(req.get_method(), req.get_selector())
          for k, v in req.headers.items():
              h.putheader(k, v)
+         for k, v in req.unredirected_hdrs.items():
+             h.putheader(k, v)
          # httplib will attempt to connect() here.  be prepared
          # to convert a socket error to a URLError.
***************
*** 890,901 ****
              raise URLError(err)
          if req.has_data():
!             h.send(data)
  
          code, msg, hdrs = h.getreply()
          fp = h.getfile()
!         if code == 200:
!             return addinfourl(fp, hdrs, req.get_full_url())
!         else:
!             return self.parent.error('http', req, fp, code, msg, hdrs)
  
  
--- 969,981 ----
              raise URLError(err)
          if req.has_data():
!             h.send(req.get_data())
  
          code, msg, hdrs = h.getreply()
          fp = h.getfile()
!         response = addinfourl(fp, hdrs, req.get_full_url())
!         # XXXX should these be methods, for uniformity with rest of interface?
!         response.code = code
!         response.msg = msg
!         return response
  
  
***************
*** 905,908 ****
--- 985,989 ----
          return self.do_open(httplib.HTTP, req)
  
+     http_request = AbstractHTTPHandler.do_request
  
  if hasattr(httplib, 'HTTPS'):
***************
*** 912,915 ****
--- 993,997 ----
              return self.do_open(httplib.HTTPS, req)
  
+         https_request = AbstractHTTPHandler.do_request
  
  class UnknownHandler(BaseHandler):





More information about the Python-checkins mailing list