[Python-Dev] Bug#137399: patch for urllib2.py: fix behavior with proxies

Chris Lawrence <lawrencc@debian.org>, 137399@bugs.debian.org
Fri, 08 Mar 2002 13:29:03 -0600


Package: python2.1, python2.2
Version: 2.1.2-2
Severity: normal
File: /usr/lib/python2.1/urllib2.py
Tags: patch upstream

The following patch against Python 2.1 fixes some problems with the
urllib2 module when used with proxies; in particular, when the proxy
URL contains credentials, as in
$http_proxy="http://user:passwd@host:port/".  It also generates the
correct Host header for proxy requests, taking the host from the full
URL on the request line rather than from the proxy (some proxies,
such as oops, get confused otherwise, despite RFC 2616 section 5.2,
which says they are to ignore the Host header in the case of a full
URL on the request line).

I believe this patch will apply against 2.2 and CVS as well, but I
haven't checked yet.

--- /usr/lib/python2.1/urllib2.py	Fri Jan 18 11:07:32 2002
+++ urllib2.py	Fri Mar  8 13:22:14 2002
@@ -478,11 +478,14 @@
     def proxy_open(self, req, proxy, type):
         orig_type = req.get_type()
         type, r_type = splittype(proxy)
-        host, XXX = splithost(r_type)
-        if '@' in host:
-            user_pass, host = host.split('@', 1)
+        if '@' in r_type:
+            user_pass, r_type = r_type.split('@', 1)
+            if user_pass[:2] == '//':
+                user_pass = user_pass[2:]
+            r_type = '//'+r_type
             user_pass = base64.encodestring(unquote(user_pass)).strip()
             req.add_header('Proxy-Authorization', 'Basic '+user_pass)
+        host, XXX = splithost(r_type)
         host = unquote(host)
         req.set_proxy(host, type)
         if orig_type == type:
@@ -780,6 +783,7 @@
         hexrep.append(hex(n)[-1])
     return ''.join(hexrep)
 
+_selectorexp = re.compile(r'[a-z]+://([^/]+)/?', re.I)
 
 class AbstractHTTPHandler(BaseHandler):
 
@@ -787,23 +791,29 @@
         host = req.get_host()
         if not host:
             raise URLError('no host given')
+        selector = req.get_selector()
 
         try:
             h = http_class(host) # will parse host:port
             if req.has_data():
                 data = req.get_data()
-                h.putrequest('POST', req.get_selector())
+                h.putrequest('POST', selector)
                 if not req.headers.has_key('Content-type'):
                     h.putheader('Content-type',
                                 'application/x-www-form-urlencoded')
                 if not req.headers.has_key('Content-length'):
                     h.putheader('Content-length', '%d' % len(data))
             else:
-                h.putrequest('GET', req.get_selector())
+                h.putrequest('GET', selector)
         except socket.error, err:
             raise URLError(err)
 
-        h.putheader('Host', host)
+        m = _selectorexp.match(selector)
+        if m:
+            h.putheader('Host', m.group(1))
+        else:
+            h.putheader('Host', host)
+        
         for args in self.parent.addheaders:
             h.putheader(*args)
         for k, v in req.headers.items():