[Python-checkins] r86676 - in python/branches/py3k: Lib/test/test_urllib2.py Lib/urllib/parse.py Lib/urllib/request.py Misc/NEWS

senthil.kumaran python-checkins at python.org
Mon Nov 22 05:48:26 CET 2010


Author: senthil.kumaran
Date: Mon Nov 22 05:48:26 2010
New Revision: 86676

Log:
Fix Issue4493 - urllib2 adds '/' to the path component of a URL when it does not
start with one. This behavior is exhibited by browsers and other clients.



Modified:
   python/branches/py3k/Lib/test/test_urllib2.py
   python/branches/py3k/Lib/urllib/parse.py
   python/branches/py3k/Lib/urllib/request.py
   python/branches/py3k/Misc/NEWS

Modified: python/branches/py3k/Lib/test/test_urllib2.py
==============================================================================
--- python/branches/py3k/Lib/test/test_urllib2.py	(original)
+++ python/branches/py3k/Lib/test/test_urllib2.py	Mon Nov 22 05:48:26 2010
@@ -848,6 +848,25 @@
             p_ds_req = h.do_request_(ds_req)
             self.assertEqual(p_ds_req.unredirected_hdrs["Host"],"example.com")
 
+    def test_fixpath_in_weirdurls(self):
+        # Issue4493: urllib2 should supply '/' for urls whose path does not
+        # start with '/'
+
+        h = urllib.request.AbstractHTTPHandler()
+        o = h.parent = MockOpener()
+
+        weird_url = 'http://www.python.org?getspam'
+        req = Request(weird_url)
+        newreq = h.do_request_(req)
+        self.assertEqual(newreq.host,'www.python.org')
+        self.assertEqual(newreq.selector,'/?getspam')
+
+        url_without_path = 'http://www.python.org'
+        req = Request(url_without_path)
+        newreq = h.do_request_(req)
+        self.assertEqual(newreq.host,'www.python.org')
+        self.assertEqual(newreq.selector,'')
+
 
     def test_errors(self):
         h = urllib.request.HTTPErrorProcessor()

Modified: python/branches/py3k/Lib/urllib/parse.py
==============================================================================
--- python/branches/py3k/Lib/urllib/parse.py	(original)
+++ python/branches/py3k/Lib/urllib/parse.py	Mon Nov 22 05:48:26 2010
@@ -699,7 +699,12 @@
         _hostprog = re.compile('^//([^/?]*)(.*)$')
 
     match = _hostprog.match(url)
-    if match: return match.group(1, 2)
+    if match:
+        host_port = match.group(1)
+        path = match.group(2)
+        if path and not path.startswith('/'):
+            path = '/' + path
+        return host_port, path
     return None, url
 
 _userprog = None

Modified: python/branches/py3k/Lib/urllib/request.py
==============================================================================
--- python/branches/py3k/Lib/urllib/request.py	(original)
+++ python/branches/py3k/Lib/urllib/request.py	Mon Nov 22 05:48:26 2010
@@ -105,7 +105,7 @@
 # check for SSL
 try:
     import ssl
-except:
+except ImportError:
     _have_ssl = False
 else:
     _have_ssl = True

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Mon Nov 22 05:48:26 2010
@@ -32,6 +32,9 @@
 Library
 -------
 
+- Issue #4493: urllib2 adds '/' in front of path components which do not
+  start with '/'. This is common behavior exhibited by browsers and other clients.
+
 - Issue #6378: idle.bat now runs with the appropriate Python version rather than
   the system default. Patch by Sridhar Ratnakumar.
 


More information about the Python-checkins mailing list