[Python-checkins] cpython (merge 3.5 -> default): Issue #14132, Issue #17214: Merge two redirect handling fixes from 3.5

martin.panter python-checkins at python.org
Mon May 16 04:15:11 EDT 2016


https://hg.python.org/cpython/rev/841a9a3f3cf6
changeset:   101372:841a9a3f3cf6
parent:      101367:8c973a2f4f50
parent:      101371:cb09fdef19f5
user:        Martin Panter <vadmium+py at gmail.com>
date:        Mon May 16 07:45:28 2016 +0000
summary:
  Issue #14132, Issue #17214: Merge two redirect handling fixes from 3.5

files:
  Lib/test/test_urllib.py  |   7 ++-
  Lib/test/test_urllib2.py |  51 ++++++++++++++++++++++++++++
  Lib/urllib/request.py    |  14 ++++++-
  Misc/NEWS                |   9 ++++
  4 files changed, 76 insertions(+), 5 deletions(-)


diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -1,4 +1,4 @@
-"""Regresssion tests for urllib"""
+"""Regresssion tests for what was in Python 2's "urllib" module"""
 
 import urllib.parse
 import urllib.request
@@ -86,10 +86,11 @@
 
         # buffer to store data for verification in urlopen tests.
         buf = None
-        fakesock = FakeSocket(fakedata)
 
         def connect(self):
-            self.sock = self.fakesock
+            self.sock = FakeSocket(self.fakedata)
+            type(self).fakesock = self.sock
+    FakeHTTPConnection.fakedata = fakedata
 
     return FakeHTTPConnection
 
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
--- a/Lib/test/test_urllib2.py
+++ b/Lib/test/test_urllib2.py
@@ -1208,6 +1208,57 @@
         fp = o.open('http://www.example.com')
         self.assertEqual(fp.geturl(), redirected_url.strip())
 
+    def test_redirect_no_path(self):
+        # Issue 14132: Relative redirect strips original path
+        real_class = http.client.HTTPConnection
+        response1 = b"HTTP/1.1 302 Found\r\nLocation: ?query\r\n\r\n"
+        http.client.HTTPConnection = test_urllib.fakehttp(response1)
+        self.addCleanup(setattr, http.client, "HTTPConnection", real_class)
+        urls = iter(("/path", "/path?query"))
+        def request(conn, method, url, *pos, **kw):
+            self.assertEqual(url, next(urls))
+            real_class.request(conn, method, url, *pos, **kw)
+            # Change response for subsequent connection
+            conn.__class__.fakedata = b"HTTP/1.1 200 OK\r\n\r\nHello!"
+        http.client.HTTPConnection.request = request
+        fp = urllib.request.urlopen("http://python.org/path")
+        self.assertEqual(fp.geturl(), "http://python.org/path?query")
+
+    def test_redirect_encoding(self):
+        # Some characters in the redirect target may need special handling,
+        # but most ASCII characters should be treated as already encoded
+        class Handler(urllib.request.HTTPHandler):
+            def http_open(self, req):
+                result = self.do_open(self.connection, req)
+                self.last_buf = self.connection.buf
+                # Set up a normal response for the next request
+                self.connection = test_urllib.fakehttp(
+                    b'HTTP/1.1 200 OK\r\n'
+                    b'Content-Length: 3\r\n'
+                    b'\r\n'
+                    b'123'
+                )
+                return result
+        handler = Handler()
+        opener = urllib.request.build_opener(handler)
+        tests = (
+            (b'/p\xC3\xA5-dansk/', b'/p%C3%A5-dansk/'),
+            (b'/spaced%20path/', b'/spaced%20path/'),
+            (b'/spaced path/', b'/spaced%20path/'),
+            (b'/?p\xC3\xA5-dansk', b'/?p%C3%A5-dansk'),
+        )
+        for [location, result] in tests:
+            with self.subTest(repr(location)):
+                handler.connection = test_urllib.fakehttp(
+                    b'HTTP/1.1 302 Redirect\r\n'
+                    b'Location: ' + location + b'\r\n'
+                    b'\r\n'
+                )
+                response = opener.open('http://example.com/')
+                expected = b'GET ' + result + b' '
+                request = handler.last_buf
+                self.assertTrue(request.startswith(expected), repr(request))
+
     def test_proxy(self):
         o = OpenerDirector()
         ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -91,6 +91,7 @@
 import posixpath
 import re
 import socket
+import string
 import sys
 import time
 import collections
@@ -676,8 +677,12 @@
         # from the user (of urllib.request, in this case).  In practice,
         # essentially all clients do redirect in this case, so we do
         # the same.
-        # be conciliant with URIs containing a space
+
+        # Be conciliant with URIs containing a space.  This is mainly
+        # redundant with the more complete encoding done in http_error_302(),
+        # but it is kept for compatibility with other callers.
         newurl = newurl.replace(' ', '%20')
+
         CONTENT_HEADERS = ("content-length", "content-type")
         newheaders = dict((k, v) for k, v in req.headers.items()
                           if k.lower() not in CONTENT_HEADERS)
@@ -712,11 +717,16 @@
                 "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                 headers, fp)
 
-        if not urlparts.path:
+        if not urlparts.path and urlparts.netloc:
             urlparts = list(urlparts)
             urlparts[2] = "/"
         newurl = urlunparse(urlparts)
 
+        # http.client.parse_headers() decodes as ISO-8859-1.  Recover the
+        # original bytes and percent-encode non-ASCII bytes, and any special
+        # characters such as the space.
+        newurl = quote(
+            newurl, encoding="iso-8859-1", safe=string.punctuation)
         newurl = urljoin(req.full_url, newurl)
 
         # XXX Probably want to forget about the state of the current
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -277,6 +277,15 @@
 Library
 -------
 
+- Issue #14132: Fix urllib.request redirect handling when the target only has
+  a query string.  Original fix by Ján Janech.
+
+- Issue #17214: The "urllib.request" module now percent-encodes non-ASCII
+  bytes found in redirect target URLs.  Some servers send Location header
+  fields with non-ASCII bytes, but "http.client" requires the request target
+  to be ASCII-encodable, otherwise a UnicodeEncodeError is raised.  Based on
+  patch by Christian Heimes.
+
 - Issue #27033: The default value of the decode_data parameter for
   smtpd.SMTPChannel and smtpd.SMTPServer constructors is changed to False.
 

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list