[Python-checkins] cpython (2.7): Fix Issue #11703 - urllib2's geturl() does not handle the fragment in the URL properly.

senthil.kumaran python-checkins at python.org
Wed Apr 13 01:36:24 CEST 2011


http://hg.python.org/cpython/rev/6e73f75ee034
changeset:   69292:6e73f75ee034
branch:      2.7
parent:      69282:09459397f807
user:        Senthil Kumaran <orsenthil at gmail.com>
date:        Wed Apr 13 07:31:45 2011 +0800
summary:
  Fix Issue #11703 - urllib2's geturl() does not handle the fragment in the URL properly.

files:
  Lib/test/test_urllib.py     |  10 ++++++++++
  Lib/test/test_urllib2.py    |  15 ++++++++++++++-
  Lib/test/test_urllib2net.py |   2 +-
  Lib/urllib2.py              |   7 +++++--
  4 files changed, 30 insertions(+), 4 deletions(-)


diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -148,6 +148,16 @@
         finally:
             self.unfakehttp()
 
+    def test_url_fragment(self):
+        # Issue #11703: geturl() omits fragments in the original URL.
+        url = 'http://docs.python.org/library/urllib.html#OK'
+        self.fakehttp('Hello!')
+        try:
+            fp = urllib.urlopen(url)
+            self.assertEqual(fp.geturl(), url)
+        finally:
+            self.unfakehttp()
+
     def test_read_bogus(self):
         # urlopen() should raise IOError for many error codes.
         self.fakehttp('''HTTP/1.1 401 Authentication Required
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
--- a/Lib/test/test_urllib2.py
+++ b/Lib/test/test_urllib2.py
@@ -1007,6 +1007,15 @@
         o.open("http://www.example.com/")
         self.assertTrue(not hh.req.has_header("Cookie"))
 
+    def test_redirect_fragment(self):
+        redirected_url = 'http://www.example.com/index.html#OK\r\n\r\n'
+        hh = MockHTTPHandler(302, 'Location: ' + redirected_url)
+        hdeh = urllib2.HTTPDefaultErrorHandler()
+        hrh = urllib2.HTTPRedirectHandler()
+        o = build_test_opener(hh, hdeh, hrh)
+        fp = o.open('http://www.example.com')
+        self.assertEqual(fp.geturl(), redirected_url.strip())
+
     def test_proxy(self):
         o = OpenerDirector()
         ph = urllib2.ProxyHandler(dict(http="proxy.example.com:3128"))
@@ -1292,12 +1301,16 @@
         req = Request("<URL:http://www.python.org>")
         self.assertEqual("www.python.org", req.get_host())
 
-    def test_urlwith_fragment(self):
+    def test_url_fragment(self):
         req = Request("http://www.python.org/?qs=query#fragment=true")
         self.assertEqual("/?qs=query", req.get_selector())
         req = Request("http://www.python.org/#fun=true")
         self.assertEqual("/", req.get_selector())
 
+        # Issue 11703: geturl() omits fragment in the original URL.
+        url = 'http://docs.python.org/library/urllib2.html#OK'
+        req = Request(url)
+        self.assertEqual(req.get_full_url(), url)
 
 def test_main(verbose=None):
     from test import test_urllib2
diff --git a/Lib/test/test_urllib2net.py b/Lib/test/test_urllib2net.py
--- a/Lib/test/test_urllib2net.py
+++ b/Lib/test/test_urllib2net.py
@@ -160,7 +160,7 @@
             req = urllib2.Request(urlwith_frag)
             res = urllib2.urlopen(req)
             self.assertEqual(res.geturl(),
-                    "http://docs.python.org/glossary.html")
+                    "http://docs.python.org/glossary.html#glossary")
 
     def test_fileno(self):
         req = urllib2.Request("http://www.python.org")
diff --git a/Lib/urllib2.py b/Lib/urllib2.py
--- a/Lib/urllib2.py
+++ b/Lib/urllib2.py
@@ -190,7 +190,7 @@
                  origin_req_host=None, unverifiable=False):
         # unwrap('<URL:type://host/path>') --> 'type://host/path'
         self.__original = unwrap(url)
-        self.__original, fragment = splittag(self.__original)
+        self.__original, self.__fragment = splittag(self.__original)
         self.type = None
         # self.__r_type is what's left after doing the splittype
         self.host = None
@@ -236,7 +236,10 @@
         return self.data
 
     def get_full_url(self):
-        return self.__original
+        if self.__fragment:
+            return '%s#%s' % (self.__original, self.__fragment)
+        else:
+            return self.__original
 
     def get_type(self):
         if self.type is None:

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list