[Python-checkins] r80277 - in python/trunk/Lib: test/test_urlparse.py urlparse.py

senthil.kumaran python-checkins at python.org
Tue Apr 20 22:37:59 CEST 2010


Author: senthil.kumaran
Date: Tue Apr 20 22:37:59 2010
New Revision: 80277

Log:
Issue2987 - Added an additional invalid URL test case and improved the invalid URL checking code.



Modified:
   python/trunk/Lib/test/test_urlparse.py
   python/trunk/Lib/urlparse.py

Modified: python/trunk/Lib/test/test_urlparse.py
==============================================================================
--- python/trunk/Lib/test/test_urlparse.py	(original)
+++ python/trunk/Lib/test/test_urlparse.py	Tue Apr 20 22:37:59 2010
@@ -272,6 +272,7 @@
         for invalid_url in [
                 'http://::12.34.56.78]/',
                 'http://[::1/foo/',
+                'http://[::1/foo/bad]/bad',
                 'http://[::ffff:12.34.56.78']:
             self.assertRaises(ValueError, lambda : urlparse.urlparse(invalid_url).hostname)
             self.assertRaises(ValueError, lambda : urlparse.urlparse(invalid_url))

Modified: python/trunk/Lib/urlparse.py
==============================================================================
--- python/trunk/Lib/urlparse.py	(original)
+++ python/trunk/Lib/urlparse.py	Tue Apr 20 22:37:59 2010
@@ -90,8 +90,6 @@
         netloc = self.netloc.split('@')[-1]
         if '[' in netloc and ']' in netloc:
             return netloc.split(']')[0][1:].lower()
-        elif '[' in netloc or ']' in netloc:
-            raise ValueError("Invalid IPv6 hostname")
         elif ':' in netloc:
             return netloc.split(':')[0].lower()
         elif netloc == '':
@@ -151,10 +149,6 @@
 
 def _splitnetloc(url, start=0):
     delim = len(url)   # position of end of domain part of url, default is end
-    if '[' in url:     # check for invalid IPv6 URL
-        if not ']' in url: raise ValueError("Invalid IPv6 URL")
-    elif ']' in url:
-        if not '[' in url: raise ValueError("Invalid IPv6 URL")
     for c in '/?#':    # look for delimiters; the order is NOT important
         wdelim = url.find(c, start)        # find first of this delim
         if wdelim >= 0:                    # if found
@@ -182,6 +176,10 @@
             url = url[i+1:]
             if url[:2] == '//':
                 netloc, url = _splitnetloc(url, 2)
+                if '[' in netloc :
+                    if not ']' in netloc: raise ValueError("Invalid IPv6 URL")
+                if ']' in netloc:
+                    if not '[' in netloc: raise ValueError("Invalid IPv6 URL")
             if allow_fragments and '#' in url:
                 url, fragment = url.split('#', 1)
             if '?' in url:
@@ -197,6 +195,10 @@
 
     if url[:2] == '//':
         netloc, url = _splitnetloc(url, 2)
+        if '[' in netloc:
+            if not ']' in netloc: raise ValueError("Invalid IPv6 URL")
+        if ']' in netloc:
+            if not '[' in netloc: raise ValueError("Invalid IPv6 URL")
     if allow_fragments and scheme in uses_fragment and '#' in url:
         url, fragment = url.split('#', 1)
     if scheme in uses_query and '?' in url:


More information about the Python-checkins mailing list