[Python-checkins] r80101 - in python/trunk: Lib/test/test_urlparse.py Lib/urlparse.py Misc/NEWS

Ezio Melotti ezio.melotti at gmail.com
Mon Apr 19 10:31:00 CEST 2010


Hi,

On 16/04/2010 5.46, senthil.kumaran wrote:
> Author: senthil.kumaran
> Date: Fri Apr 16 04:46:46 2010
> New Revision: 80101
>
> Log:
> Fix issue2987: RFC2732 support for urlparse (IPv6 addresses)
>
>
>
> Modified:
>     python/trunk/Lib/test/test_urlparse.py
>     python/trunk/Lib/urlparse.py
>     python/trunk/Misc/NEWS
>
> Modified: python/trunk/Lib/test/test_urlparse.py
> ==============================================================================
> --- python/trunk/Lib/test/test_urlparse.py	(original)
> +++ python/trunk/Lib/test/test_urlparse.py	Fri Apr 16 04:46:46 2010
> @@ -238,10 +238,44 @@
>           #self.checkJoin(RFC2396_BASE, '?y', 'http://a/b/c/?y')
>           #self.checkJoin(RFC2396_BASE, ';x', 'http://a/b/c/;x')
>
> +
>       def test_RFC3986(self):
>           self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
>           self.checkJoin(RFC2396_BASE, ';x', 'http://a/b/c/;x')
>
> +    def test_RFC2732(self):
> +        for url, hostname, port in [
> +            ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
> +            ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
> +            ('http://[::1]:5432/foo/', '::1', 5432),
> +            ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
> +            ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
> +            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
> +             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
> +            ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
> +            ('http://[::ffff:12.34.56.78]:5432/foo/',
> +             '::ffff:12.34.56.78', 5432),
> +            ('http://Test.python.org/foo/', 'test.python.org', None),
> +            ('http://12.34.56.78/foo/', '12.34.56.78', None),
> +            ('http://[::1]/foo/', '::1', None),
> +            ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
> +            ('http://[dead:beef::]/foo/', 'dead:beef::', None),
> +            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
> +             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
> +            ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
> +            ('http://[::ffff:12.34.56.78]/foo/',
> +             '::ffff:12.34.56.78', None),
> +            ]:
> +            urlparsed = urlparse.urlparse(url)
> +            self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
> +
> +        for invalid_url in [
> +                'http://::12.34.56.78]/',
> +                'http://[::1/foo/',
> +                'http://[::ffff:12.34.56.78']:
> +            self.assertRaises(ValueError, lambda : urlparse.urlparse(invalid_url).hostname)
> +            self.assertRaises(ValueError, lambda : urlparse.urlparse(invalid_url))

For all these 3 invalid URLs the exception is raised by 
urlparse.urlparse(), so the one raised by .hostname is not tested (and 
the two assertRaises are equivalent).
If it's possible to parse an URL and have the exception raised by 
.hostname, a separate test should check that the urlparse() succeeded 
and that getting the hostname raises an exception. If it's not possible, 
maybe the raise in .hostname [1] should be an assert.
Also you can use self.assertRaises(ValueError, urlparse.urlparse, 
invalid_url) instead of the lambda, or even:
result = urlparse.urlparse(invalid_url)
with self.assertRaises(ValueError):
     result.hostname
on 2.7/3.2.

> +
>       def test_urldefrag(self):
>           for url, defrag, frag in [
>               ('http://python.org#frag', 'http://python.org', 'frag'),
>
> Modified: python/trunk/Lib/urlparse.py
> ==============================================================================
> --- python/trunk/Lib/urlparse.py	(original)
> +++ python/trunk/Lib/urlparse.py	Fri Apr 16 04:46:46 2010
> @@ -64,22 +64,26 @@
>
>       @property
>       def hostname(self):
> -        netloc = self.netloc
> -        if "@" in netloc:
> -            netloc = netloc.rsplit("@", 1)[1]
> -        if ":" in netloc:
> -            netloc = netloc.split(":", 1)[0]
> -        return netloc.lower() or None
> +        netloc = self.netloc.split('@')[-1]
> +        if '[' in netloc and ']' in netloc:
> +            return netloc.split(']')[0][1:].lower()

[1]:
> +        elif '[' in netloc or ']' in netloc:
> +            raise ValueError("Invalid IPv6 hostname")
> +        elif ':' in netloc:
> +            return netloc.split(':')[0].lower()
> +        elif netloc == '':
> +            return None
> +        else:
> +            return netloc.lower()
>
>       @property
>       def port(self):
> -        netloc = self.netloc
> -        if "@" in netloc:
> -            netloc = netloc.rsplit("@", 1)[1]
> -        if ":" in netloc:
> -            port = netloc.split(":", 1)[1]
> +        netloc = self.netloc.split('@')[-1].split(']')[-1]
> +        if ':' in netloc:
> +            port = netloc.split(':')[1]
>               return int(port, 10)
> -        return None
> +        else:
> +            return None
>
>   from collections import namedtuple
>
> @@ -124,6 +128,10 @@
>
>   def _splitnetloc(url, start=0):
>       delim = len(url)   # position of end of domain part of url, default is end
> +    if '[' in url:     # check for invalid IPv6 URL
> +        if not ']' in url: raise ValueError("Invalid IPv6 URL")
> +    elif ']' in url:
> +        if not '[' in url: raise ValueError("Invalid IPv6 URL")

Isn't this equivalent to:
if (('[' in url and ']' not in url) or
     (']' in url and '[' not in url)):
     raise ValueError("Invalid IPv6 URL")
I find this version more readable and it avoids 4 if and the duplication 
of the raise. The 'delim = len(url)' can also be moved after this check.

>       for c in '/?#':    # look for delimiters; the order is NOT important
>           wdelim = url.find(c, start)        # find first of this delim
>           if wdelim>= 0:                    # if found
>
> Modified: python/trunk/Misc/NEWS
> ==============================================================================
> --- python/trunk/Misc/NEWS	(original)
> +++ python/trunk/Misc/NEWS	Fri Apr 16 04:46:46 2010
> @@ -15,6 +15,9 @@
>   Library
>   -------
>
> +- Issue #2987: RFC2732 support for urlparse (IPv6 addresses). Patch by Tony
> +  Locke and Hans Ulrich Niedermann.
> +
>   - Issue #7585: difflib context and unified diffs now place a tab between
>     filename and date, conforming to the 'standards' they were originally
>     designed to follow.  This improves compatibility with patch tools.
> _______________________________________________
> Python-checkins mailing list
> Python-checkins at python.org
> http://mail.python.org/mailman/listinfo/python-checkins
>

This should also be documented and have a versionchanged added.

Best Regards,
Ezio Melotti


More information about the Python-checkins mailing list