[Python-checkins] cpython (3.2): Issue9374 - Generic parsing of query and fragment portion of urls for any scheme

senthil.kumaran python-checkins at python.org
Sat May 19 02:13:01 CEST 2012


http://hg.python.org/cpython/rev/a9d43e21f7d8
changeset:   77045:a9d43e21f7d8
branch:      3.2
parent:      77042:895246f1a06a
user:        Senthil Kumaran <senthil at uthcode.com>
date:        Sat May 19 08:12:00 2012 +0800
summary:
  Issue9374 - Generic parsing of query and fragment portion of urls for any scheme

files:
  Lib/test/test_urlparse.py |   9 +++++++++
  Lib/urllib/parse.py       |  11 ++---------
  Misc/NEWS                 |   3 +++
  3 files changed, 14 insertions(+), 9 deletions(-)


diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -636,11 +636,20 @@
                          ('s3', 'foo.com', '/stuff', '', '', ''))
         self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
                          ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
+        self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
+                         ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
+        self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
+                         ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))
+
         # And for bytes...
         self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
                          (b's3', b'foo.com', b'/stuff', b'', b'', b''))
         self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
                          (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
+        self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
+                         (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
+        self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
+                         (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))
 
     def test_mixed_types_rejected(self):
         # Several functions that process either strings or ASCII encoded bytes
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -44,16 +44,9 @@
                'imap', 'wais', 'file', 'mms', 'https', 'shttp',
                'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
                'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh']
-non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
-                    'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']
 uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',
                'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
                'mms', '', 'sftp']
-uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms',
-              'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '']
-uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news',
-                 'nntp', 'wais', 'https', 'shttp', 'snews',
-                 'file', 'prospero', '']
 
 # Characters valid in scheme names
 scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
@@ -357,9 +350,9 @@
         if (('[' in netloc and ']' not in netloc) or
                 (']' in netloc and '[' not in netloc)):
             raise ValueError("Invalid IPv6 URL")
-    if allow_fragments and scheme in uses_fragment and '#' in url:
+    if allow_fragments and '#' in url:
         url, fragment = url.split('#', 1)
-    if scheme in uses_query and '?' in url:
+    if '?' in url:
         url, query = url.split('?', 1)
     v = SplitResult(scheme, netloc, url, query, fragment)
     _parse_cache[key] = v
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -63,6 +63,9 @@
 Library
 -------
 
+- Issue #9374: Generic parsing of query and fragment portions of url for any
+  scheme. Supported both by RFC3986 and RFC2396.
+
 - Issue #14798: Fix the functions in pyclbr to raise an ImportError
   when the first part of a dotted name is not a package. Patch by
   Xavier de Gaye.

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list