Revert "bpo-27657: Fix urlparse() with numeric paths (GH-661)" (#18526)
https://github.com/python/cpython/commit/505b6015a1579fc50d9697e4a285ecc64976397a commit: 505b6015a1579fc50d9697e4a285ecc64976397a branch: 3.7 author: Senthil Kumaran <senthil@uthcode.com> committer: GitHub <noreply@github.com> date: 2020-02-16T13:07:25-08:00 summary: Revert "bpo-27657: Fix urlparse() with numeric paths (GH-661)" (#18526) This reverts commit 82b5f6b16e051f8a2ac6e87ba86b082fa1c4a77f. The change broke the backwards compatibility of parsing behavior in a patch release of Python (3.7.6). A decision was taken to revert this patch in 3.7.7. In https://bugs.python.org/issue27657 it was decided that the previous behavior like
urlparse('localhost:8080') ParseResult(scheme='', netloc='', path='localhost:8080', params='', query='', fragment='')
urlparse('undefined:8080') ParseResult(scheme='', netloc='', path='undefined:8080', params='', query='', fragment='')
needs to be preserved in patch releases as a number of users rely upon it. Explicitly mention the releases involved with the revert in NEWS. Adopt the wording suggested by @ned-deily. files: A Misc/NEWS.d/next/Library/2020-02-16-07-47-55.bpo-27657.9kZchc.rst M Lib/test/test_urlparse.py M Lib/urllib/parse.py diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 999272d3e3ad6..68f633ca3a7db 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -709,17 +709,15 @@ def test_withoutscheme(self): def test_portseparator(self): # Issue 754016 makes changes for port separator ':' from scheme separator - self.assertEqual(urllib.parse.urlparse("http:80"), ('http','','80','','','')) - self.assertEqual(urllib.parse.urlparse("https:80"), ('https','','80','','','')) - self.assertEqual(urllib.parse.urlparse("path:80"), ('path','','80','','','')) + self.assertEqual(urllib.parse.urlparse("path:80"), + ('','','path:80','','','')) self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','','')) self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','','')) self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"), ('http','www.python.org:80','','','','')) # As usual, need to check bytes input as well - self.assertEqual(urllib.parse.urlparse(b"http:80"), (b'http',b'',b'80',b'',b'',b'')) - self.assertEqual(urllib.parse.urlparse(b"https:80"), (b'https',b'',b'80',b'',b'',b'')) - self.assertEqual(urllib.parse.urlparse(b"path:80"), (b'path',b'',b'80',b'',b'',b'')) + self.assertEqual(urllib.parse.urlparse(b"path:80"), + (b'',b'',b'path:80',b'',b'',b'')) self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b'')) self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b'')) self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"), diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index dc3de51a5c2cb..94df275c4677e 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py 
@@ -426,11 +426,31 @@ def urlsplit(url, scheme='', allow_fragments=True): netloc = query = fragment = '' i = url.find(':') if i > 0: + if url[:i] == 'http': # optimize the common case + url = url[i+1:] + if url[:2] == '//': + netloc, url = _splitnetloc(url, 2) + if (('[' in netloc and ']' not in netloc) or + (']' in netloc and '[' not in netloc)): + raise ValueError("Invalid IPv6 URL") + if allow_fragments and '#' in url: + url, fragment = url.split('#', 1) + if '?' in url: + url, query = url.split('?', 1) + _checknetloc(netloc) + v = SplitResult('http', netloc, url, query, fragment) + _parse_cache[key] = v + return _coerce_result(v) for c in url[:i]: if c not in scheme_chars: break else: - scheme, url = url[:i].lower(), url[i+1:] + # make sure "url" is not actually a port number (in which case + # "scheme" is really part of the path) + rest = url[i+1:] + if not rest or any(c not in '0123456789' for c in rest): + # not a port number + scheme, url = url[:i].lower(), rest if url[:2] == '//': netloc, url = _splitnetloc(url, 2) diff --git a/Misc/NEWS.d/next/Library/2020-02-16-07-47-55.bpo-27657.9kZchc.rst b/Misc/NEWS.d/next/Library/2020-02-16-07-47-55.bpo-27657.9kZchc.rst new file mode 100644 index 0000000000000..2a0aca855c2ba --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-02-16-07-47-55.bpo-27657.9kZchc.rst @@ -0,0 +1,5 @@ +The original fix for bpo-27657, "Fix urlparse() with numeric paths" (GH-16839) +included in 3.7.6, inadvertently introduced a behavior change that broke +several third-party packages relying on the original undefined parsing +behavior. The change is reverted in 3.7.7, restoring the behavior of 3.7.5 and +earlier releases.
participants (1)
-
Senthil Kumaran