[Python-checkins] gh-96035: Make urllib.parse.urlparse reject non-numeric ports (#98273)
JelleZijlstra
webhook-mailer at python.org
Thu Oct 20 17:01:02 EDT 2022
https://github.com/python/cpython/commit/6f15ca8c7afa23e1adc87f2b66b958b721f9acab
commit: 6f15ca8c7afa23e1adc87f2b66b958b721f9acab
branch: main
author: Ben Kallus <49924171+kenballus at users.noreply.github.com>
committer: JelleZijlstra <jelle.zijlstra at gmail.com>
date: 2022-10-20T14:00:56-07:00
summary:
gh-96035: Make urllib.parse.urlparse reject non-numeric ports (#98273)
Co-authored-by: Jelle Zijlstra <jelle.zijlstra at gmail.com>
files:
A Misc/NEWS.d/next/Library/2022-10-14-19-57-37.gh-issue-96035.0xcX-p.rst
M Lib/test/test_urlparse.py
M Lib/urllib/parse.py
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 81d6018bd1a4..59a601d9e85b 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -653,13 +653,16 @@ def test_attributes_bad_port(self):
"""Check handling of invalid ports."""
for bytes in (False, True):
for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
- for port in ("foo", "1.5", "-1", "0x10"):
+ for port in ("foo", "1.5", "-1", "0x10", "-0", "1_1", " 1", "1 ", "६"):
with self.subTest(bytes=bytes, parse=parse, port=port):
netloc = "www.example.net:" + port
url = "http://" + netloc
if bytes:
- netloc = netloc.encode("ascii")
- url = url.encode("ascii")
+ if netloc.isascii() and port.isascii():
+ netloc = netloc.encode("ascii")
+ url = url.encode("ascii")
+ else:
+ continue
p = parse(url)
self.assertEqual(p.netloc, netloc)
with self.assertRaises(ValueError):
@@ -1199,6 +1202,7 @@ def test_splitnport(self):
self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55))
self.assertEqual(splitnport('parrot:cheese'), ('parrot', None))
self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None))
+ self.assertEqual(splitnport('parrot: +1_0 '), ('parrot', None))
def test_splitquery(self):
# Normal cases are exercised by other tests; ensure that we also
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 3734c73948c6..9a3102afd63b 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -167,12 +167,11 @@ def hostname(self):
def port(self):
port = self._hostinfo[1]
if port is not None:
- try:
- port = int(port, 10)
- except ValueError:
- message = f'Port could not be cast to integer value as {port!r}'
- raise ValueError(message) from None
- if not ( 0 <= port <= 65535):
+ if port.isdigit() and port.isascii():
+ port = int(port)
+ else:
+ raise ValueError(f"Port could not be cast to integer value as {port!r}")
+ if not (0 <= port <= 65535):
raise ValueError("Port out of range 0-65535")
return port
@@ -1132,15 +1131,15 @@ def splitnport(host, defport=-1):
def _splitnport(host, defport=-1):
"""Split host and port, returning numeric port.
Return given default port if no ':' found; defaults to -1.
- Return numerical port if a valid number are found after ':'.
+ Return numerical port if a valid number is found after ':'.
Return None if ':' but not a valid number."""
host, delim, port = host.rpartition(':')
if not delim:
host = port
elif port:
- try:
+ if port.isdigit() and port.isascii():
nport = int(port)
- except ValueError:
+ else:
nport = None
return host, nport
return host, defport
diff --git a/Misc/NEWS.d/next/Library/2022-10-14-19-57-37.gh-issue-96035.0xcX-p.rst b/Misc/NEWS.d/next/Library/2022-10-14-19-57-37.gh-issue-96035.0xcX-p.rst
new file mode 100644
index 000000000000..f04a0fd0915e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-10-14-19-57-37.gh-issue-96035.0xcX-p.rst
@@ -0,0 +1,3 @@
+Fix bug in :func:`urllib.parse.urlparse` that causes certain port numbers
+containing whitespace, underscores, plus and minus signs, or non-ASCII digits to be
+incorrectly accepted.
More information about the Python-checkins mailing list