[Python-checkins] gh-96035: Make urllib.parse.urlparse reject non-numeric ports (#98273)

JelleZijlstra webhook-mailer at python.org
Thu Oct 20 17:01:02 EDT 2022


https://github.com/python/cpython/commit/6f15ca8c7afa23e1adc87f2b66b958b721f9acab
commit: 6f15ca8c7afa23e1adc87f2b66b958b721f9acab
branch: main
author: Ben Kallus <49924171+kenballus at users.noreply.github.com>
committer: JelleZijlstra <jelle.zijlstra at gmail.com>
date: 2022-10-20T14:00:56-07:00
summary:

gh-96035: Make urllib.parse.urlparse reject non-numeric ports (#98273)

Co-authored-by: Jelle Zijlstra <jelle.zijlstra at gmail.com>

files:
A Misc/NEWS.d/next/Library/2022-10-14-19-57-37.gh-issue-96035.0xcX-p.rst
M Lib/test/test_urlparse.py
M Lib/urllib/parse.py

diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 81d6018bd1a4..59a601d9e85b 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -653,13 +653,16 @@ def test_attributes_bad_port(self):
         """Check handling of invalid ports."""
         for bytes in (False, True):
             for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
-                for port in ("foo", "1.5", "-1", "0x10"):
+                for port in ("foo", "1.5", "-1", "0x10", "-0", "1_1", " 1", "1 ", "६"):
                     with self.subTest(bytes=bytes, parse=parse, port=port):
                         netloc = "www.example.net:" + port
                         url = "http://" + netloc
                         if bytes:
-                            netloc = netloc.encode("ascii")
-                            url = url.encode("ascii")
+                            if netloc.isascii() and port.isascii():
+                                netloc = netloc.encode("ascii")
+                                url = url.encode("ascii")
+                            else:
+                                continue
                         p = parse(url)
                         self.assertEqual(p.netloc, netloc)
                         with self.assertRaises(ValueError):
@@ -1199,6 +1202,7 @@ def test_splitnport(self):
         self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55))
         self.assertEqual(splitnport('parrot:cheese'), ('parrot', None))
         self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None))
+        self.assertEqual(splitnport('parrot: +1_0 '), ('parrot', None))
 
     def test_splitquery(self):
         # Normal cases are exercised by other tests; ensure that we also
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 3734c73948c6..9a3102afd63b 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -167,12 +167,11 @@ def hostname(self):
     def port(self):
         port = self._hostinfo[1]
         if port is not None:
-            try:
-                port = int(port, 10)
-            except ValueError:
-                message = f'Port could not be cast to integer value as {port!r}'
-                raise ValueError(message) from None
-            if not ( 0 <= port <= 65535):
+            if port.isdigit() and port.isascii():
+                port = int(port)
+            else:
+                raise ValueError(f"Port could not be cast to integer value as {port!r}")
+            if not (0 <= port <= 65535):
                 raise ValueError("Port out of range 0-65535")
         return port
 
@@ -1132,15 +1131,15 @@ def splitnport(host, defport=-1):
 def _splitnport(host, defport=-1):
     """Split host and port, returning numeric port.
     Return given default port if no ':' found; defaults to -1.
-    Return numerical port if a valid number are found after ':'.
+    Return numerical port if a valid number is found after ':'.
     Return None if ':' but not a valid number."""
     host, delim, port = host.rpartition(':')
     if not delim:
         host = port
     elif port:
-        try:
+        if port.isdigit() and port.isascii():
             nport = int(port)
-        except ValueError:
+        else:
             nport = None
         return host, nport
     return host, defport
diff --git a/Misc/NEWS.d/next/Library/2022-10-14-19-57-37.gh-issue-96035.0xcX-p.rst b/Misc/NEWS.d/next/Library/2022-10-14-19-57-37.gh-issue-96035.0xcX-p.rst
new file mode 100644
index 000000000000..f04a0fd0915e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-10-14-19-57-37.gh-issue-96035.0xcX-p.rst
@@ -0,0 +1,3 @@
+Fix bug in :func:`urllib.parse.urlparse` that causes certain port numbers
+containing whitespace, underscores, plus and minus signs, or non-ASCII digits to be
+incorrectly accepted.



More information about the Python-checkins mailing list