[Python-checkins] (no subject)

Matěj Cepl webhook-mailer at python.org
Wed Mar 18 21:35:48 EDT 2020




To: python-checkins at python.org
Subject: [2.7] closes bpo-38576: Disallow control characters in hostnames in
 http.client. (GH-19052)
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
MIME-Version: 1.0

https://github.com/python/cpython/commit/e176e0c105786e9f476758eb5438c57223b65e7f
commit: e176e0c105786e9f476758eb5438c57223b65e7f
branch: 2.7
author: Matěj Cepl <mcepl at cepl.eu>
committer: GitHub <noreply at github.com>
date: 2020-03-18T20:35:44-05:00
summary:

[2.7] closes bpo-38576: Disallow control characters in hostnames in http.client. (GH-19052)

Add host validation for control characters for more
CVE-2019-18348 protection.
(cherry picked from commit 83fc70159b24)

Co-authored-by: Ashwin Ramaswami <aramaswamis at gmail.com>

files:
A Misc/NEWS.d/next/Library/2020-03-18-01-30-50.bpo-38576.cvI68q.rst
M Lib/httplib.py
M Lib/test/test_httplib.py
M Lib/test/test_urllib2.py

diff --git a/Lib/httplib.py b/Lib/httplib.py
index 79532b91149b1..fcc4152aaf268 100644
--- a/Lib/httplib.py
+++ b/Lib/httplib.py
@@ -745,6 +745,8 @@ def __init__(self, host, port=3DNone, strict=3DNone,
=20
         (self.host, self.port) =3D self._get_hostport(host, port)
=20
+        self._validate_host(self.host)
+
         # This is stored as an instance variable to allow unittests
         # to replace with a suitable mock
         self._create_connection =3D socket.create_connection
@@ -1029,6 +1031,17 @@ def _validate_path(self, url):
             ).format(matched=3Dmatch.group(), url=3Durl)
             raise InvalidURL(msg)
=20
+    def _validate_host(self, host):
+        """Validate a host so it doesn't contain control characters."""
+        # Prevent CVE-2019-18348.
+        match =3D _contains_disallowed_url_pchar_re.search(host)
+        if match:
+            msg =3D (
+                "URL can't contain control characters. {host!r} "
+                "(found at least {matched!r})"
+            ).format(matched=3Dmatch.group(), host=3Dhost)
+            raise InvalidURL(msg)
+
     def putheader(self, header, *values):
         """Send a request header line to the server.
=20
diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py
index 5462fdd503c83..d8a57f73530da 100644
--- a/Lib/test/test_httplib.py
+++ b/Lib/test/test_httplib.py
@@ -702,7 +702,7 @@ def test_proxy_tunnel_without_status_line(self):
         with self.assertRaisesRegexp(socket.error, "Invalid response"):
             conn._tunnel()
=20
-    def test_putrequest_override_validation(self):
+    def test_putrequest_override_domain_validation(self):
         """
         It should be possible to override the default validation
         behavior in putrequest (bpo-38216).
@@ -715,6 +715,17 @@ def _validate_path(self, url):
         conn.sock =3D FakeSocket('')
         conn.putrequest('GET', '/\x00')
=20
+    def test_putrequest_override_host_validation(self):
+        class UnsafeHTTPConnection(httplib.HTTPConnection):
+            def _validate_host(self, url):
+                pass
+
+        conn =3D UnsafeHTTPConnection('example.com\r\n')
+        conn.sock =3D FakeSocket('')
+        # set skip_host so a ValueError is not raised upon adding the
+        # invalid URL as the value of the "Host:" header
+        conn.putrequest('GET', '/', skip_host=3D1)
+
=20
 class OfflineTest(TestCase):
     def test_responses(self):
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
index 9531818e16b25..20a0f581436d6 100644
--- a/Lib/test/test_urllib2.py
+++ b/Lib/test/test_urllib2.py
@@ -1321,7 +1321,7 @@ def test_unsupported_algorithm(self):
         )
=20
     @unittest.skipUnless(ssl, "ssl module required")
-    def test_url_with_control_char_rejected(self):
+    def test_url_path_with_control_char_rejected(self):
         for char_no in range(0, 0x21) + range(0x7f, 0x100):
             char =3D chr(char_no)
             schemeless_url =3D "//localhost:7777/test%s/" % char
@@ -1345,7 +1345,7 @@ def test_url_with_control_char_rejected(self):
                 self.unfakehttp()
=20
     @unittest.skipUnless(ssl, "ssl module required")
-    def test_url_with_newline_header_injection_rejected(self):
+    def test_url_path_with_newline_header_injection_rejected(self):
         self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
         host =3D "localhost:7777?a=3D1 HTTP/1.1\r\nX-injected: header\r\nTES=
T: 123"
         schemeless_url =3D "//" + host + ":8080/test/?test=3Da"
@@ -1357,14 +1357,32 @@ def test_url_with_newline_header_injection_rejected(s=
elf):
             # calls urllib.parse.quote() on the URL which makes all of the
             # above attempts at injection within the url _path_ safe.
             InvalidURL =3D httplib.InvalidURL
-            with self.assertRaisesRegexp(
-                InvalidURL, r"contain control.*\\r.*(found at least . .)"):
-                urllib2.urlopen("http:" + schemeless_url)
-            with self.assertRaisesRegexp(InvalidURL, r"contain control.*\\n"=
):
-                urllib2.urlopen("https:" + schemeless_url)
+            with self.assertRaisesRegexp(InvalidURL,
+                    r"contain control.*\\r.*(found at least . .)"):
+                urllib2.urlopen("http:{}".format(schemeless_url))
+            with self.assertRaisesRegexp(InvalidURL,
+                    r"contain control.*\\n"):
+                urllib2.urlopen("https:{}".format(schemeless_url))
         finally:
             self.unfakehttp()
=20
+    @unittest.skipUnless(ssl, "ssl module required")
+    def test_url_host_with_control_char_rejected(self):
+        for char_no in list(range(0, 0x21)) + [0x7f]:
+            char =3D chr(char_no)
+            schemeless_url =3D "//localhost{}/test/".format(char)
+            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
+            try:
+                escaped_char_repr =3D repr(char).replace('\\', r'\\')
+                InvalidURL =3D httplib.InvalidURL
+                with self.assertRaisesRegexp(InvalidURL,
+                    "contain control.*{}".format(escaped_char_repr)):
+                        urllib2.urlopen("http:{}".format(schemeless_url))
+                with self.assertRaisesRegexp(InvalidURL,
+                    "contain control.*{}".format(escaped_char_repr)):
+                        urllib2.urlopen("https:{}".format(schemeless_url))
+            finally:
+                self.unfakehttp()
=20
=20
 class RequestTests(unittest.TestCase):
diff --git a/Misc/NEWS.d/next/Library/2020-03-18-01-30-50.bpo-38576.cvI68q.rs=
t b/Misc/NEWS.d/next/Library/2020-03-18-01-30-50.bpo-38576.cvI68q.rst
new file mode 100644
index 0000000000000..96af32d34d096
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-03-18-01-30-50.bpo-38576.cvI68q.rst
@@ -0,0 +1,3 @@
+Disallow control characters in hostnames in http.client, addressing
+CVE-2019-18348. Such potentially malicious header injection URLs now cause an
+InvalidURL to be raised.



More information about the Python-checkins mailing list