[Python-checkins] (no subject)
Matěj Cepl
webhook-mailer at python.org
Wed Mar 18 21:35:48 EDT 2020
To: python-checkins at python.org
Subject: [2.7] closes bpo-38576: Disallow control characters in hostnames in
http.client. (GH-19052)
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
MIME-Version: 1.0
https://github.com/python/cpython/commit/e176e0c105786e9f476758eb5438c57223b65e7f
commit: e176e0c105786e9f476758eb5438c57223b65e7f
branch: 2.7
author: Matěj Cepl <mcepl at cepl.eu>
committer: GitHub <noreply at github.com>
date: 2020-03-18T20:35:44-05:00
summary:
[2.7] closes bpo-38576: Disallow control characters in hostnames in http.client. (GH-19052)
Add host validation for control characters for more
CVE-2019-18348 protection.
(cherry picked from commit 83fc70159b24)
Co-authored-by: Ashwin Ramaswami <aramaswamis at gmail.com>
files:
A Misc/NEWS.d/next/Library/2020-03-18-01-30-50.bpo-38576.cvI68q.rst
M Lib/httplib.py
M Lib/test/test_httplib.py
M Lib/test/test_urllib2.py
diff --git a/Lib/httplib.py b/Lib/httplib.py
index 79532b91149b1..fcc4152aaf268 100644
--- a/Lib/httplib.py
+++ b/Lib/httplib.py
@@ -745,6 +745,8 @@ def __init__(self, host, port=3DNone, strict=3DNone,
=20
(self.host, self.port) =3D self._get_hostport(host, port)
=20
+ self._validate_host(self.host)
+
# This is stored as an instance variable to allow unittests
# to replace with a suitable mock
self._create_connection =3D socket.create_connection
@@ -1029,6 +1031,17 @@ def _validate_path(self, url):
).format(matched=3Dmatch.group(), url=3Durl)
raise InvalidURL(msg)
=20
+ def _validate_host(self, host):
+ """Validate a host so it doesn't contain control characters."""
+ # Prevent CVE-2019-18348.
+ match =3D _contains_disallowed_url_pchar_re.search(host)
+ if match:
+ msg =3D (
+ "URL can't contain control characters. {host!r} "
+ "(found at least {matched!r})"
+ ).format(matched=3Dmatch.group(), host=3Dhost)
+ raise InvalidURL(msg)
+
def putheader(self, header, *values):
"""Send a request header line to the server.
=20
diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py
index 5462fdd503c83..d8a57f73530da 100644
--- a/Lib/test/test_httplib.py
+++ b/Lib/test/test_httplib.py
@@ -702,7 +702,7 @@ def test_proxy_tunnel_without_status_line(self):
with self.assertRaisesRegexp(socket.error, "Invalid response"):
conn._tunnel()
=20
- def test_putrequest_override_validation(self):
+ def test_putrequest_override_domain_validation(self):
"""
It should be possible to override the default validation
behavior in putrequest (bpo-38216).
@@ -715,6 +715,17 @@ def _validate_path(self, url):
conn.sock =3D FakeSocket('')
conn.putrequest('GET', '/\x00')
=20
+ def test_putrequest_override_host_validation(self):
+ class UnsafeHTTPConnection(httplib.HTTPConnection):
+ def _validate_host(self, url):
+ pass
+
+ conn =3D UnsafeHTTPConnection('example.com\r\n')
+ conn.sock =3D FakeSocket('')
+ # set skip_host so a ValueError is not raised upon adding the
+ # invalid URL as the value of the "Host:" header
+ conn.putrequest('GET', '/', skip_host=3D1)
+
=20
class OfflineTest(TestCase):
def test_responses(self):
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
index 9531818e16b25..20a0f581436d6 100644
--- a/Lib/test/test_urllib2.py
+++ b/Lib/test/test_urllib2.py
@@ -1321,7 +1321,7 @@ def test_unsupported_algorithm(self):
)
=20
@unittest.skipUnless(ssl, "ssl module required")
- def test_url_with_control_char_rejected(self):
+ def test_url_path_with_control_char_rejected(self):
for char_no in range(0, 0x21) + range(0x7f, 0x100):
char =3D chr(char_no)
schemeless_url =3D "//localhost:7777/test%s/" % char
@@ -1345,7 +1345,7 @@ def test_url_with_control_char_rejected(self):
self.unfakehttp()
=20
@unittest.skipUnless(ssl, "ssl module required")
- def test_url_with_newline_header_injection_rejected(self):
+ def test_url_path_with_newline_header_injection_rejected(self):
self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
host =3D "localhost:7777?a=3D1 HTTP/1.1\r\nX-injected: header\r\nTES=
T: 123"
schemeless_url =3D "//" + host + ":8080/test/?test=3Da"
@@ -1357,14 +1357,32 @@ def test_url_with_newline_header_injection_rejected(self):
# calls urllib.parse.quote() on the URL which makes all of the
# above attempts at injection within the url _path_ safe.
InvalidURL =3D httplib.InvalidURL
- with self.assertRaisesRegexp(
- InvalidURL, r"contain control.*\\r.*(found at least . .)"):
- urllib2.urlopen("http:" + schemeless_url)
- with self.assertRaisesRegexp(InvalidURL, r"contain control.*\\n"=
):
- urllib2.urlopen("https:" + schemeless_url)
+ with self.assertRaisesRegexp(InvalidURL,
+ r"contain control.*\\r.*(found at least . .)"):
+ urllib2.urlopen("http:{}".format(schemeless_url))
+ with self.assertRaisesRegexp(InvalidURL,
+ r"contain control.*\\n"):
+ urllib2.urlopen("https:{}".format(schemeless_url))
finally:
self.unfakehttp()
=20
+ @unittest.skipUnless(ssl, "ssl module required")
+ def test_url_host_with_control_char_rejected(self):
+ for char_no in list(range(0, 0x21)) + [0x7f]:
+ char =3D chr(char_no)
+ schemeless_url =3D "//localhost{}/test/".format(char)
+ self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
+ try:
+ escaped_char_repr =3D repr(char).replace('\\', r'\\')
+ InvalidURL =3D httplib.InvalidURL
+ with self.assertRaisesRegexp(InvalidURL,
+ "contain control.*{}".format(escaped_char_repr)):
+ urllib2.urlopen("http:{}".format(schemeless_url))
+ with self.assertRaisesRegexp(InvalidURL,
+ "contain control.*{}".format(escaped_char_repr)):
+ urllib2.urlopen("https:{}".format(schemeless_url))
+ finally:
+ self.unfakehttp()
=20
=20
class RequestTests(unittest.TestCase):
diff --git a/Misc/NEWS.d/next/Library/2020-03-18-01-30-50.bpo-38576.cvI68q.rst b/Misc/NEWS.d/next/Library/2020-03-18-01-30-50.bpo-38576.cvI68q.rst
new file mode 100644
index 0000000000000..96af32d34d096
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-03-18-01-30-50.bpo-38576.cvI68q.rst
@@ -0,0 +1,3 @@
+Disallow control characters in hostnames in http.client, addressing
+CVE-2019-18348. Such potentially malicious header injection URLs now cause an
+InvalidURL to be raised.
More information about the Python-checkins
mailing list