[Python-checkins] bpo-39057: Fix urllib.request.proxy_bypass_environment(). (GH-17619)

Serhiy Storchaka webhook-mailer at python.org
Sun Jan 5 07:14:39 EST 2020


https://github.com/python/cpython/commit/6a265f0d0c0a4b3b8fecf4275d49187a384167f4
commit: 6a265f0d0c0a4b3b8fecf4275d49187a384167f4
branch: master
author: Serhiy Storchaka <storchaka at gmail.com>
committer: GitHub <noreply at github.com>
date: 2020-01-05T14:14:31+02:00
summary:

bpo-39057: Fix urllib.request.proxy_bypass_environment(). (GH-17619)

Ignore leading dots and no longer ignore a trailing newline.

files:
A Misc/NEWS.d/next/Library/2019-12-15-21-47-54.bpo-39057.FOxn-w.rst
M Lib/test/test_urllib.py
M Lib/urllib/parse.py
M Lib/urllib/request.py

diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index 95c4ecc4dcf29..2e82fc7b7b861 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -270,14 +270,36 @@ def test_proxy_bypass_environment_host_match(self):
         self.assertTrue(bypass('localhost'))
         self.assertTrue(bypass('LocalHost'))                 # MixedCase
         self.assertTrue(bypass('LOCALHOST'))                 # UPPERCASE
+        self.assertTrue(bypass('.localhost'))
         self.assertTrue(bypass('newdomain.com:1234'))
+        self.assertTrue(bypass('.newdomain.com:1234'))
         self.assertTrue(bypass('foo.d.o.t'))                 # issue 29142
+        self.assertTrue(bypass('d.o.t'))
         self.assertTrue(bypass('anotherdomain.com:8888'))
+        self.assertTrue(bypass('.anotherdomain.com:8888'))
         self.assertTrue(bypass('www.newdomain.com:1234'))
         self.assertFalse(bypass('prelocalhost'))
         self.assertFalse(bypass('newdomain.com'))            # no port
         self.assertFalse(bypass('newdomain.com:1235'))       # wrong port
 
+    def test_proxy_bypass_environment_always_match(self):
+        bypass = urllib.request.proxy_bypass_environment
+        self.env.set('NO_PROXY', '*')
+        self.assertTrue(bypass('newdomain.com'))
+        self.assertTrue(bypass('newdomain.com:1234'))
+        self.env.set('NO_PROXY', '*, anotherdomain.com')
+        self.assertTrue(bypass('anotherdomain.com'))
+        self.assertFalse(bypass('newdomain.com'))
+        self.assertFalse(bypass('newdomain.com:1234'))
+
+    def test_proxy_bypass_environment_newline(self):
+        bypass = urllib.request.proxy_bypass_environment
+        self.env.set('NO_PROXY',
+                     'localhost, anotherdomain.com, newdomain.com:1234')
+        self.assertFalse(bypass('localhost\n'))
+        self.assertFalse(bypass('anotherdomain.com:8888\n'))
+        self.assertFalse(bypass('newdomain.com:1234\n'))
+
 
 class ProxyTests_withOrderedEnv(unittest.TestCase):
 
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 31fd7e16ee72c..34d5f95dd79bd 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -1056,9 +1056,9 @@ def _splitport(host):
     """splitport('host:port') --> 'host', 'port'."""
     global _portprog
     if _portprog is None:
-        _portprog = re.compile('(.*):([0-9]*)$', re.DOTALL)
+        _portprog = re.compile('(.*):([0-9]*)', re.DOTALL)
 
-    match = _portprog.match(host)
+    match = _portprog.fullmatch(host)
     if match:
         host, port = match.groups()
         if port:
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 39553d809a3f1..a6d350a97a452 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -2492,24 +2492,26 @@ def proxy_bypass_environment(host, proxies=None):
     try:
         no_proxy = proxies['no']
     except KeyError:
-        return 0
+        return False
     # '*' is special case for always bypass
     if no_proxy == '*':
-        return 1
+        return True
+    host = host.lower()
     # strip port off host
     hostonly, port = _splitport(host)
     # check if the host ends with any of the DNS suffixes
-    no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')]
-    for name in no_proxy_list:
+    for name in no_proxy.split(','):
+        name = name.strip()
         if name:
             name = name.lstrip('.')  # ignore leading dots
-            name = re.escape(name)
-            pattern = r'(.+\.)?%s$' % name
-            if (re.match(pattern, hostonly, re.I)
-                    or re.match(pattern, host, re.I)):
-                return 1
+            name = name.lower()
+            if hostonly == name or host == name:
+                return True
+            name = '.' + name
+            if hostonly.endswith(name) or host.endswith(name):
+                return True
     # otherwise, don't bypass
-    return 0
+    return False
 
 
 # This code tests an OSX specific data structure but is testable on all
@@ -2635,7 +2637,7 @@ def getproxies_registry():
                     for p in proxyServer.split(';'):
                         protocol, address = p.split('=', 1)
                         # See if address has a type:// prefix
-                        if not re.match('^([^/:]+)://', address):
+                        if not re.match('(?:[^/:]+)://', address):
                             address = '%s://%s' % (protocol, address)
                         proxies[protocol] = address
                 else:
diff --git a/Misc/NEWS.d/next/Library/2019-12-15-21-47-54.bpo-39057.FOxn-w.rst b/Misc/NEWS.d/next/Library/2019-12-15-21-47-54.bpo-39057.FOxn-w.rst
new file mode 100644
index 0000000000000..24a17444b97da
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-12-15-21-47-54.bpo-39057.FOxn-w.rst
@@ -0,0 +1,2 @@
+:func:`urllib.request.proxy_bypass_environment` now ignores leading dots and
+no longer ignores a trailing newline.



More information about the Python-checkins mailing list