[Python-checkins] cpython (merge 3.4 -> 3.5): Issues #25232, #24657: Merge two CGI server fixes from 3.4 into 3.5

Sat Oct 3 02:44:33 EDT 2015

https://hg.python.org/cpython/rev/ba1e3c112e42
changeset:   98511:ba1e3c112e42
branch:      3.5
parent:      98506:227f959f43e7
parent:      98510:634fe6a90e0c
user:        Martin Panter <vadmium+py at gmail.com>
date:        Sat Oct 03 06:03:25 2015 +0000
summary:
  Issues #25232, #24657: Merge two CGI server fixes from 3.4 into 3.5

files:
  Lib/http/server.py           |  19 +++++++------
  Lib/test/test_httpservers.py |  31 ++++++++++++++++++++++++
  Misc/ACKS                    |   1 +
  Misc/NEWS                    |   6 ++++
  4 files changed, 48 insertions(+), 9 deletions(-)

diff --git a/Lib/http/server.py b/Lib/http/server.py
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -837,13 +837,15 @@
     The utility of this function is limited to is_cgi method and helps
     preventing some security attacks.
 
-    Returns: A tuple of (head, tail) where tail is everything after the final /
-    and head is everything before it.  Head will always start with a '/' and,
-    if it contains anything else, never have a trailing '/'.
+    Returns: The reconstituted URL, which will always start with a '/'.
 
     Raises: IndexError if too many '..' occur within the path.
 
     """
+    # Query component should not be involved.
+    path, _, query = path.partition('?')
+    path = urllib.parse.unquote(path)
+
     # Similar to os.path.split(os.path.normpath(path)) but specific to URL
     # path semantics rather than local operating system semantics.
     path_parts = path.split('/')
@@ -864,6 +866,9 @@
     else:
         tail_part = ''
 
+    if query:
+        tail_part = '?'.join((tail_part, query))
+
     splitpath = ('/' + '/'.join(head_parts), tail_part)
     collapsed_path = "/".join(splitpath)
 
@@ -947,7 +952,7 @@
         (and the next character is a '/' or the end of the string).
 
         """
-        collapsed_path = _url_collapse_path(urllib.parse.unquote(self.path))
+        collapsed_path = _url_collapse_path(self.path)
         dir_sep = collapsed_path.find('/', 1)
         head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
         if head in self.cgi_directories:
@@ -984,11 +989,7 @@
                 break
 
         # find an explicit query string, if present.
-        i = rest.rfind('?')
-        if i >= 0:
-            rest, query = rest[:i], rest[i+1:]
-        else:
-            query = ''
+        rest, _, query = rest.partition('?')
 
         # dissect the part after the directory name into a script name &
         # a possible additional path, to be stored in PATH_INFO.
diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py
--- a/Lib/test/test_httpservers.py
+++ b/Lib/test/test_httpservers.py
@@ -425,6 +425,16 @@
                           form.getfirst("bacon")))
 """
 
+cgi_file4 = """\
+#!%s
+import os
+
+print("Content-type: text/html")
+print()
+
+print(os.environ["%s"])
+"""
+
 
 @unittest.skipIf(hasattr(os, 'geteuid') and os.geteuid() == 0,
         "This test can't be run reliably as root (issue #13308).")
@@ -446,6 +456,7 @@
         self.file1_path = None
         self.file2_path = None
         self.file3_path = None
+        self.file4_path = None
 
         # The shebang line should be pure ASCII: use symlink if possible.
         # See issue #7668.
@@ -484,6 +495,11 @@
             file3.write(cgi_file1 % self.pythonexe)
         os.chmod(self.file3_path, 0o777)
 
+        self.file4_path = os.path.join(self.cgi_dir, 'file4.py')
+        with open(self.file4_path, 'w', encoding='utf-8') as file4:
+            file4.write(cgi_file4 % (self.pythonexe, 'QUERY_STRING'))
+        os.chmod(self.file4_path, 0o777)
+
         os.chdir(self.parent_dir)
 
     def tearDown(self):
@@ -499,6 +515,8 @@
                 os.remove(self.file2_path)
             if self.file3_path:
                 os.remove(self.file3_path)
+            if self.file4_path:
+                os.remove(self.file4_path)
             os.rmdir(self.cgi_child_dir)
             os.rmdir(self.cgi_dir)
             os.rmdir(self.parent_dir)
@@ -606,6 +624,19 @@
             (b'Hello World' + self.linesep, 'text/html', HTTPStatus.OK),
             (res.read(), res.getheader('Content-type'), res.status))
 
+    def test_query_with_multiple_question_mark(self):
+        res = self.request('/cgi-bin/file4.py?a=b?c=d')
+        self.assertEqual(
+            (b'a=b?c=d' + self.linesep, 'text/html', 200),
+            (res.read(), res.getheader('Content-type'), res.status))
+
+    def test_query_with_continuous_slashes(self):
+        res = self.request('/cgi-bin/file4.py?k=aa%2F%2Fbb&//q//p//=//a//b//')
+        self.assertEqual(
+            (b'k=aa%2F%2Fbb&//q//p//=//a//b//' + self.linesep,
+             'text/html', 200),
+            (res.read(), res.getheader('Content-type'), res.status))
+
 
 class SocketlessRequestHandler(SimpleHTTPRequestHandler):
     def __init__(self):
diff --git a/Misc/ACKS b/Misc/ACKS
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1580,6 +1580,7 @@
 Wei Wu
 Heiko Wundram
 Doug Wyatt
+Xiang Zhang
 Robert Xiao
 Florent Xicluna
 Hirokazu Yamamoto
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -34,6 +34,12 @@
 Library
 -------
 
+- Issue #25232: Fix CGIRequestHandler to split the query from the URL at the
+  first question mark (?) rather than the last. Patch from Xiang Zhang.
+
+- Issue #24657: Prevent CGIRequestHandler from collapsing slashes in the
+  query part of the URL as if it were a path. Patch from Xiang Zhang.
+
 - Issue #24483: C implementation of functools.lru_cache() now calculates key's
   hash only once.
 

-- 
Repository URL: https://hg.python.org/cpython