[Python-checkins] r86146 - in python/branches/py3k: Doc/library/wsgiref.rst Lib/test/test_wsgiref.py Lib/wsgiref/handlers.py Lib/wsgiref/simple_server.py Misc/NEWS

phillip.eby python-checkins at python.org
Wed Nov 3 23:39:01 CET 2010


Author: phillip.eby
Date: Wed Nov  3 23:39:01 2010
New Revision: 86146

Log:
Implement http://bugs.python.org/issue10155 using And Clover's patch, w/added
docs and support for more client-generated CGI variables.  (This should
complete the WSGI 1.0.1 compliance changes for Python 3.x.)


Modified:
   python/branches/py3k/Doc/library/wsgiref.rst
   python/branches/py3k/Lib/test/test_wsgiref.py
   python/branches/py3k/Lib/wsgiref/handlers.py
   python/branches/py3k/Lib/wsgiref/simple_server.py
   python/branches/py3k/Misc/NEWS

Modified: python/branches/py3k/Doc/library/wsgiref.rst
==============================================================================
--- python/branches/py3k/Doc/library/wsgiref.rst	(original)
+++ python/branches/py3k/Doc/library/wsgiref.rst	Wed Nov  3 23:39:01 2010
@@ -456,6 +456,32 @@
    environment.
 
 
+.. class:: IISCGIHandler()
+
+   A specialized alternative to :class:`CGIHandler`, for use when deploying on
+   Microsoft's IIS web server, without having set the config allowPathInfo
+   option (IIS>=7) or metabase allowPathInfoForScriptMappings (IIS<7).
+
+   By default, IIS gives a ``PATH_INFO`` that duplicates the ``SCRIPT_NAME`` at
+   the front, causing problems for WSGI applications that wish to implement
+   routing. This handler strips any such duplicated path.
+
+   IIS can be configured to pass the correct ``PATH_INFO``, but this causes
+   another bug where ``PATH_TRANSLATED`` is wrong. Luckily this variable is
+   rarely used and is not guaranteed by WSGI. On IIS<7, though, the
+   setting can only be made on a vhost level, affecting all other script
+   mappings, many of which break when exposed to the ``PATH_TRANSLATED`` bug.
+   For this reason IIS<7 is almost never deployed with the fix. (Even IIS7
+   rarely uses it because there is still no UI for it.)
+
+   There is no way for CGI code to tell whether the option was set, so a
+   separate handler class is provided.  It is used in the same way as
+   :class:`CGIHandler`, i.e., by calling ``IISCGIHandler().run(app)``, where
+   ``app`` is the WSGI application object you wish to invoke.
+
+   .. versionadded:: 3.2
+
+
 .. class:: BaseCGIHandler(stdin, stdout, stderr, environ, multithread=True, multiprocess=False)
 
    Similar to :class:`CGIHandler`, but instead of using the :mod:`sys` and
@@ -696,6 +722,24 @@
       version of the response set to the client.  It defaults to ``"1.0"``.
 
 
+.. function:: read_environ()
+
+   Transcode CGI variables from ``os.environ`` to PEP 3333 "bytes in unicode"
+   strings, returning a new dictionary.  This function is used by
+   :class:`CGIHandler` and :class:`IISCGIHandler` in place of directly using
+   ``os.environ``, which is not necessarily WSGI-compliant on all platforms
+   and web servers using Python 3 -- specifically, ones where the OS's
+   actual environment is Unicode (i.e. Windows), or ones where the environment
+   is bytes, but the system encoding used by Python to decode it is anything
+   other than ISO-8859-1 (e.g. Unix systems using UTF-8).
+
+   If you are implementing a CGI-based handler of your own, you probably want
+   to use this routine instead of just copying values out of ``os.environ``
+   directly.
+
+   .. versionadded:: 3.2
+
+
 Examples
 --------
 

Modified: python/branches/py3k/Lib/test/test_wsgiref.py
==============================================================================
--- python/branches/py3k/Lib/test/test_wsgiref.py	(original)
+++ python/branches/py3k/Lib/test/test_wsgiref.py	Wed Nov  3 23:39:01 2010
@@ -131,7 +131,7 @@
     def check_hello(self, out, has_length=True):
         self.assertEqual(out,
             ("HTTP/1.0 200 OK\r\n"
-            "Server: WSGIServer/0.1 Python/"+sys.version.split()[0]+"\r\n"
+            "Server: WSGIServer/0.2 Python/"+sys.version.split()[0]+"\r\n"
             "Content-Type: text/plain\r\n"
             "Date: Mon, 05 Jun 2006 18:49:54 GMT\r\n" +
             (has_length and  "Content-Length: 13\r\n" or "") +
@@ -187,7 +187,7 @@
         ver = sys.version.split()[0].encode('ascii')
         self.assertEqual(
                 b"HTTP/1.0 200 OK\r\n"
-                b"Server: WSGIServer/0.1 Python/" + ver + b"\r\n"
+                b"Server: WSGIServer/0.2 Python/" + ver + b"\r\n"
                 b"Content-Type: text/plain; charset=utf-8\r\n"
                 b"Date: Wed, 24 Dec 2008 13:29:32 GMT\r\n"
                 b"\r\n"

Modified: python/branches/py3k/Lib/wsgiref/handlers.py
==============================================================================
--- python/branches/py3k/Lib/wsgiref/handlers.py	(original)
+++ python/branches/py3k/Lib/wsgiref/handlers.py	Wed Nov  3 23:39:01 2010
@@ -5,7 +5,10 @@
 
 import sys, os, time
 
-__all__ = ['BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler']
+__all__ = [
+    'BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler',
+    'IISCGIHandler', 'read_environ'
+]
 
 # Weekday and month names for HTTP date/time formatting; always English!
 _weekdayname = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
@@ -19,6 +22,74 @@
         _weekdayname[wd], day, _monthname[month], year, hh, mm, ss
     )
 
+_is_request = {
+    'SCRIPT_NAME', 'PATH_INFO', 'QUERY_STRING', 'REQUEST_METHOD', 'AUTH_TYPE',
+    'CONTENT_TYPE', 'CONTENT_LENGTH', 'HTTPS', 'REMOTE_USER', 'REMOTE_IDENT',
+}.__contains__
+
+def _needs_transcode(k):
+    return _is_request(k) or k.startswith('HTTP_') or k.startswith('SSL_') \
+        or (k.startswith('REDIRECT_') and _needs_transcode(k[9:]))
+
+def read_environ():
+    """Read environment, fixing HTTP variables"""
+    enc = sys.getfilesystemencoding()
+    esc = 'surrogateescape'
+    try:
+        ''.encode('utf-8', esc)
+    except LookupError:
+        esc = 'replace'
+    environ = {}
+
+    # Take the basic environment from native-unicode os.environ. Attempt to
+    # fix up the variables that come from the HTTP request to compensate for
+    # the bytes->unicode decoding step that will already have taken place.
+    for k, v in os.environ.items():
+        if _needs_transcode(k):
+
+            # On win32, the os.environ is natively Unicode. Different servers
+            # decode the request bytes using different encodings.
+            if sys.platform == 'win32':
+                software = os.environ.get('SERVER_SOFTWARE', '').lower()
+
+                # On IIS, the HTTP request will be decoded as UTF-8 as long
+                # as the input is a valid UTF-8 sequence. Otherwise it is
+                # decoded using the system code page (mbcs), with no way to
+                # detect this has happened. Because UTF-8 is the more likely
+                # encoding, and mbcs is inherently unreliable (an mbcs string
+                # that happens to be valid UTF-8 will not be decoded as mbcs)
+                # always recreate the original bytes as UTF-8.
+                if software.startswith('microsoft-iis/'):
+                    v = v.encode('utf-8').decode('iso-8859-1')
+
+                # Apache mod_cgi writes bytes-as-unicode (as if ISO-8859-1) direct
+                # to the Unicode environ. No modification needed.
+                elif software.startswith('apache/'):
+                    pass
+
+                # Python 3's http.server.CGIHTTPRequestHandler decodes
+                # using the urllib.unquote default of UTF-8, amongst other
+                # issues.
+                elif (
+                    software.startswith('simplehttp/')
+                    and 'python/3' in software
+                ):
+                    v = v.encode('utf-8').decode('iso-8859-1')
+
+                # For other servers, guess that they have written bytes to
+                # the environ using stdio byte-oriented interfaces, ending up
+                # with the system code page.
+                else:
+                    v = v.encode(enc, 'replace').decode('iso-8859-1')
+
+            # Recover bytes from unicode environ, using surrogate escapes
+            # where available (Python 3.1+).
+            else:
+                v = v.encode(enc, esc).decode('iso-8859-1')
+
+        environ[k] = v
+    return environ
+
 
 class BaseHandler:
     """Manage the invocation of a WSGI application"""
@@ -36,7 +107,7 @@
     # os_environ is used to supply configuration from the OS environment:
     # by default it's a copy of 'os.environ' as of import time, but you can
     # override this in e.g. your __init__ method.
-    os_environ = dict(os.environ.items())
+    os_environ= read_environ()
 
     # Collaborator classes
     wsgi_file_wrapper = FileWrapper     # set to None to disable
@@ -431,6 +502,42 @@
 
     def __init__(self):
         BaseCGIHandler.__init__(
-            self, sys.stdin, sys.stdout, sys.stderr, dict(os.environ.items()),
-            multithread=False, multiprocess=True
+            self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr,
+            read_environ(), multithread=False, multiprocess=True
+        )
+
+
+class IISCGIHandler(BaseCGIHandler):
+    """CGI-based invocation with workaround for IIS path bug
+
+    This handler should be used in preference to CGIHandler when deploying on
+    Microsoft IIS without having set the config allowPathInfo option (IIS>=7)
+    or metabase allowPathInfoForScriptMappings (IIS<7).
+    """
+    wsgi_run_once = True
+    os_environ = {}
+
+    # By default, IIS gives a PATH_INFO that duplicates the SCRIPT_NAME at
+    # the front, causing problems for WSGI applications that wish to implement
+    # routing. This handler strips any such duplicated path.
+
+    # IIS can be configured to pass the correct PATH_INFO, but this causes
+    # another bug where PATH_TRANSLATED is wrong. Luckily this variable is
+    # rarely used and is not guaranteed by WSGI. On IIS<7, though, the
+    # setting can only be made on a vhost level, affecting all other script
+    # mappings, many of which break when exposed to the PATH_TRANSLATED bug.
+    # For this reason IIS<7 is almost never deployed with the fix. (Even IIS7
+    # rarely uses it because there is still no UI for it.)
+
+    # There is no way for CGI code to tell whether the option was set, so a
+    # separate handler class is provided.
+    def __init__(self):
+        environ= read_environ()
+        path = environ.get('PATH_INFO', '')
+        script = environ.get('SCRIPT_NAME', '')
+        if (path+'/').startswith(script+'/'):
+            environ['PATH_INFO'] = path[len(script):]
+        BaseCGIHandler.__init__(
+            self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr,
+            environ, multithread=False, multiprocess=True
         )

Modified: python/branches/py3k/Lib/wsgiref/simple_server.py
==============================================================================
--- python/branches/py3k/Lib/wsgiref/simple_server.py	(original)
+++ python/branches/py3k/Lib/wsgiref/simple_server.py	Wed Nov  3 23:39:01 2010
@@ -15,7 +15,7 @@
 import urllib.parse
 from wsgiref.handlers import SimpleHandler
 
-__version__ = "0.1"
+__version__ = "0.2"
 __all__ = ['WSGIServer', 'WSGIRequestHandler', 'demo_app', 'make_server']
 
 
@@ -74,13 +74,14 @@
     def get_environ(self):
         env = self.server.base_environ.copy()
         env['SERVER_PROTOCOL'] = self.request_version
+        env['SERVER_SOFTWARE'] = self.server_version
         env['REQUEST_METHOD'] = self.command
         if '?' in self.path:
             path,query = self.path.split('?',1)
         else:
             path,query = self.path,''
 
-        env['PATH_INFO'] = urllib.parse.unquote(path)
+        env['PATH_INFO'] = urllib.parse.unquote_to_bytes(path).decode('iso-8859-1')
         env['QUERY_STRING'] = query
 
         host = self.address_string()

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Wed Nov  3 23:39:01 2010
@@ -59,6 +59,10 @@
 Library
 -------
 
+- Issue #10155: Add IISCGIHandler to wsgiref.handlers to support IIS
+  CGI environment better, and to correct unicode environment values
+  for WSGI 1.0.1.
+
 - Issue #10281: nntplib now returns None for absent fields in the OVER/XOVER
   response, instead of raising an exception.
 


More information about the Python-checkins mailing list