[Python-checkins] r80885 - in python/branches/py3k: Doc/library/os.rst Doc/library/posix.rst Lib/os.py Lib/test/test_os.py Lib/test/test_subprocess.py Misc/NEWS Modules/posixmodule.c

victor.stinner python-checkins at python.org
Fri May 7 00:05:07 CEST 2010


Author: victor.stinner
Date: Fri May  7 00:05:07 2010
New Revision: 80885

Log:
Issue #8603: Create a bytes version of os.environ for Unix

Create the os.environb mapping and the os.getenvb() function. os.unsetenv()
now encodes its str argument to the file system encoding with the
surrogateescape error handler (instead of utf8/strict) and also accepts bytes.
posix.environ keys and values are now bytes.


Modified:
   python/branches/py3k/Doc/library/os.rst
   python/branches/py3k/Doc/library/posix.rst
   python/branches/py3k/Lib/os.py
   python/branches/py3k/Lib/test/test_os.py
   python/branches/py3k/Lib/test/test_subprocess.py
   python/branches/py3k/Misc/NEWS
   python/branches/py3k/Modules/posixmodule.c

Modified: python/branches/py3k/Doc/library/os.rst
==============================================================================
--- python/branches/py3k/Doc/library/os.rst	(original)
+++ python/branches/py3k/Doc/library/os.rst	Fri May  7 00:05:07 2010
@@ -107,6 +107,10 @@
    to modify the environment as well as query the environment.  :func:`putenv` will
    be called automatically when the mapping is modified.
 
+   On Unix, keys and values use :func:`sys.getfilesystemencoding` and
+   ``'surrogateescape'`` error handler. Use :data:`environb` if you would like
+   to use a different encoding.
+
    .. note::
 
       Calling :func:`putenv` directly does not change ``os.environ``, so it's better
@@ -128,6 +132,16 @@
    one of the :meth:`pop` or :meth:`clear` methods is called.
 
 
+.. data:: environb
+
+   Bytes version of :data:`environ`: a mapping object representing the
+   environment as byte strings. :data:`environ` and :data:`environb` are
+   synchronized (modifying :data:`environb` updates :data:`environ`, and vice
+   versa).
+
+   Availability: Unix.
+
+
 .. function:: chdir(path)
               fchdir(fd)
               getcwd()
@@ -251,7 +265,19 @@
 .. function:: getenv(key, default=None)
 
    Return the value of the environment variable *key* if it exists, or
-   *default* if it doesn't.  Availability: most flavors of Unix, Windows.
+   *default* if it doesn't. *key*, *default* and the result are str.
+   Availability: most flavors of Unix, Windows.
+
+   On Unix, keys and values are decoded with :func:`sys.getfilesystemencoding`
+   and ``'surrogateescape'`` error handler. Use :func:`os.getenvb` if you
+   would like to use a different encoding.
+
+
+.. function:: getenvb(key, default=None)
+
+   Return the value of the environment variable *key* if it exists, or
+   *default* if it doesn't. *key*, *default* and the result are bytes.
+   Availability: most flavors of Unix.
 
 
 .. function:: putenv(key, value)

Modified: python/branches/py3k/Doc/library/posix.rst
==============================================================================
--- python/branches/py3k/Doc/library/posix.rst	(original)
+++ python/branches/py3k/Doc/library/posix.rst	Fri May  7 00:05:07 2010
@@ -69,17 +69,22 @@
 .. data:: environ
 
    A dictionary representing the string environment at the time the interpreter
-   was started.  For example, ``environ['HOME']`` is the pathname of your home
-   directory, equivalent to ``getenv("HOME")`` in C.
+   was started. Keys and values are bytes on Unix and str on Windows. For
+   example, ``environ[b'HOME']`` (``environ['HOME']`` on Windows) is the
+   pathname of your home directory, equivalent to ``getenv("HOME")`` in C.
 
    Modifying this dictionary does not affect the string environment passed on by
    :func:`execv`, :func:`popen` or :func:`system`; if you need to change the
    environment, pass ``environ`` to :func:`execve` or add variable assignments and
    export statements to the command string for :func:`system` or :func:`popen`.
 
+   .. versionchanged:: 3.2
+      On Unix, keys and values are bytes.
+
    .. note::
 
-      The :mod:`os` module provides an alternate implementation of ``environ`` which
-      updates the environment on modification.  Note also that updating ``os.environ``
-      will render this dictionary obsolete.  Use of the :mod:`os` module version of
-      this is recommended over direct access to the :mod:`posix` module.
+      The :mod:`os` module provides an alternate implementation of ``environ``
+      which updates the environment on modification. Note also that updating
+      :data:`os.environ` will render this dictionary obsolete. Use of the
+      :mod:`os` module version of this is recommended over direct access to the
+      :mod:`posix` module.

Modified: python/branches/py3k/Lib/os.py
==============================================================================
--- python/branches/py3k/Lib/os.py	(original)
+++ python/branches/py3k/Lib/os.py	Fri May  7 00:05:07 2010
@@ -387,29 +387,33 @@
 from _abcoll import MutableMapping  # Can't use collections (bootstrap)
 
 class _Environ(MutableMapping):
-    def __init__(self, environ, keymap, putenv, unsetenv):
-        self.keymap = keymap
+    def __init__(self, data, encodekey, decodekey, encodevalue, decodevalue, putenv, unsetenv):
+        self.encodekey = encodekey
+        self.decodekey = decodekey
+        self.encodevalue = encodevalue
+        self.decodevalue = decodevalue
         self.putenv = putenv
         self.unsetenv = unsetenv
-        self.data = data = {}
-        for key, value in environ.items():
-            data[keymap(key)] = str(value)
+        self.data = data
 
     def __getitem__(self, key):
-        return self.data[self.keymap(key)]
+        value = self.data[self.encodekey(key)]
+        return self.decodevalue(value)
 
     def __setitem__(self, key, value):
-        value = str(value)
+        key = self.encodekey(key)
+        value = self.encodevalue(value)
         self.putenv(key, value)
-        self.data[self.keymap(key)] = value
+        self.data[key] = value
 
     def __delitem__(self, key):
+        key = self.encodekey(key)
         self.unsetenv(key)
-        del self.data[self.keymap(key)]
+        del self.data[key]
 
     def __iter__(self):
         for key in self.data:
-            yield key
+            yield self.decodekey(key)
 
     def __len__(self):
         return len(self.data)
@@ -439,22 +443,67 @@
 else:
     __all__.append("unsetenv")
 
-if name in ('os2', 'nt'): # Where Env Var Names Must Be UPPERCASE
-    _keymap = lambda key: str(key.upper())
-else:  # Where Env Var Names Can Be Mixed Case
-    _keymap = lambda key: str(key)
-
-environ = _Environ(environ, _keymap, _putenv, _unsetenv)
+def _createenviron():
+    if name in ('os2', 'nt'):
+        # Where Env Var Names Must Be UPPERCASE
+        def check_str(value):
+            if not isinstance(value, str):
+                raise TypeError("str expected, not %s" % type(value).__name__)
+            return value
+        encode = check_str
+        decode = str
+        def encodekey(key):
+            return encode(key).upper()
+        data = {}
+        for key, value in environ.items():
+            data[encodekey(key)] = value
+    else:
+        # Where Env Var Names Can Be Mixed Case
+        def encode(value):
+            if not isinstance(value, str):
+                raise TypeError("str expected, not %s" % type(value).__name__)
+            return value.encode(sys.getfilesystemencoding(), 'surrogateescape')
+        def decode(value):
+            return value.decode(sys.getfilesystemencoding(), 'surrogateescape')
+        encodekey = encode
+        data = environ
+    return _Environ(data,
+        encodekey, decode,
+        encode, decode,
+        _putenv, _unsetenv)
+
+# unicode environ
+environ = _createenviron()
+del _createenviron
 
 
 def getenv(key, default=None):
     """Get an environment variable, return None if it doesn't exist.
-    The optional second argument can specify an alternate default."""
-    if isinstance(key, bytes):
-        key = key.decode(sys.getfilesystemencoding(), "surrogateescape")
+    The optional second argument can specify an alternate default.
+    key, default and the result are str."""
     return environ.get(key, default)
 __all__.append("getenv")
 
+if name not in ('os2', 'nt'):
+    def _check_bytes(value):
+        if not isinstance(value, bytes):
+            raise TypeError("bytes expected, not %s" % type(value).__name__)
+        return value
+
+    # bytes environ
+    environb = _Environ(environ.data,
+        _check_bytes, bytes,
+        _check_bytes, bytes,
+        _putenv, _unsetenv)
+    del _check_bytes
+
+    def getenvb(key, default=None):
+        """Get an environment variable, return None if it doesn't exist.
+        The optional second argument can specify an alternate default.
+        key, default and the result are bytes."""
+        return environb.get(key, default)
+    __all__.append("getenvb")
+
 def _exists(name):
     return name in globals()
 

Modified: python/branches/py3k/Lib/test/test_os.py
==============================================================================
--- python/branches/py3k/Lib/test/test_os.py	(original)
+++ python/branches/py3k/Lib/test/test_os.py	Fri May  7 00:05:07 2010
@@ -369,12 +369,15 @@
 
     def setUp(self):
         self.__save = dict(os.environ)
+        self.__saveb = dict(os.environb)
         for key, value in self._reference().items():
             os.environ[key] = value
 
     def tearDown(self):
         os.environ.clear()
         os.environ.update(self.__save)
+        os.environb.clear()
+        os.environb.update(self.__saveb)
 
     def _reference(self):
         return {"KEY1":"VALUE1", "KEY2":"VALUE2", "KEY3":"VALUE3"}
@@ -439,6 +442,24 @@
         # Supplied PATH environment variable
         self.assertSequenceEqual(test_path, os.get_exec_path(test_env))
 
+    @unittest.skipIf(sys.platform == "win32", "POSIX specific test")
+    def test_environb(self):
+        # os.environ -> os.environb
+        value = 'euro\u20ac'
+        try:
+            value_bytes = value.encode(sys.getfilesystemencoding(), 'surrogateescape')
+        except UnicodeEncodeError:
+            raise unittest.SkipTest("U+20AC character is not encodable to %s" % sys.getfilesystemencoding())
+        os.environ['unicode'] = value
+        self.assertEquals(os.environ['unicode'], value)
+        self.assertEquals(os.environb[b'unicode'], value_bytes)
+
+        # os.environb -> os.environ
+        value = b'\xff'
+        os.environb[b'bytes'] = value
+        self.assertEquals(os.environb[b'bytes'], value)
+        value_str = value.decode(sys.getfilesystemencoding(), 'surrogateescape')
+        self.assertEquals(os.environ['bytes'], value_str)
 
 class WalkTests(unittest.TestCase):
     """Tests for os.walk()."""

Modified: python/branches/py3k/Lib/test/test_subprocess.py
==============================================================================
--- python/branches/py3k/Lib/test/test_subprocess.py	(original)
+++ python/branches/py3k/Lib/test/test_subprocess.py	Fri May  7 00:05:07 2010
@@ -803,8 +803,6 @@
 
     def test_undecodable_env(self):
         for key, value in (('test', 'abc\uDCFF'), ('test\uDCFF', '42')):
-            value_repr = repr(value).encode("ascii")
-
             # test str with surrogates
             script = "import os; print(repr(os.getenv(%s)))" % repr(key)
             env = os.environ.copy()
@@ -813,19 +811,19 @@
                 [sys.executable, "-c", script],
                 env=env)
             stdout = stdout.rstrip(b'\n\r')
-            self.assertEquals(stdout, value_repr)
+            self.assertEquals(stdout.decode('ascii'), repr(value))
 
             # test bytes
             key = key.encode("ascii", "surrogateescape")
             value = value.encode("ascii", "surrogateescape")
-            script = "import os; print(repr(os.getenv(%s)))" % repr(key)
+            script = "import os; print(repr(os.getenvb(%s)))" % repr(key)
             env = os.environ.copy()
             env[key] = value
             stdout = subprocess.check_output(
                 [sys.executable, "-c", script],
                 env=env)
             stdout = stdout.rstrip(b'\n\r')
-            self.assertEquals(stdout, value_repr)
+            self.assertEquals(stdout.decode('ascii'), repr(value))
 
 
 @unittest.skipUnless(mswindows, "Windows specific tests")

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Fri May  7 00:05:07 2010
@@ -348,6 +348,12 @@
 Library
 -------
 
+- Issue #8603: Create a bytes version of os.environ for Unix: create
+  os.environb mapping and os.getenvb() function, os.unsetenv() encodes str
+  argument to the file system encoding with the surrogateescape error handler
+  (instead of utf8/strict) and accepts bytes, and posix.environ keys and values
+  are bytes.
+
 - Issue #8573: asyncore _strerror() function might throw ValueError.
 
 - Issue #8483: asyncore.dispatcher's __getattr__ method produced confusing 

Modified: python/branches/py3k/Modules/posixmodule.c
==============================================================================
--- python/branches/py3k/Modules/posixmodule.c	(original)
+++ python/branches/py3k/Modules/posixmodule.c	Fri May  7 00:05:07 2010
@@ -498,14 +498,12 @@
         char *p = strchr(*e, '=');
         if (p == NULL)
             continue;
-        k = PyUnicode_Decode(*e, (int)(p-*e),
-                             Py_FileSystemDefaultEncoding, "surrogateescape");
+        k = PyBytes_FromStringAndSize(*e, (int)(p-*e));
         if (k == NULL) {
             PyErr_Clear();
             continue;
         }
-        v = PyUnicode_Decode(p+1, strlen(p+1),
-                             Py_FileSystemDefaultEncoding, "surrogateescape");
+        v = PyBytes_FromStringAndSize(p+1, strlen(p+1));
         if (v == NULL) {
             PyErr_Clear();
             Py_DECREF(k);
@@ -5301,7 +5299,7 @@
     char *s1, *s2;
     char *newenv;
 #endif
-    PyObject *newstr;
+    PyObject *newstr = NULL;
     size_t len;
 
 #ifdef MS_WINDOWS
@@ -5324,15 +5322,19 @@
         APIRET rc;
 
         rc = DosSetExtLIBPATH(s2, BEGIN_LIBPATH);
-        if (rc != NO_ERROR)
-            return os2_error(rc);
+        if (rc != NO_ERROR) {
+            os2_error(rc);
+            goto error;
+        }
 
     } else if (stricmp(s1, "ENDLIBPATH") == 0) {
         APIRET rc;
 
         rc = DosSetExtLIBPATH(s2, END_LIBPATH);
-        if (rc != NO_ERROR)
-            return os2_error(rc);
+        if (rc != NO_ERROR) {
+            os2_error(rc);
+            goto error;
+        }
     } else {
 #endif
     /* XXX This can leak memory -- not easy to fix :-( */
@@ -5342,36 +5344,40 @@
     len = wcslen(s1) + wcslen(s2) + 2;
     newstr = PyUnicode_FromUnicode(NULL, (int)len - 1);
 #else
-    len = strlen(s1) + strlen(s2) + 2;
+    len = PyBytes_GET_SIZE(os1) + PyBytes_GET_SIZE(os2) + 2;
     newstr = PyBytes_FromStringAndSize(NULL, (int)len - 1);
 #endif
-    if (newstr == NULL)
-        return PyErr_NoMemory();
+    if (newstr == NULL) {
+        PyErr_NoMemory();
+        goto error;
+    }
 #ifdef MS_WINDOWS
     newenv = PyUnicode_AsUnicode(newstr);
     _snwprintf(newenv, len, L"%s=%s", s1, s2);
     if (_wputenv(newenv)) {
-        Py_DECREF(newstr);
         posix_error();
-        return NULL;
+        goto error;
     }
 #else
     newenv = PyBytes_AS_STRING(newstr);
     PyOS_snprintf(newenv, len, "%s=%s", s1, s2);
     if (putenv(newenv)) {
-        Py_DECREF(newstr);
-        Py_DECREF(os1);
-        Py_DECREF(os2);
         posix_error();
-        return NULL;
+        goto error;
     }
 #endif
+
     /* Install the first arg and newstr in posix_putenv_garbage;
      * this will cause previous value to be collected.  This has to
      * happen after the real putenv() call because the old value
      * was still accessible until then. */
     if (PyDict_SetItem(posix_putenv_garbage,
-                       PyTuple_GET_ITEM(args, 0), newstr)) {
+#ifdef MS_WINDOWS
+                       PyTuple_GET_ITEM(args, 0),
+#else
+                       os1,
+#endif
+                       newstr)) {
         /* really not much we can do; just leak */
         PyErr_Clear();
     }
@@ -5382,12 +5388,20 @@
 #if defined(PYOS_OS2)
     }
 #endif
+
 #ifndef MS_WINDOWS
     Py_DECREF(os1);
     Py_DECREF(os2);
 #endif
-    Py_INCREF(Py_None);
-    return Py_None;
+    Py_RETURN_NONE;
+
+error:
+#ifndef MS_WINDOWS
+    Py_DECREF(os1);
+    Py_DECREF(os2);
+#endif
+    Py_XDECREF(newstr);
+    return NULL;
 }
 #endif /* putenv */
 
@@ -5399,10 +5413,20 @@
 static PyObject *
 posix_unsetenv(PyObject *self, PyObject *args)
 {
+#ifdef MS_WINDOWS
     char *s1;
 
     if (!PyArg_ParseTuple(args, "s:unsetenv", &s1))
         return NULL;
+#else
+    PyObject *os1;
+    char *s1;
+
+    if (!PyArg_ParseTuple(args, "O&:unsetenv",
+                          PyUnicode_FSConverter, &os1))
+        return NULL;
+    s1 = PyBytes_AsString(os1);
+#endif
 
     unsetenv(s1);
 
@@ -5412,13 +5436,20 @@
      * old value was still accessible until then.
      */
     if (PyDict_DelItem(posix_putenv_garbage,
-                       PyTuple_GET_ITEM(args, 0))) {
+#ifdef MS_WINDOWS
+                       PyTuple_GET_ITEM(args, 0)
+#else
+                       os1
+#endif
+                       )) {
         /* really not much we can do; just leak */
         PyErr_Clear();
     }
 
-    Py_INCREF(Py_None);
-    return Py_None;
+#ifndef MS_WINDOWS
+    Py_DECREF(os1);
+#endif
+    Py_RETURN_NONE;
 }
 #endif /* unsetenv */
 


More information about the Python-checkins mailing list