[Python-checkins] r84194 - in python/branches/py3k: Doc/library/os.rst Doc/whatsnew/3.2.rst Lib/os.py Lib/test/test_os.py Misc/NEWS

victor.stinner python-checkins at python.org
Thu Aug 19 03:05:19 CEST 2010


Author: victor.stinner
Date: Thu Aug 19 03:05:19 2010
New Revision: 84194

Log:
Create os.fsdecode(): decode from the filesystem encoding with surrogateescape
error handler, or strict error handler on Windows.

 * Rewrite os.fsencode() documentation
 * Improve os.fsencode and os.fsdecode() tests using the new PYTHONFSENCODING
   environment variable


Modified:
   python/branches/py3k/Doc/library/os.rst
   python/branches/py3k/Doc/whatsnew/3.2.rst
   python/branches/py3k/Lib/os.py
   python/branches/py3k/Lib/test/test_os.py
   python/branches/py3k/Misc/NEWS

Modified: python/branches/py3k/Doc/library/os.rst
==============================================================================
--- python/branches/py3k/Doc/library/os.rst	(original)
+++ python/branches/py3k/Doc/library/os.rst	Thu Aug 19 03:05:19 2010
@@ -155,13 +155,26 @@
    These functions are described in :ref:`os-file-dir`.
 
 
-.. function:: fsencode(value)
+.. function:: fsencode(filename)
 
-   Encode *value* to bytes for use in the file system, environment variables or
-   the command line. Use :func:`sys.getfilesystemencoding` and
-   ``'surrogateescape'`` error handler for strings and return bytes unchanged.
-   On Windows, use ``'strict'`` error handler for strings if the file system
-   encoding is ``'mbcs'`` (which is the default encoding).
+   Encode *filename* to the filesystem encoding with ``'surrogateescape'``
+   error handler, return :class:`bytes` unchanged. On Windows, use ``'strict'``
+   error handler if the filesystem encoding is ``'mbcs'`` (which is the default
+   encoding).
+
+   :func:`fsdecode` is the reverse function.
+
+   .. versionadded:: 3.2
+
+
+.. function:: fsdecode(filename)
+
+   Decode *filename* from the filesystem encoding with ``'surrogateescape'``
+   error handler, return :class:`str` unchanged. On Windows, use ``'strict'``
+   error handler if the filesystem encoding is ``'mbcs'`` (which is the default
+   encoding).
+
+   :func:`fsencode` is the reverse function.
 
    .. versionadded:: 3.2
 

Modified: python/branches/py3k/Doc/whatsnew/3.2.rst
==============================================================================
--- python/branches/py3k/Doc/whatsnew/3.2.rst	(original)
+++ python/branches/py3k/Doc/whatsnew/3.2.rst	Thu Aug 19 03:05:19 2010
@@ -237,13 +237,16 @@
 * Stub
 
 
-Unicode
-=======
+Filenames and unicode
+=====================
 
 The filesystem encoding can be specified by setting the
 :envvar:`PYTHONFSENCODING` environment variable before running the interpreter.
 The value should be a string in the form ``<encoding>``, e.g. ``utf-8``.
 
+The :mod:`os` module has two new functions: :func:`os.fsencode` and
+:func:`os.fsdecode`.
+
 
 IDLE
 ====

Modified: python/branches/py3k/Lib/os.py
==============================================================================
--- python/branches/py3k/Lib/os.py	(original)
+++ python/branches/py3k/Lib/os.py	Thu Aug 19 03:05:19 2010
@@ -402,8 +402,7 @@
             path_list = path_listb
 
         if path_list is not None and isinstance(path_list, bytes):
-            path_list = path_list.decode(sys.getfilesystemencoding(),
-                                         'surrogateescape')
+            path_list = fsdecode(path_list)
 
     if path_list is None:
         path_list = defpath
@@ -536,19 +535,39 @@
 
     __all__.extend(("environb", "getenvb"))
 
-def fsencode(value):
-    """Encode value for use in the file system, environment variables
-    or the command line."""
-    if isinstance(value, bytes):
-        return value
-    elif isinstance(value, str):
+def fsencode(filename):
+    """
+    Encode filename to the filesystem encoding with 'surrogateescape' error
+    handler, return bytes unchanged. On Windows, use 'strict' error handler if
+    the file system encoding is 'mbcs' (which is the default encoding).
+    """
+    if isinstance(filename, bytes):
+        return filename
+    elif isinstance(filename, str):
+        encoding = sys.getfilesystemencoding()
+        if encoding == 'mbcs':
+            return filename.encode(encoding)
+        else:
+            return filename.encode(encoding, 'surrogateescape')
+    else:
+        raise TypeError("expect bytes or str, not %s" % type(filename).__name__)
+
+def fsdecode(filename):
+    """
+    Decode filename from the filesystem encoding with 'surrogateescape' error
+    handler, return str unchanged. On Windows, use 'strict' error handler if
+    the file system encoding is 'mbcs' (which is the default encoding).
+    """
+    if isinstance(filename, str):
+        return filename
+    elif isinstance(filename, bytes):
         encoding = sys.getfilesystemencoding()
         if encoding == 'mbcs':
-            return value.encode(encoding)
+            return filename.decode(encoding)
         else:
-            return value.encode(encoding, 'surrogateescape')
+            return filename.decode(encoding, 'surrogateescape')
     else:
-        raise TypeError("expect bytes or str, not %s" % type(value).__name__)
+        raise TypeError("expect bytes or str, not %s" % type(filename).__name__)
 
 def _exists(name):
     return name in globals()

Modified: python/branches/py3k/Lib/test/test_os.py
==============================================================================
--- python/branches/py3k/Lib/test/test_os.py	(original)
+++ python/branches/py3k/Lib/test/test_os.py	Thu Aug 19 03:05:19 2010
@@ -897,14 +897,6 @@
 
     class Pep383Tests(unittest.TestCase):
         def setUp(self):
-            def fsdecode(filename):
-                encoding = sys.getfilesystemencoding()
-                if encoding == 'mbcs':
-                    errors = 'strict'
-                else:
-                    errors = 'surrogateescape'
-                return filename.decode(encoding, errors)
-
             if support.TESTFN_UNENCODABLE:
                 self.dir = support.TESTFN_UNENCODABLE
             else:
@@ -930,7 +922,7 @@
                 for fn in bytesfn:
                     f = open(os.path.join(self.bdir, fn), "w")
                     f.close()
-                    fn = fsdecode(fn)
+                    fn = os.fsdecode(fn)
                     if fn in self.unicodefn:
                         raise ValueError("duplicate filename")
                     self.unicodefn.add(fn)
@@ -1139,12 +1131,43 @@
         self.assertNotEqual(os.lstat(link), os.stat(link))
 
 
-class MiscTests(unittest.TestCase):
-
-    @unittest.skipIf(os.name == "nt", "POSIX specific test")
-    def test_fsencode(self):
-        self.assertEquals(os.fsencode(b'ab\xff'), b'ab\xff')
-        self.assertEquals(os.fsencode('ab\uDCFF'), b'ab\xff')
+class FSEncodingTests(unittest.TestCase):
+    def test_nop(self):
+        self.assertEquals(os.fsencode(b'abc\xff'), b'abc\xff')
+        self.assertEquals(os.fsdecode('abc\u0141'), 'abc\u0141')
+
+    def test_identity(self):
+        # assert fsdecode(fsencode(x)) == x
+        for fn in ('unicode\u0141', 'latin\xe9', 'ascii'):
+            try:
+                bytesfn = os.fsencode(fn)
+            except UnicodeEncodeError:
+                continue
+            self.assertEquals(os.fsdecode(bytesfn), fn)
+
+    def get_output(self, fs_encoding, func):
+        env = os.environ.copy()
+        env['PYTHONIOENCODING'] = 'utf-8'
+        env['PYTHONFSENCODING'] = fs_encoding
+        code = 'import os; print(%s, end="")' % func
+        process = subprocess.Popen(
+            [sys.executable, "-c", code],
+            stdout=subprocess.PIPE, env=env)
+        stdout, stderr = process.communicate()
+        self.assertEqual(process.returncode, 0)
+        return stdout.decode('utf-8')
+
+    def test_encodings(self):
+        def check(encoding, bytesfn, unicodefn):
+            encoded = self.get_output(encoding, 'repr(os.fsencode(%a))' % unicodefn)
+            self.assertEqual(encoded, repr(bytesfn))
+
+            decoded = self.get_output(encoding, 'repr(os.fsdecode(%a))' % bytesfn)
+            self.assertEqual(decoded, repr(unicodefn))
+
+        check('ascii', b'abc\xff', 'abc\udcff')
+        check('utf-8', b'\xc3\xa9\x80', '\xe9\udc80')
+        check('iso-8859-15', b'\xef\xa4', '\xef\u20ac')
 
 
 def test_main():
@@ -1163,7 +1186,7 @@
         Pep383Tests,
         Win32KillTests,
         Win32SymlinkTests,
-        MiscTests,
+        FSEncodingTests,
     )
 
 if __name__ == "__main__":

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Thu Aug 19 03:05:19 2010
@@ -116,6 +116,9 @@
 Library
 -------
 
+- Create os.fsdecode(): decode from the filesystem encoding with
+  surrogateescape error handler, or strict error handler on Windows.
+
 - Issue #3488: Provide convenient shorthand functions ``gzip.compress``
   and ``gzip.decompress``.  Original patch by Anand B. Pillai.
 


More information about the Python-checkins mailing list