[pypy-commit] pypy default: Issue #1035: os.listdir(someUnicode) returns byte strings for

amauryfa noreply at buildbot.pypy.org
Wed Feb 8 21:16:22 CET 2012


Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch: 
Changeset: r52256:c567905f8478
Date: 2012-02-08 21:14 +0100
http://bitbucket.org/pypy/pypy/changeset/c567905f8478/

Log:	Issue #1035: os.listdir(someUnicode) returns byte strings for
	filenames that cannot be decoded by the filesystem encoding.

diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py
--- a/pypy/module/posix/interp_posix.py
+++ b/pypy/module/posix/interp_posix.py
@@ -543,10 +543,16 @@
             dirname = FileEncoder(space, w_dirname)
             result = rposix.listdir(dirname)
             w_fs_encoding = getfilesystemencoding(space)
-            result_w = [
-                space.call_method(space.wrap(s), "decode", w_fs_encoding)
-                for s in result
-            ]
+            len_result = len(result)
+            result_w = [None] * len_result
+            for i in range(len_result):
+                w_bytes = space.wrap(result[i])
+                try:
+                    result_w[i] = space.call_method(w_bytes,
+                                                    "decode", w_fs_encoding)
+                except OperationError, e:
+                    # fall back to the original byte string
+                    result_w[i] = w_bytes
         else:
             dirname = space.str0_w(w_dirname)
             result = rposix.listdir(dirname)
diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py
--- a/pypy/module/posix/test/test_posix2.py
+++ b/pypy/module/posix/test/test_posix2.py
@@ -29,6 +29,7 @@
     mod.pdir = pdir
     unicode_dir = udir.ensure('fi\xc5\x9fier.txt', dir=True)
     unicode_dir.join('somefile').write('who cares?')
+    unicode_dir.join('caf\xe9').write('who knows?')
     mod.unicode_dir = unicode_dir
 
     # in applevel tests, os.stat uses the CPython os.stat.
@@ -308,14 +309,22 @@
                           'file2']
 
     def test_listdir_unicode(self):
+        import sys
         unicode_dir = self.unicode_dir
         if unicode_dir is None:
             skip("encoding not good enough")
         posix = self.posix
         result = posix.listdir(unicode_dir)
-        result.sort()
-        assert result == [u'somefile']
-        assert type(result[0]) is unicode
+        typed_result = [(type(x), x) for x in result]
+        assert (unicode, u'somefile') in typed_result
+        try:
+            u = "caf\xe9".decode(sys.getfilesystemencoding())
+        except UnicodeDecodeError:
+            # Could not decode, listdir returned the byte string
+            assert (str, "caf\xe9") in typed_result
+        else:
+            assert (unicode, u) in typed_result
+
 
     def test_access(self):
         pdir = self.pdir + '/file1'


More information about the pypy-commit mailing list