[Python-checkins] r85883 - in python/branches/issue10209: Lib/os.py Lib/test/test_os.py Objects/unicodeobject.c Python/pythonrun.c

Thu Oct 28 13:28:23 CEST 2010

Author: victor.stinner
Date: Thu Oct 28 13:28:23 2010
New Revision: 85883

Log:
On Mac OS X, fsencode normalizes the filename to NFD, and fsdecode to NFC


Modified:
   python/branches/issue10209/Lib/os.py
   python/branches/issue10209/Lib/test/test_os.py
   python/branches/issue10209/Objects/unicodeobject.c
   python/branches/issue10209/Python/pythonrun.c

Modified: python/branches/issue10209/Lib/os.py
==============================================================================

--- python/branches/issue10209/Lib/os.py	(original)
+++ python/branches/issue10209/Lib/os.py	Thu Oct 28 13:28:23 2010
@@ -552,7 +552,12 @@
         if isinstance(filename, bytes):
             return filename
         elif isinstance(filename, str):
-            return filename.encode(encoding, errors)
+            if sys.platform == 'darwin':
+                import unicodedata
+                filename = unicodedata.normalize('NFD', filename)
+                return filename.encode('utf-8', 'surrogateescape')
+            else:
+                return filename.encode(encoding, errors)
         else:
             raise TypeError("expect bytes or str, not %s" % type(filename).__name__)
 
@@ -565,7 +570,12 @@
         if isinstance(filename, str):
             return filename
         elif isinstance(filename, bytes):
-            return filename.decode(encoding, errors)
+            if sys.platform == 'darwin':
+                import unicodedata
+                filename = filename.decode('utf-8', 'surrogateescape')
+                return unicodedata.normalize('NFC', filename)
+            else:
+                return filename.decode(encoding, errors)
         else:
             raise TypeError("expect bytes or str, not %s" % type(filename).__name__)
 

Modified: python/branches/issue10209/Lib/test/test_os.py
==============================================================================
--- python/branches/issue10209/Lib/test/test_os.py	(original)
+++ python/branches/issue10209/Lib/test/test_os.py	Thu Oct 28 13:28:23 2010
@@ -1172,6 +1172,11 @@
                 continue
             self.assertEquals(os.fsdecode(bytesfn), fn)
 
+    @unittest.skipUnless(sys.platform == 'darwin', 'test specific to Mac OS X')
+    def test_osx_normalize(self):
+        self.assertEquals(os.fsencode('\xe9'), b'e\xcc\x81')
+        self.assertEquals(os.fsdecode(b'e\xcc\x81'), '\xe9')
+
 
 class PidTests(unittest.TestCase):
     @unittest.skipUnless(hasattr(os, 'getppid'), "test needs os.getppid")

Modified: python/branches/issue10209/Objects/unicodeobject.c
==============================================================================
--- python/branches/issue10209/Objects/unicodeobject.c	(original)
+++ python/branches/issue10209/Objects/unicodeobject.c	Thu Oct 28 13:28:23 2010
@@ -184,6 +184,10 @@
     0, 0, 0, 0, 0, 0, 0, 0
 };
 
+#if defined(__APPLE__)
+static PyObject *normalize_func = NULL;
+#endif
+
 
 Py_UNICODE
 PyUnicode_GetMax(void)
@@ -1584,6 +1588,22 @@
     return NULL;
 }
 
+#ifdef __APPLE__
+int
+_PyUnicode_InitFSEncoding(void)
+{
+    PyObject *unicodedata;
+    unicodedata = PyImport_ImportModule("unicodedata");
+    if (unicodedata == NULL)
+        return -1;
+    normalize_func = PyObject_GetAttrString(unicodedata, "normalize");
+    Py_DECREF(unicodedata);
+    if (normalize_func == NULL)
+        return -1;
+    return 0;
+}
+#endif
+
 PyObject *
 PyUnicode_EncodeFSDefault(PyObject *unicode)
 {
@@ -1592,9 +1612,24 @@
                                 PyUnicode_GET_SIZE(unicode),
                                 NULL);
 #elif defined(__APPLE__)
-    return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
-                                PyUnicode_GET_SIZE(unicode),
-                                "surrogateescape");
+    PyObject *filename, *bytes;
+    int decref;
+    if (normalize_func) {
+        filename = PyObject_CallFunction(normalize_func, "sO", "NFD", unicode);
+        if (filename == NULL)
+            return NULL;
+        decref = 1;
+    }
+    else {
+        filename = unicode;
+        decref = 0;
+    }
+    bytes = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(filename),
+                                 PyUnicode_GET_SIZE(filename),
+                                 "surrogateescape");
+    if (decref)
+        Py_DECREF(filename);
+    return bytes;
 #else
     if (Py_FileSystemDefaultEncoding) {
         return PyUnicode_AsEncodedString(unicode,
@@ -1769,7 +1804,14 @@
 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
     return PyUnicode_DecodeMBCS(s, size, NULL);
 #elif defined(__APPLE__)
-    return PyUnicode_DecodeUTF8(s, size, "surrogateescape");
+    PyObject *filename, *normalized;
+    filename = PyUnicode_DecodeUTF8(s, size, "surrogateescape");
+    if (normalize_func) {
+        normalized = PyObject_CallFunction(normalize_func, "sO", "NFC", filename);
+        Py_DECREF(filename);
+        filename = normalized;
+    }
+    return filename;
 #else
     /* During the early bootstrapping process, Py_FileSystemDefaultEncoding
        can be undefined. If it is case, decode using UTF-8. The following assumes
@@ -9958,6 +10000,10 @@
         }
     }
     (void)PyUnicode_ClearFreeList();
+
+#if defined(__APPLE__)
+    Py_CLEAR(normalize_func);
+#endif
 }
 
 void

Modified: python/branches/issue10209/Python/pythonrun.c
==============================================================================
--- python/branches/issue10209/Python/pythonrun.c	(original)
+++ python/branches/issue10209/Python/pythonrun.c	Thu Oct 28 13:28:23 2010
@@ -74,6 +74,10 @@
 extern void _PyUnicode_Fini(void);
 extern int _PyLong_Init(void);
 extern void PyLong_Fini(void);
+#ifdef __APPLE__
+extern int _PyUnicode_InitFSEncoding(void);
+#endif
+
 
 #ifdef WITH_THREAD
 extern void _PyGILState_Init(PyInterpreterState *, PyThreadState *);
@@ -751,6 +755,13 @@
     } else {
         Py_DECREF(codec);
     }
+
+#ifdef __APPLE__
+    if (_PyUnicode_InitFSEncoding()) {
+        PyErr_Warn(PyExc_RuntimeWarning,
+            "Unable to get normalize() function of the unicodedata module");
+    }
+#endif
 }
 
 /* Import the site module (not into __main__ though) */