[Python-checkins] r85883 - in python/branches/issue10209: Lib/os.py Lib/test/test_os.py Objects/unicodeobject.c Python/pythonrun.c
victor.stinner
python-checkins at python.org
Thu Oct 28 13:28:23 CEST 2010
Author: victor.stinner
Date: Thu Oct 28 13:28:23 2010
New Revision: 85883
Log:
On Mac OS X, fsencode normalizes the filename to NFD, and fsdecode to NFC
Modified:
python/branches/issue10209/Lib/os.py
python/branches/issue10209/Lib/test/test_os.py
python/branches/issue10209/Objects/unicodeobject.c
python/branches/issue10209/Python/pythonrun.c
Modified: python/branches/issue10209/Lib/os.py
==============================================================================
--- python/branches/issue10209/Lib/os.py (original)
+++ python/branches/issue10209/Lib/os.py Thu Oct 28 13:28:23 2010
@@ -552,7 +552,12 @@
if isinstance(filename, bytes):
return filename
elif isinstance(filename, str):
- return filename.encode(encoding, errors)
+ if sys.platform == 'darwin':
+ import unicodedata
+ filename = unicodedata.normalize('NFD', filename)
+ return filename.encode('utf-8', 'surrogateescape')
+ else:
+ return filename.encode(encoding, errors)
else:
raise TypeError("expect bytes or str, not %s" % type(filename).__name__)
@@ -565,7 +570,12 @@
if isinstance(filename, str):
return filename
elif isinstance(filename, bytes):
- return filename.decode(encoding, errors)
+ if sys.platform == 'darwin':
+ import unicodedata
+ filename = filename.decode('utf-8', 'surrogateescape')
+ return unicodedata.normalize('NFC', filename)
+ else:
+ return filename.decode(encoding, errors)
else:
raise TypeError("expect bytes or str, not %s" % type(filename).__name__)
Modified: python/branches/issue10209/Lib/test/test_os.py
==============================================================================
--- python/branches/issue10209/Lib/test/test_os.py (original)
+++ python/branches/issue10209/Lib/test/test_os.py Thu Oct 28 13:28:23 2010
@@ -1172,6 +1172,11 @@
continue
self.assertEquals(os.fsdecode(bytesfn), fn)
+ @unittest.skipUnless(sys.platform == 'darwin', 'test specific to Mac OS X')
+ def test_osx_normalize(self):  # checks the normalization done by os.fsencode()/os.fsdecode() on darwin
+ self.assertEquals(os.fsencode('\xe9'), b'e\xcc\x81')  # precomposed U+00E9 -> 'e' + U+0301 (decomposed), UTF-8 encoded
+ self.assertEquals(os.fsdecode(b'e\xcc\x81'), '\xe9')  # decomposed UTF-8 bytes -> precomposed U+00E9
+
class PidTests(unittest.TestCase):
@unittest.skipUnless(hasattr(os, 'getppid'), "test needs os.getppid")
Modified: python/branches/issue10209/Objects/unicodeobject.c
==============================================================================
--- python/branches/issue10209/Objects/unicodeobject.c (original)
+++ python/branches/issue10209/Objects/unicodeobject.c Thu Oct 28 13:28:23 2010
@@ -184,6 +184,10 @@
0, 0, 0, 0, 0, 0, 0, 0
};
+#if defined(__APPLE__)
+static PyObject *normalize_func = NULL; /* unicodedata.normalize; set by _PyUnicode_InitFSEncoding(), NULL until then */
+#endif
+
Py_UNICODE
PyUnicode_GetMax(void)
@@ -1584,6 +1588,22 @@
return NULL;
}
+#ifdef __APPLE__
+int
+_PyUnicode_InitFSEncoding(void) /* Look up unicodedata.normalize and store it in normalize_func; returns 0 on success, -1 on failure. */
+{
+ PyObject *unicodedata;
+ unicodedata = PyImport_ImportModule("unicodedata");
+ if (unicodedata == NULL)
+ return -1; /* import failed; normalize_func is left untouched */
+ normalize_func = PyObject_GetAttrString(unicodedata, "normalize"); /* stored in the file-level static used by the FS encode/decode paths */
+ Py_DECREF(unicodedata); /* the module object itself is not needed after the attribute lookup */
+ if (normalize_func == NULL)
+ return -1; /* attribute lookup failed; normalize_func is NULL, so callers skip normalization */
+ return 0;
+}
+#endif
+
PyObject *
PyUnicode_EncodeFSDefault(PyObject *unicode)
{
@@ -1592,9 +1612,24 @@
PyUnicode_GET_SIZE(unicode),
NULL);
#elif defined(__APPLE__)
- return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
- PyUnicode_GET_SIZE(unicode),
- "surrogateescape");
+ PyObject *filename, *bytes;
+ int decref;
+ if (normalize_func) {
+ filename = PyObject_CallFunction(normalize_func, "sO", "NFD", unicode);
+ if (filename == NULL)
+ return NULL;
+ decref = 1;
+ }
+ else {
+ filename = unicode;
+ decref = 0;
+ }
+ bytes = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(filename),
+ PyUnicode_GET_SIZE(filename),
+ "surrogateescape");
+ if (decref)
+ Py_DECREF(filename);
+ return bytes;
#else
if (Py_FileSystemDefaultEncoding) {
return PyUnicode_AsEncodedString(unicode,
@@ -1769,7 +1804,14 @@
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
return PyUnicode_DecodeMBCS(s, size, NULL);
#elif defined(__APPLE__)
- return PyUnicode_DecodeUTF8(s, size, "surrogateescape");
+ PyObject *filename, *normalized;
+ filename = PyUnicode_DecodeUTF8(s, size, "surrogateescape");
+ if (normalize_func) {
+ normalized = PyObject_CallFunction(normalize_func, "sO", "NFC", filename);
+ Py_DECREF(filename);
+ filename = normalized;
+ }
+ return filename;
#else
/* During the early bootstrapping process, Py_FileSystemDefaultEncoding
can be undefined. If it is case, decode using UTF-8. The following assumes
@@ -9958,6 +10000,10 @@
}
}
(void)PyUnicode_ClearFreeList();
+
+#if defined(__APPLE__)
+ Py_CLEAR(normalize_func);
+#endif
}
void
Modified: python/branches/issue10209/Python/pythonrun.c
==============================================================================
--- python/branches/issue10209/Python/pythonrun.c (original)
+++ python/branches/issue10209/Python/pythonrun.c Thu Oct 28 13:28:23 2010
@@ -74,6 +74,10 @@
extern void _PyUnicode_Fini(void);
extern int _PyLong_Init(void);
extern void PyLong_Fini(void);
+#ifdef __APPLE__
+extern int _PyUnicode_InitFSEncoding(void);
+#endif
+
#ifdef WITH_THREAD
extern void _PyGILState_Init(PyInterpreterState *, PyThreadState *);
@@ -751,6 +755,13 @@
} else {
Py_DECREF(codec);
}
+
+#ifdef __APPLE__
+ if (_PyUnicode_InitFSEncoding()) {
+ PyErr_Warn(PyExc_RuntimeWarning,
+ "Unable to get normalize() function of the unicodedata module");
+ }
+#endif
}
/* Import the site module (not into __main__ though) */
More information about the Python-checkins
mailing list