[Python-checkins] r85517 - in python/branches/py3k: Doc/c-api/unicode.rst Include/code.h Include/unicodeobject.h Misc/NEWS Objects/codeobject.c Objects/object.c Objects/unicodeobject.c Python/pythonrun.c

victor.stinner python-checkins at python.org
Fri Oct 15 14:04:23 CEST 2010


Author: victor.stinner
Date: Fri Oct 15 14:04:23 2010
New Revision: 85517

Log:
Use locale encoding if Py_FileSystemDefaultEncoding is not set

 * PyUnicode_EncodeFSDefault(), PyUnicode_DecodeFSDefaultAndSize() and
   PyUnicode_DecodeFSDefault() use the locale encoding instead of UTF-8 if
   Py_FileSystemDefaultEncoding is NULL
 * redecode_filenames() functions and _Py_code_object_list (issue #9630)
   are no longer needed: remove them


Modified:
   python/branches/py3k/Doc/c-api/unicode.rst
   python/branches/py3k/Include/code.h
   python/branches/py3k/Include/unicodeobject.h
   python/branches/py3k/Misc/NEWS
   python/branches/py3k/Objects/codeobject.c
   python/branches/py3k/Objects/object.c
   python/branches/py3k/Objects/unicodeobject.c
   python/branches/py3k/Python/pythonrun.c

Modified: python/branches/py3k/Doc/c-api/unicode.rst
==============================================================================
--- python/branches/py3k/Doc/c-api/unicode.rst	(original)
+++ python/branches/py3k/Doc/c-api/unicode.rst	Fri Oct 15 14:04:23 2010
@@ -415,7 +415,8 @@
    Decode a string using :c:data:`Py_FileSystemDefaultEncoding` and the
    ``'surrogateescape'`` error handler, or ``'strict'`` on Windows.
 
-   If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to UTF-8.
+   If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to the
+   locale encoding.
 
    .. versionchanged:: 3.2
       Use ``'strict'`` error handler on Windows.
@@ -426,7 +427,8 @@
    Decode a null-terminated string using :c:data:`Py_FileSystemDefaultEncoding`
    and the ``'surrogateescape'`` error handler, or ``'strict'`` on Windows.
 
-   If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to UTF-8.
+   If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to the
+   locale encoding.
 
    Use :c:func:`PyUnicode_DecodeFSDefaultAndSize` if you know the string length.
 
@@ -440,7 +442,8 @@
    ``'surrogateescape'`` error handler, or ``'strict'`` on Windows, and return
    :class:`bytes`.
 
-   If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to UTF-8.
+   If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to the
+   locale encoding.
 
    .. versionadded:: 3.2
 

Modified: python/branches/py3k/Include/code.h
==============================================================================
--- python/branches/py3k/Include/code.h	(original)
+++ python/branches/py3k/Include/code.h	Fri Oct 15 14:04:23 2010
@@ -72,7 +72,7 @@
 PyAPI_FUNC(PyCodeObject *) PyCode_New(
 	int, int, int, int, int, PyObject *, PyObject *,
 	PyObject *, PyObject *, PyObject *, PyObject *,
-	PyObject *, PyObject *, int, PyObject *); 
+	PyObject *, PyObject *, int, PyObject *);
         /* same as struct above */
 
 /* Creates a new empty code object with the specified source location. */
@@ -99,13 +99,6 @@
 PyAPI_FUNC(PyObject*) PyCode_Optimize(PyObject *code, PyObject* consts,
                                       PyObject *names, PyObject *lineno_obj);
 
-/* List of weak references to all code objects. The list is used by
-   initfsencoding() to redecode code filenames at startup if the filesystem
-   encoding changes. At initfsencoding() exit, the list is set to NULL and it
-   is no more used. */
-
-extern PyObject *_Py_code_object_list;
-
 #ifdef __cplusplus
 }
 #endif

Modified: python/branches/py3k/Include/unicodeobject.h
==============================================================================
--- python/branches/py3k/Include/unicodeobject.h	(original)
+++ python/branches/py3k/Include/unicodeobject.h	Fri Oct 15 14:04:23 2010
@@ -1193,7 +1193,8 @@
 /* Decode a null-terminated string using Py_FileSystemDefaultEncoding
    and the "surrogateescape" error handler.
 
-   If Py_FileSystemDefaultEncoding is not set, fall back to UTF-8.
+   If Py_FileSystemDefaultEncoding is not set, fall back to the locale
+   encoding.
 
    Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known.
 */
@@ -1205,7 +1206,8 @@
 /* Decode a string using Py_FileSystemDefaultEncoding
    and the "surrogateescape" error handler.
 
-   If Py_FileSystemDefaultEncoding is not set, fall back to UTF-8.
+   If Py_FileSystemDefaultEncoding is not set, fall back to the locale
+   encoding.
 */
 
 PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
@@ -1216,7 +1218,8 @@
 /* Encode a Unicode object to Py_FileSystemDefaultEncoding with the
    "surrogateescape" error handler, and return bytes.
 
-   If Py_FileSystemDefaultEncoding is not set, fall back to UTF-8.
+   If Py_FileSystemDefaultEncoding is not set, fall back to the locale
+   encoding.
 */
 
 PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Fri Oct 15 14:04:23 2010
@@ -10,6 +10,9 @@
 Core and Builtins
 -----------------
 
+- Use locale encoding instead of UTF-8 to encode and decode filenames if
+  Py_FileSystemDefaultEncoding is not set.
+
 - Issue #10095: fp_setreadl() doesn't reopen the file, reuse instead the file
   descriptor.
 

Modified: python/branches/py3k/Objects/codeobject.c
==============================================================================
--- python/branches/py3k/Objects/codeobject.c	(original)
+++ python/branches/py3k/Objects/codeobject.c	Fri Oct 15 14:04:23 2010
@@ -5,8 +5,6 @@
 #define NAME_CHARS \
     "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"
 
-PyObject *_Py_code_object_list = NULL;
-
 /* all_name_chars(s): true iff all chars in s are valid NAME_CHARS */
 
 static int
@@ -111,17 +109,6 @@
         co->co_lnotab = lnotab;
         co->co_zombieframe = NULL;
         co->co_weakreflist = NULL;
-
-        if (_Py_code_object_list != NULL) {
-            int err;
-            PyObject *ref = PyWeakref_NewRef((PyObject*)co, NULL);
-            if (ref == NULL)
-                goto error;
-            err = PyList_Append(_Py_code_object_list, ref);
-            Py_DECREF(ref);
-            if (err)
-                goto error;
-        }
     }
     return co;
 

Modified: python/branches/py3k/Objects/object.c
==============================================================================
--- python/branches/py3k/Objects/object.c	(original)
+++ python/branches/py3k/Objects/object.c	Fri Oct 15 14:04:23 2010
@@ -1604,10 +1604,6 @@
     if (PyType_Ready(&PyCode_Type) < 0)
         Py_FatalError("Can't initialize code type");
 
-    _Py_code_object_list = PyList_New(0);
-    if (_Py_code_object_list == NULL)
-        Py_FatalError("Can't initialize code type");
-
     if (PyType_Ready(&PyFrame_Type) < 0)
         Py_FatalError("Can't initialize frame type");
 

Modified: python/branches/py3k/Objects/unicodeobject.c
==============================================================================
--- python/branches/py3k/Objects/unicodeobject.c	(original)
+++ python/branches/py3k/Objects/unicodeobject.c	Fri Oct 15 14:04:23 2010
@@ -1597,11 +1597,22 @@
                                          "surrogateescape");
     }
     else {
-        /* if you change the default encoding, update also
-           PyUnicode_DecodeFSDefaultAndSize() and redecode_filenames() */
-        return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
-                                    PyUnicode_GET_SIZE(unicode),
-                                    "surrogateescape");
+        /* locale encoding with surrogateescape */
+        wchar_t *wchar;
+        char *bytes;
+        PyObject *bytes_obj;
+
+        wchar = PyUnicode_AsWideCharString(unicode, NULL);
+        if (wchar == NULL)
+            return NULL;
+        bytes = _Py_wchar2char(wchar);
+        PyMem_Free(wchar);
+        if (bytes == NULL)
+            return NULL;
+
+        bytes_obj = PyBytes_FromString(bytes);
+        PyMem_Free(bytes);
+        return bytes_obj;
     }
 }
 
@@ -1769,9 +1780,22 @@
                                 "surrogateescape");
     }
     else {
-        /* if you change the default encoding, update also
-           PyUnicode_EncodeFSDefault() and redecode_filenames() */
-        return PyUnicode_DecodeUTF8(s, size, "surrogateescape");
+        /* locale encoding with surrogateescape */
+        wchar_t *wchar;
+        PyObject *unicode;
+
+        if (s[size] != '\0' || size != strlen(s)) {
+            PyErr_SetString(PyExc_TypeError, "embedded NUL character");
+            return NULL;
+        }
+
+        wchar = _Py_char2wchar(s);
+        if (wchar == NULL)
+            return NULL;
+
+        unicode = PyUnicode_FromWideChar(wchar, -1);
+        PyMem_Free(wchar);
+        return unicode;
     }
 }
 

Modified: python/branches/py3k/Python/pythonrun.c
==============================================================================
--- python/branches/py3k/Python/pythonrun.c	(original)
+++ python/branches/py3k/Python/pythonrun.c	Fri Oct 15 14:04:23 2010
@@ -719,259 +719,6 @@
     }
 }
 
-/* Redecode a filename from the default filesystem encoding (utf-8) to
-   'new_encoding' encoding with 'errors' error handler */
-static PyObject*
-redecode_filename(PyObject *file, const char *new_encoding,
-                  const char *errors)
-{
-    PyObject *file_bytes, *new_file;
-
-    file_bytes = PyUnicode_EncodeFSDefault(file);
-    if (file_bytes == NULL)
-        return NULL;
-    new_file = PyUnicode_Decode(
-        PyBytes_AsString(file_bytes),
-        PyBytes_GET_SIZE(file_bytes),
-        new_encoding,
-        errors);
-    Py_DECREF(file_bytes);
-    return new_file;
-}
-
-/* Redecode a path list */
-static int
-redecode_path_list(PyObject *paths,
-                   const char *new_encoding, const char *errors)
-{
-    PyObject *filename, *new_filename;
-    Py_ssize_t i, size;
-
-    size = PyList_Size(paths);
-    for (i=0; i < size; i++) {
-        filename = PyList_GetItem(paths, i);
-        if (filename == NULL)
-            return -1;
-
-        new_filename = redecode_filename(filename, new_encoding, errors);
-        if (new_filename == NULL)
-            return -1;
-        if (PyList_SetItem(paths, i, new_filename)) {
-            Py_DECREF(new_filename);
-            return -1;
-        }
-    }
-    return 0;
-}
-
-/* Redecode __file__ and __path__ attributes of sys.modules */
-static int
-redecode_sys_modules(const char *new_encoding, const char *errors)
-{
-    PyInterpreterState *interp;
-    PyObject *modules, *values, *file, *new_file, *paths;
-    PyObject *iter = NULL, *module = NULL;
-
-    interp = PyThreadState_GET()->interp;
-    modules = interp->modules;
-
-    values = PyObject_CallMethod(modules, "values", "");
-    if (values == NULL)
-        goto error;
-
-    iter = PyObject_GetIter(values);
-    Py_DECREF(values);
-    if (iter == NULL)
-        goto error;
-
-    while (1)
-    {
-        module = PyIter_Next(iter);
-        if (module == NULL) {
-            if (PyErr_Occurred())
-                goto error;
-            else
-                break;
-        }
-
-        file = PyModule_GetFilenameObject(module);
-        if (file != NULL) {
-            new_file = redecode_filename(file, new_encoding, errors);
-            Py_DECREF(file);
-            if (new_file == NULL)
-                goto error;
-            if (PyObject_SetAttrString(module, "__file__", new_file)) {
-                Py_DECREF(new_file);
-                goto error;
-            }
-            Py_DECREF(new_file);
-        }
-        else
-            PyErr_Clear();
-
-        paths = PyObject_GetAttrString(module, "__path__");
-        if (paths != NULL) {
-            if (redecode_path_list(paths, new_encoding, errors))
-                goto error;
-        }
-        else
-            PyErr_Clear();
-
-        Py_CLEAR(module);
-    }
-    Py_CLEAR(iter);
-    return 0;
-
-error:
-    Py_XDECREF(iter);
-    Py_XDECREF(module);
-    return -1;
-}
-
-/* Redecode sys.path_importer_cache keys */
-static int
-redecode_sys_path_importer_cache(const char *new_encoding, const char *errors)
-{
-    PyObject *path_importer_cache, *items, *item, *path, *importer, *new_path;
-    PyObject *new_cache = NULL, *iter = NULL;
-
-    path_importer_cache = PySys_GetObject("path_importer_cache");
-    if (path_importer_cache == NULL)
-        goto error;
-
-    items = PyObject_CallMethod(path_importer_cache, "items", "");
-    if (items == NULL)
-        goto error;
-
-    iter = PyObject_GetIter(items);
-    Py_DECREF(items);
-    if (iter == NULL)
-        goto error;
-
-    new_cache = PyDict_New();
-    if (new_cache == NULL)
-        goto error;
-
-    while (1)
-    {
-        item = PyIter_Next(iter);
-        if (item == NULL) {
-            if (PyErr_Occurred())
-                goto error;
-            else
-                break;
-        }
-        path = PyTuple_GET_ITEM(item, 0);
-        importer = PyTuple_GET_ITEM(item, 1);
-
-        new_path = redecode_filename(path, new_encoding, errors);
-        if (new_path == NULL)
-            goto error;
-        if (PyDict_SetItem(new_cache, new_path, importer)) {
-            Py_DECREF(new_path);
-            goto error;
-        }
-        Py_DECREF(new_path);
-    }
-    Py_CLEAR(iter);
-    if (PySys_SetObject("path_importer_cache", new_cache))
-        goto error;
-    Py_CLEAR(new_cache);
-    return 0;
-
-error:
-    Py_XDECREF(iter);
-    Py_XDECREF(new_cache);
-    return -1;
-}
-
-/* Redecode co_filename attribute of all code objects */
-static int
-redecode_code_objects(const char *new_encoding, const char *errors)
-{
-    Py_ssize_t i, len;
-    PyCodeObject *co;
-    PyObject *ref, *new_file;
-
-    len = Py_SIZE(_Py_code_object_list);
-    for (i=0; i < len; i++) {
-        ref = PyList_GET_ITEM(_Py_code_object_list, i);
-        co = (PyCodeObject *)PyWeakref_GetObject(ref);
-        if ((PyObject*)co == Py_None)
-            continue;
-        if (co == NULL)
-            return -1;
-
-        new_file = redecode_filename(co->co_filename, new_encoding, errors);
-        if (new_file == NULL)
-            return -1;
-        Py_DECREF(co->co_filename);
-        co->co_filename = new_file;
-    }
-    Py_CLEAR(_Py_code_object_list);
-    return 0;
-}
-
-/* Redecode the filenames of all modules (__file__ and __path__ attributes),
-   all code objects (co_filename attribute), sys.path, sys.meta_path,
-   sys.executable and sys.path_importer_cache (keys) when the filesystem
-   encoding changes from the default encoding (utf-8) to new_encoding */
-static int
-redecode_filenames(const char *new_encoding)
-{
-    char *errors;
-    PyObject *paths, *executable, *new_executable;
-
-    /* PyUnicode_DecodeFSDefault() and PyUnicode_EncodeFSDefault() do already
-       use utf-8 if Py_FileSystemDefaultEncoding is NULL */
-    if (strcmp(new_encoding, "utf-8") == 0)
-        return 0;
-
-    if (strcmp(new_encoding, "mbcs") != 0)
-        errors = "surrogateescape";
-    else
-        errors = NULL;
-
-    /* sys.modules */
-    if (redecode_sys_modules(new_encoding, errors))
-        return -1;
-
-    /* sys.path and sys.meta_path */
-    paths = PySys_GetObject("path");
-    if (paths != NULL) {
-        if (redecode_path_list(paths, new_encoding, errors))
-            return -1;
-    }
-    paths = PySys_GetObject("meta_path");
-    if (paths != NULL) {
-        if (redecode_path_list(paths, new_encoding, errors))
-            return -1;
-    }
-
-    /* sys.executable */
-    executable = PySys_GetObject("executable");
-    if (executable == NULL)
-        return -1;
-    new_executable = redecode_filename(executable, new_encoding, errors);
-    if (new_executable == NULL)
-        return -1;
-    if (PySys_SetObject("executable", new_executable)) {
-        Py_DECREF(new_executable);
-        return -1;
-    }
-    Py_DECREF(new_executable);
-
-    /* sys.path_importer_cache */
-    if (redecode_sys_path_importer_cache(new_encoding, errors))
-        return -1;
-
-    /* code objects */
-    if (redecode_code_objects(new_encoding, errors))
-        return -1;
-
-    return 0;
-}
-
 static void
 initfsencoding(void)
 {
@@ -987,11 +734,8 @@
            stdin and stdout if these are terminals.  */
         codeset = get_codeset();
         if (codeset != NULL) {
-            if (redecode_filenames(codeset))
-                Py_FatalError("Py_Initialize: can't redecode filenames");
             Py_FileSystemDefaultEncoding = codeset;
             Py_HasFileSystemDefaultEncoding = 0;
-            Py_CLEAR(_Py_code_object_list);
             return;
         } else {
             fprintf(stderr, "Unable to get the locale encoding:\n");
@@ -1004,8 +748,6 @@
     }
 #endif
 
-    Py_CLEAR(_Py_code_object_list);
-
     /* the encoding is mbcs, utf-8 or ascii */
     codec = _PyCodec_Lookup(Py_FileSystemDefaultEncoding);
     if (!codec) {


More information about the Python-checkins mailing list