[Python-checkins] r81190 - in python/branches/py3k: Doc/library/sys.rst Misc/NEWS Python/bltinmodule.c Python/pythonrun.c

victor.stinner python-checkins at python.org
Sat May 15 14:27:16 CEST 2010


Author: victor.stinner
Date: Sat May 15 14:27:16 2010
New Revision: 81190

Log:
Issue #8610: Load file system codec at startup, and display a fatal error on
failure. Set the file system encoding to utf-8 (instead of None) if getting
the locale encoding failed, or if nl_langinfo(CODESET) function is missing.


Modified:
   python/branches/py3k/Doc/library/sys.rst
   python/branches/py3k/Misc/NEWS
   python/branches/py3k/Python/bltinmodule.c
   python/branches/py3k/Python/pythonrun.c

Modified: python/branches/py3k/Doc/library/sys.rst
==============================================================================
--- python/branches/py3k/Doc/library/sys.rst	(original)
+++ python/branches/py3k/Doc/library/sys.rst	Sat May 15 14:27:16 2010
@@ -298,15 +298,13 @@
 
 .. function:: getfilesystemencoding()
 
-   Return the name of the encoding used to convert Unicode filenames into system
-   file names, or ``None`` if the system default encoding is used. The result value
-   depends on the operating system:
+   Return the name of the encoding used to convert Unicode filenames into
+   system file names. The result value depends on the operating system:
 
    * On Mac OS X, the encoding is ``'utf-8'``.
 
    * On Unix, the encoding is the user's preference according to the result of
-     nl_langinfo(CODESET), or ``None`` if the ``nl_langinfo(CODESET)``
-     failed.
+     ``nl_langinfo(CODESET)``, or ``'utf-8'`` if ``nl_langinfo(CODESET)`` fails.
 
    * On Windows NT+, file names are Unicode natively, so no conversion is
      performed. :func:`getfilesystemencoding` still returns ``'mbcs'``, as
@@ -316,6 +314,10 @@
 
    * On Windows 9x, the encoding is ``'mbcs'``.
 
+   .. versionchanged:: 3.2
+      On Unix, use ``'utf-8'`` instead of ``None`` if ``nl_langinfo(CODESET)``
+      failed. The result of :func:`getfilesystemencoding` cannot be ``None``.
+
 
 .. function:: getrefcount(object)
 

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Sat May 15 14:27:16 2010
@@ -12,6 +12,10 @@
 Core and Builtins
 -----------------
 
+- Issue #8610: Load file system codec at startup, and display a fatal error on
+  failure. Set the file system encoding to utf-8 (instead of None) if getting
+  the locale encoding failed, or if nl_langinfo(CODESET) function is missing.
+
 - PyFile_FromFd() uses PyUnicode_DecodeFSDefault() instead of
   PyUnicode_FromString() to support surrogates in the filename and use the
   right encoding

Modified: python/branches/py3k/Python/bltinmodule.c
==============================================================================
--- python/branches/py3k/Python/bltinmodule.c	(original)
+++ python/branches/py3k/Python/bltinmodule.c	Sat May 15 14:27:16 2010
@@ -9,6 +9,10 @@
 
 #include <ctype.h>
 
+#ifdef HAVE_LANGINFO_H
+#include <langinfo.h>   /* CODESET */
+#endif
+
 /* The default encoding used by the platform file system APIs
    Can remain NULL for all platforms that don't have such a concept
 
@@ -21,9 +25,12 @@
 #elif defined(__APPLE__)
 const char *Py_FileSystemDefaultEncoding = "utf-8";
 int Py_HasFileSystemDefaultEncoding = 1;
-#else
-const char *Py_FileSystemDefaultEncoding = NULL; /* use default */
+#elif defined(HAVE_LANGINFO_H) && defined(CODESET)
+const char *Py_FileSystemDefaultEncoding = NULL; /* set by initfsencoding() */
 int Py_HasFileSystemDefaultEncoding = 0;
+#else
+const char *Py_FileSystemDefaultEncoding = "utf-8";
+int Py_HasFileSystemDefaultEncoding = 1;
 #endif
 
 int

Modified: python/branches/py3k/Python/pythonrun.c
==============================================================================
--- python/branches/py3k/Python/pythonrun.c	(original)
+++ python/branches/py3k/Python/pythonrun.c	Sat May 15 14:27:16 2010
@@ -57,6 +57,7 @@
 
 /* Forward */
 static void initmain(void);
+static void initfsencoding(void);
 static void initsite(void);
 static int initstdio(void);
 static void flush_io(void);
@@ -159,7 +160,6 @@
 
 error:
     Py_XDECREF(codec);
-    PyErr_Clear();
     return NULL;
 }
 #endif
@@ -171,9 +171,6 @@
     PyThreadState *tstate;
     PyObject *bimod, *sysmod, *pstderr;
     char *p;
-#if defined(HAVE_LANGINFO_H) && defined(CODESET)
-    char *codeset;
-#endif
     extern void _Py_ReadyTypes(void);
 
     if (initialized)
@@ -264,21 +261,7 @@
 
     _PyImportHooks_Init();
 
-#if defined(HAVE_LANGINFO_H) && defined(CODESET)
-    /* On Unix, set the file system encoding according to the
-       user's preference, if the CODESET names a well-known
-       Python codec, and Py_FileSystemDefaultEncoding isn't
-       initialized by other means. Also set the encoding of
-       stdin and stdout if these are terminals.  */
-
-    codeset = get_codeset();
-    if (codeset) {
-        if (!Py_FileSystemDefaultEncoding)
-            Py_FileSystemDefaultEncoding = codeset;
-        else
-            free(codeset);
-    }
-#endif
+    initfsencoding();
 
     if (install_sigs)
         initsigs(); /* Signal handling stuff, including initintr() */
@@ -496,7 +479,7 @@
     _PyUnicode_Fini();
 
     /* reset file system default encoding */
-    if (!Py_HasFileSystemDefaultEncoding) {
+    if (!Py_HasFileSystemDefaultEncoding && Py_FileSystemDefaultEncoding) {
         free((char*)Py_FileSystemDefaultEncoding);
         Py_FileSystemDefaultEncoding = NULL;
     }
@@ -707,6 +690,45 @@
     }
 }
 
+static void
+initfsencoding(void)
+{
+    PyObject *codec;
+#if defined(HAVE_LANGINFO_H) && defined(CODESET)
+    char *codeset;
+
+    /* On Unix, set the file system encoding according to the
+       user's preference, if the CODESET names a well-known
+       Python codec; otherwise fall back to utf-8 below. The
+       codeset string is heap-allocated, so
+       Py_HasFileSystemDefaultEncoding is 0 to get it freed. */
+    codeset = get_codeset();
+    if (codeset != NULL) {
+        Py_FileSystemDefaultEncoding = codeset;
+        Py_HasFileSystemDefaultEncoding = 0;
+        return;
+    }
+
+    PyErr_Clear();
+    fprintf(stderr,
+            "Unable to get the locale encoding: "
+            "fallback to utf-8\n");
+    Py_FileSystemDefaultEncoding = "utf-8";
+    Py_HasFileSystemDefaultEncoding = 1;
+#endif
+
+    /* the encoding is mbcs, utf-8 or ascii */
+    codec = _PyCodec_Lookup(Py_FileSystemDefaultEncoding);
+    if (!codec) {
+        /* Such an error can only occur in critical situations: no more
+         * memory, a failed import of a standard library module,
+         * etc. */
+        Py_FatalError("Py_Initialize: unable to load the file system codec");
+    } else {
+        Py_DECREF(codec);
+    }
+}
+
 /* Import the site module (not into __main__ though) */
 
 static void


More information about the Python-checkins mailing list