Author: martin.v.loewis
Date: Sun Jun 1 09:20:46 2008
New Revision: 63846
Log:
New environment variable PYTHONIOENCODING.
Modified:
python/trunk/Doc/c-api/file.rst
python/trunk/Doc/library/stdtypes.rst
python/trunk/Doc/using/cmdline.rst
python/trunk/Include/fileobject.h
python/trunk/Lib/test/test_sys.py
python/trunk/Misc/NEWS
python/trunk/Modules/main.c
python/trunk/Objects/fileobject.c
python/trunk/Python/pythonrun.c
python/trunk/Python/sysmodule.c
Modified: python/trunk/Doc/c-api/file.rst
==============================================================================
--- python/trunk/Doc/c-api/file.rst (original)
+++ python/trunk/Doc/c-api/file.rst Sun Jun 1 09:20:46 2008
@@ -130,6 +130,14 @@
.. versionadded:: 2.3
+.. cfunction:: int PyFile_SetEncodingAndErrors(PyFileObject *p, const char *enc, char *errors)
+
+ Set the file's encoding for Unicode output to *enc*, and its error
+ mode to *errors*. Return 1 on success and 0 on failure.
+
+ .. versionadded:: 2.6
+
+
.. cfunction:: int PyFile_SoftSpace(PyObject *p, int newflag)
.. index:: single: softspace (file attribute)
Modified: python/trunk/Doc/library/stdtypes.rst
==============================================================================
--- python/trunk/Doc/library/stdtypes.rst (original)
+++ python/trunk/Doc/library/stdtypes.rst Sun Jun 1 09:20:46 2008
@@ -2165,6 +2165,13 @@
.. versionadded:: 2.3
+.. attribute:: file.errors
+
+ The Unicode error handler used along with the encoding.
+
+ .. versionadded:: 2.6
+
+
.. attribute:: file.mode
The I/O mode for the file. If the file was created using the :func:`open`
Modified: python/trunk/Doc/using/cmdline.rst
==============================================================================
--- python/trunk/Doc/using/cmdline.rst (original)
+++ python/trunk/Doc/using/cmdline.rst Sun Jun 1 09:20:46 2008
@@ -481,6 +481,13 @@
.. versionadded:: 2.6
+.. envvar:: PYTHONIOENCODING
+
+ Overrides the encoding used for stdin/stdout/stderr, in the syntax
+ encodingname:errorhandler, with the :errorhandler part being optional.
+
+ .. versionadded:: 2.6
+
.. envvar:: PYTHONNOUSERSITE
Modified: python/trunk/Include/fileobject.h
==============================================================================
--- python/trunk/Include/fileobject.h (original)
+++ python/trunk/Include/fileobject.h Sun Jun 1 09:20:46 2008
@@ -24,6 +24,7 @@
int f_newlinetypes; /* Types of newlines seen */
int f_skipnextlf; /* Skip next \n */
PyObject *f_encoding;
+ PyObject *f_errors;
PyObject *weakreflist; /* List of weak references */
int unlocked_count; /* Num. currently running sections of code
using f_fp with the GIL released. */
@@ -37,6 +38,7 @@
PyAPI_FUNC(PyObject *) PyFile_FromString(char *, char *);
PyAPI_FUNC(void) PyFile_SetBufSize(PyObject *, int);
PyAPI_FUNC(int) PyFile_SetEncoding(PyObject *, const char *);
+PyAPI_FUNC(int) PyFile_SetEncodingAndErrors(PyObject *, const char *, char *errors);
PyAPI_FUNC(PyObject *) PyFile_FromFile(FILE *, char *, char *,
int (*)(FILE *));
PyAPI_FUNC(FILE *) PyFile_AsFile(PyObject *);
Modified: python/trunk/Lib/test/test_sys.py
==============================================================================
--- python/trunk/Lib/test/test_sys.py (original)
+++ python/trunk/Lib/test/test_sys.py Sun Jun 1 09:20:46 2008
@@ -385,6 +385,26 @@
## self.assert_(r[0][2] > 100, r[0][2])
## self.assert_(r[1][2] > 100, r[1][2])
+ def test_ioencoding(self):
+ import subprocess,os
+ env = dict(os.environ)
+
+ # Test character: cent sign, encoded as 0x4A (ASCII J) in CP424,
+ # not representable in ASCII.
+
+ env["PYTHONIOENCODING"] = "cp424"
+ p = subprocess.Popen([sys.executable, "-c", 'print unichr(0xa2)'],
+ stdout = subprocess.PIPE, env=env)
+ out = p.stdout.read().strip()
+ self.assertEqual(out, unichr(0xa2).encode("cp424"))
+
+ env["PYTHONIOENCODING"] = "ascii:replace"
+ p = subprocess.Popen([sys.executable, "-c", 'print unichr(0xa2)'],
+ stdout = subprocess.PIPE, env=env)
+ out = p.stdout.read().strip()
+ self.assertEqual(out, '?')
+
+
def test_main():
test.test_support.run_unittest(SysModuleTest)
Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS (original)
+++ python/trunk/Misc/NEWS Sun Jun 1 09:20:46 2008
@@ -12,6 +12,8 @@
Core and Builtins
-----------------
+- New environment variable PYTHONIOENCODING.
+
- Patch #2488: Add sys.maxsize.
- Issue #2353: file.xreadlines() now emits a Py3k warning.
Modified: python/trunk/Modules/main.c
==============================================================================
--- python/trunk/Modules/main.c (original)
+++ python/trunk/Modules/main.c Sun Jun 1 09:20:46 2008
@@ -99,6 +99,7 @@
PYTHONHOME : alternate <prefix> directory (or <prefix>%c<exec_prefix>).\n\
The default module search path uses %s.\n\
PYTHONCASEOK : ignore case in 'import' statements (Windows).\n\
+PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n\
";
Modified: python/trunk/Objects/fileobject.c
==============================================================================
--- python/trunk/Objects/fileobject.c (original)
+++ python/trunk/Objects/fileobject.c Sun Jun 1 09:20:46 2008
@@ -155,6 +155,7 @@
Py_DECREF(f->f_name);
Py_DECREF(f->f_mode);
Py_DECREF(f->f_encoding);
+ Py_DECREF(f->f_errors);
Py_INCREF(name);
f->f_name = name;
@@ -170,6 +171,8 @@
f->f_skipnextlf = 0;
Py_INCREF(Py_None);
f->f_encoding = Py_None;
+ Py_INCREF(Py_None);
+ f->f_errors = Py_None;
if (f->f_mode == NULL)
return NULL;
@@ -435,19 +438,38 @@
}
/* Set the encoding used to output Unicode strings.
- Returh 1 on success, 0 on failure. */
+ Return 1 on success, 0 on failure. */
int
PyFile_SetEncoding(PyObject *f, const char *enc)
{
+ return PyFile_SetEncodingAndErrors(f, enc, NULL);
+}
+
+int
+PyFile_SetEncodingAndErrors(PyObject *f, const char *enc, char* errors)
+{
PyFileObject *file = (PyFileObject*)f;
- PyObject *str = PyBytes_FromString(enc);
+ PyObject *str, *oerrors;
assert(PyFile_Check(f));
+ str = PyBytes_FromString(enc);
if (!str)
return 0;
+ if (errors) {
+ oerrors = PyString_FromString(errors);
+ if (!oerrors) {
+ Py_DECREF(str);
+ return 0;
+ }
+ } else {
+ oerrors = Py_None;
+ Py_INCREF(Py_None);
+ }
Py_DECREF(file->f_encoding);
file->f_encoding = str;
+ Py_DECREF(file->f_errors);
+ file->f_errors = oerrors;
return 1;
}
@@ -491,6 +513,7 @@
Py_XDECREF(f->f_name);
Py_XDECREF(f->f_mode);
Py_XDECREF(f->f_encoding);
+ Py_XDECREF(f->f_errors);
drop_readahead(f);
Py_TYPE(f)->tp_free((PyObject *)f);
}
@@ -1879,6 +1902,8 @@
"file name"},
{"encoding", T_OBJECT, OFF(f_encoding), RO,
"file encoding"},
+ {"errors", T_OBJECT, OFF(f_errors), RO,
+ "Unicode error handler"},
/* getattr(f, "closed") is implemented without this table */
{NULL} /* Sentinel */
};
@@ -2093,6 +2118,8 @@
((PyFileObject *)self)->f_mode = not_yet_string;
Py_INCREF(Py_None);
((PyFileObject *)self)->f_encoding = Py_None;
+ Py_INCREF(Py_None);
+ ((PyFileObject *)self)->f_errors = Py_None;
((PyFileObject *)self)->weakreflist = NULL;
((PyFileObject *)self)->unlocked_count = 0;
}
@@ -2295,7 +2322,9 @@
if ((flags & Py_PRINT_RAW) &&
PyUnicode_Check(v) && enc != Py_None) {
char *cenc = PyBytes_AS_STRING(enc);
- value = PyUnicode_AsEncodedString(v, cenc, "strict");
+ char *errors = fobj->f_errors == Py_None ?
+ "strict" : PyBytes_AS_STRING(fobj->f_errors);
+ value = PyUnicode_AsEncodedString(v, cenc, errors);
if (value == NULL)
return -1;
} else {
Modified: python/trunk/Python/pythonrun.c
==============================================================================
--- python/trunk/Python/pythonrun.c (original)
+++ python/trunk/Python/pythonrun.c Sun Jun 1 09:20:46 2008
@@ -132,11 +132,20 @@
PyThreadState *tstate;
PyObject *bimod, *sysmod;
char *p;
+ char *icodeset; /* On Windows, input codeset may theoretically
+ differ from output codeset. */
+ char *codeset = NULL;
+ char *errors = NULL;
+ int free_codeset = 0;
+ int overridden = 0;
#if defined(Py_USING_UNICODE) && defined(HAVE_LANGINFO_H) && defined(CODESET)
- char *codeset;
- char *saved_locale;
+ char *saved_locale, *loc_codeset;
PyObject *sys_stream, *sys_isatty;
#endif
+#ifdef MS_WINDOWS
+ char ibuf[128];
+ char buf[128];
+#endif
extern void _Py_ReadyTypes(void);
if (initialized)
@@ -238,38 +247,75 @@
_PyGILState_Init(interp, tstate);
#endif /* WITH_THREAD */
+ if ((p = Py_GETENV("PYTHONIOENCODING")) && *p != '\0') {
+ p = icodeset = codeset = strdup(p);
+ free_codeset = 1;
+ errors = strchr(p, ':');
+ if (errors) {
+ *errors = '\0';
+ errors++;
+ }
+ overridden = 1;
+ }
+
#if defined(Py_USING_UNICODE) && defined(HAVE_LANGINFO_H) && defined(CODESET)
/* On Unix, set the file system encoding according to the
user's preference, if the CODESET names a well-known
Python codec, and Py_FileSystemDefaultEncoding isn't
initialized by other means. Also set the encoding of
- stdin and stdout if these are terminals. */
+ stdin and stdout if these are terminals, unless overridden. */
- saved_locale = strdup(setlocale(LC_CTYPE, NULL));
- setlocale(LC_CTYPE, "");
- codeset = nl_langinfo(CODESET);
- if (codeset && *codeset) {
- PyObject *enc = PyCodec_Encoder(codeset);
- if (enc) {
- codeset = strdup(codeset);
- Py_DECREF(enc);
- } else {
- codeset = NULL;
- PyErr_Clear();
+ if (!overridden || !Py_FileSystemDefaultEncoding) {
+ saved_locale = strdup(setlocale(LC_CTYPE, NULL));
+ setlocale(LC_CTYPE, "");
+ loc_codeset = nl_langinfo(CODESET);
+ if (loc_codeset && *loc_codeset) {
+ PyObject *enc = PyCodec_Encoder(loc_codeset);
+ if (enc) {
+ loc_codeset = strdup(loc_codeset);
+ Py_DECREF(enc);
+ } else {
+ loc_codeset = NULL;
+ PyErr_Clear();
+ }
+ } else
+ loc_codeset = NULL;
+ setlocale(LC_CTYPE, saved_locale);
+ free(saved_locale);
+
+ if (!overridden) {
+ codeset = icodeset = loc_codeset;
+ free_codeset = 1;
+ }
+
+ /* Initialize Py_FileSystemDefaultEncoding from
+ locale even if PYTHONIOENCODING is set. */
+ if (!Py_FileSystemDefaultEncoding) {
+ Py_FileSystemDefaultEncoding = loc_codeset;
+ if (!overridden)
+ free_codeset = 0;
}
- } else
- codeset = NULL;
- setlocale(LC_CTYPE, saved_locale);
- free(saved_locale);
+ }
+#endif
+
+#ifdef MS_WINDOWS
+ if (!overridden) {
+ icodeset = ibuf;
+ encoding = buf;
+ sprintf(ibuf, "cp%d", GetConsoleCP());
+ sprintf(buf, "cp%d", GetConsoleOutputCP());
+ }
+#endif
if (codeset) {
sys_stream = PySys_GetObject("stdin");
sys_isatty = PyObject_CallMethod(sys_stream, "isatty", "");
if (!sys_isatty)
PyErr_Clear();
- if(sys_isatty && PyObject_IsTrue(sys_isatty) &&
+ if ((overridden ||
+ (sys_isatty && PyObject_IsTrue(sys_isatty))) &&
PyFile_Check(sys_stream)) {
- if (!PyFile_SetEncoding(sys_stream, codeset))
+ if (!PyFile_SetEncodingAndErrors(sys_stream, icodeset, errors))
Py_FatalError("Cannot set codeset of stdin");
}
Py_XDECREF(sys_isatty);
@@ -278,9 +324,10 @@
sys_isatty = PyObject_CallMethod(sys_stream, "isatty", "");
if (!sys_isatty)
PyErr_Clear();
- if(sys_isatty && PyObject_IsTrue(sys_isatty) &&
+ if ((overridden ||
+ (sys_isatty && PyObject_IsTrue(sys_isatty))) &&
PyFile_Check(sys_stream)) {
- if (!PyFile_SetEncoding(sys_stream, codeset))
+ if (!PyFile_SetEncodingAndErrors(sys_stream, codeset, errors))
Py_FatalError("Cannot set codeset of stdout");
}
Py_XDECREF(sys_isatty);
@@ -289,19 +336,17 @@
sys_isatty = PyObject_CallMethod(sys_stream, "isatty", "");
if (!sys_isatty)
PyErr_Clear();
- if(sys_isatty && PyObject_IsTrue(sys_isatty) &&
+ if((overridden ||
+ (sys_isatty && PyObject_IsTrue(sys_isatty))) &&
PyFile_Check(sys_stream)) {
- if (!PyFile_SetEncoding(sys_stream, codeset))
+ if (!PyFile_SetEncodingAndErrors(sys_stream, codeset, errors))
Py_FatalError("Cannot set codeset of stderr");
}
Py_XDECREF(sys_isatty);
- if (!Py_FileSystemDefaultEncoding)
- Py_FileSystemDefaultEncoding = codeset;
- else
+ if (free_codeset)
free(codeset);
}
-#endif
}
void
Modified: python/trunk/Python/sysmodule.c
==============================================================================
--- python/trunk/Python/sysmodule.c (original)
+++ python/trunk/Python/sysmodule.c Sun Jun 1 09:20:46 2008
@@ -1232,9 +1232,6 @@
PyObject *m, *v, *sysdict;
PyObject *sysin, *sysout, *syserr;
char *s;
-#ifdef MS_WINDOWS
- char buf[128];
-#endif
m = Py_InitModule3("sys", sys_methods, sys_doc);
if (m == NULL)
@@ -1272,23 +1269,6 @@
syserr = PyFile_FromFile(stderr, "<stderr>", "w", _check_and_flush);
if (PyErr_Occurred())
return NULL;
-#ifdef MS_WINDOWS
- if(isatty(_fileno(stdin)) && PyFile_Check(sysin)) {
- sprintf(buf, "cp%d", GetConsoleCP());
- if (!PyFile_SetEncoding(sysin, buf))
- return NULL;
- }
- if(isatty(_fileno(stdout)) && PyFile_Check(sysout)) {
- sprintf(buf, "cp%d", GetConsoleOutputCP());
- if (!PyFile_SetEncoding(sysout, buf))
- return NULL;
- }
- if(isatty(_fileno(stderr)) && PyFile_Check(syserr)) {
- sprintf(buf, "cp%d", GetConsoleOutputCP());
- if (!PyFile_SetEncoding(syserr, buf))
- return NULL;
- }
-#endif
PyDict_SetItemString(sysdict, "stdin", sysin);
PyDict_SetItemString(sysdict, "stdout", sysout);