[Jython-checkins] jython: Add PYTHONIOENCODING env var addressing issue #1876, and -E option to suppress.
jeff.allen
jython-checkins at python.org
Sun Feb 9 21:22:03 CET 2014
http://hg.python.org/jython/rev/6e438088c0e3
changeset: 7181:6e438088c0e3
user: Jeff Allen <ja.py at farowl.co.uk>
date: Sun Feb 09 19:26:34 2014 +0000
summary:
Add PYTHONIOENCODING env var addressing issue #1876, and -E option to suppress.
Also introduces registry items python.io.encoding and python.io.errors, with appropriate sequence
of priority for site, user, environment variable and command-line values.
Additions to test.test_sys (from CPython 2.7) and test.test_sys_jy for registry items.
files:
Lib/test/test_sys.py | 20 ++
Lib/test/test_sys_jy.py | 63 ++++++-
NEWS | 4 +
src/org/python/core/Console.java | 8 +
src/org/python/core/Options.java | 1 -
src/org/python/core/PlainConsole.java | 9 +-
src/org/python/core/PyFile.java | 21 +-
src/org/python/core/PySystemState.java | 54 ++++-
src/org/python/core/StdoutWrapper.java | 5 +-
src/org/python/util/jython.java | 125 ++++++++++--
10 files changed, 256 insertions(+), 54 deletions(-)
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -251,6 +251,26 @@
self.assert_(vi[3] in ("alpha", "beta", "candidate", "final"))
self.assert_(isinstance(vi[4], int))
+ def test_ioencoding(self): # from v2.7 test
+ import subprocess,os
+ env = dict(os.environ)
+
+ # Test character: cent sign, encoded as 0x4A (ASCII J) in CP424,
+ # not representable in ASCII.
+
+ env["PYTHONIOENCODING"] = "cp424"
+ p = subprocess.Popen([sys.executable, "-c", 'print unichr(0xa2)'],
+ stdout = subprocess.PIPE, env=env)
+ out = p.stdout.read().strip()
+ self.assertEqual(out, unichr(0xa2).encode("cp424"))
+
+ env["PYTHONIOENCODING"] = "ascii:replace"
+ p = subprocess.Popen([sys.executable, "-c", 'print unichr(0xa2)'],
+ stdout = subprocess.PIPE, env=env)
+ out = p.stdout.read().strip()
+ self.assertEqual(out, '?')
+
+
def test_main():
if test.test_support.is_jython:
del SysModuleTest.test_lost_displayhook
diff --git a/Lib/test/test_sys_jy.py b/Lib/test/test_sys_jy.py
--- a/Lib/test/test_sys_jy.py
+++ b/Lib/test/test_sys_jy.py
@@ -1,3 +1,4 @@
+# -*- coding: iso-8859-1 -*-
from __future__ import with_statement
import os
import re
@@ -185,13 +186,69 @@
finally:
os.rmdir(moduleDir)
self.assertFalse(os.path.exists(moduleDir))
-
+
+class SysEncodingTest(unittest.TestCase):
+
+ # Adapted from CPython 2.7 test_sys to exercise setting Jython registry
+ # values related to encoding and error policy.
+
+ def test_ioencoding(self): # adapted from CPython v2.7 test_sys
+ import subprocess, os
+ env = dict(os.environ)
+
+ def check(code, encoding=None, errors=None):
+ # Execute with encoding and errors optionally set via Java properties
+ command = [sys.executable]
+ if (encoding):
+ command.append('-Dpython.io.encoding={}'.format(encoding))
+ if (errors):
+ command.append('-Dpython.io.errors={}'.format(errors))
+ command.append('-c')
+ command.append('print unichr({:#x})'.format(code))
+ #print "\n ", " ".join(command), " ... ",
+ p = subprocess.Popen(command, stdout = subprocess.PIPE, env=env)
+ return p.stdout.read().strip()
+
+ env.pop("PYTHONIOENCODING", None)
+ self.assertEqual(check(ord(u'A')), b"A")
+
+ # Test character: U+00a2 cent sign (¢) is:
+ # not representable in ASCII.
+ # xml: &#162;
+ # cp1252: a2
+ # cp850: bd
+ # cp424: 4a
+ # utf-8: c2 a2
+
+ self.assertEqual(check(0xa2, "iso-8859-1"), "¢") # same as this file
+
+ # self.assertEqual(check(0xa2, "ascii"), "") # and an error message
+ self.assertEqual(check(0xa2, "ascii", "ignore"),"")
+ self.assertEqual(check(0xa2, "ascii", "replace"), "?")
+ self.assertEqual(check(0xa2, "ascii", "backslashreplace"), r"\xa2")
+ self.assertEqual(check(0xa2, "ascii", "xmlcharrefreplace"), "&#162;")
+
+ self.assertEqual(check(0xa2, "Cp1252"), "\xa2")
+ self.assertEqual(check(0xa2, "Cp424"), "\x4a")
+ self.assertEqual(check(0xa2, "utf-8"), "\xc2\xa2")
+
+ self.assertEqual(check(0xa2, "iso8859-5", "backslashreplace"), r"\xa2")
+
+ # Now check that PYTHONIOENCODING can be superseded piecemeal
+ env["PYTHONIOENCODING"] = "ascii:xmlcharrefreplace"
+ self.assertEqual(check(0xa2, "iso8859-5"), "&#162;")
+ self.assertEqual(check(0xa2, None, "backslashreplace"), r"\xa2")
+ self.assertEqual(check(0xa2, "cp850"), "\xbd")
+
def test_main():
- test_support.run_unittest(SysTest,
+ test_support.run_unittest(
+ SysTest,
ShadowingTest,
SyspathResourceTest,
- SyspathUnicodeTest)
+ SyspathUnicodeTest,
+ SysEncodingTest,
+ )
if __name__ == "__main__":
test_main()
diff --git a/NEWS b/NEWS
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,7 @@
- [ 1753 ] zlib doesn't call end() on compress and decompress
- [ 1860 ] test failures in test_array.py
- [ 1862 ] cStringIO does not support arrays as arguments
+ - [ 1876 ] PYTHONIOENCODING unsupported, used (among others) by PyDev
- [ 1926 ] Adjust MutableSet.pop test so we do not need to skip it
- [ 1964 ] time.strptime() does not support %f in format
- [ 2005 ] threading.Event object's wait([timeout]) function returns null instead of True/False.
@@ -18,6 +19,9 @@
- [ 2075 ] Incorrect padding for hex format strings
- [ 2082 ] Unexpected (Pdb) prompt during regression tests
- [ 2083 ] os.unlink() can delete directories
+ New Features
+ - Command line option -E (ignore environment variables)
+ - Environment variable PYTHONIOENCODING, and corresponding registry items
Jython 2.7b1
Bugs Fixed
diff --git a/src/org/python/core/Console.java b/src/org/python/core/Console.java
--- a/src/org/python/core/Console.java
+++ b/src/org/python/core/Console.java
@@ -2,6 +2,7 @@
package org.python.core;
import java.io.IOException;
+import java.nio.charset.Charset;
/**
* A class named in configuration as the value of <code>python.console</code> must implement this
@@ -29,4 +30,11 @@
*/
public void uninstall() throws UnsupportedOperationException;
+ /**
+ * Accessor for encoding to use for line input as a <code>Charset</code>.
+ *
+ * @return Charset of the encoding in use.
+ */
+ public Charset getEncodingCharset();
+
}
diff --git a/src/org/python/core/Options.java b/src/org/python/core/Options.java
--- a/src/org/python/core/Options.java
+++ b/src/org/python/core/Options.java
@@ -83,7 +83,6 @@
public static boolean dont_write_bytecode = false;
/** Whether -E (ignore environment) was enabled via the command line. */
- //XXX: place holder, not implemented yet.
public static boolean ignore_environment = false;
//XXX: place holder, not implemented yet.
diff --git a/src/org/python/core/PlainConsole.java b/src/org/python/core/PlainConsole.java
--- a/src/org/python/core/PlainConsole.java
+++ b/src/org/python/core/PlainConsole.java
@@ -37,7 +37,7 @@
* must be one supported by the JVM. The PlainConsole does not replace <code>System.in</code> or
* <code>System.out</code>, and does not add any line-editing capability to what is standard for
* your OS console.
- *
+ *
* @param encoding name of a supported encoding or <code>null</code> for
* <code>Charset.defaultCharset()</code>
*/
@@ -59,7 +59,7 @@
* A <code>PlainConsole</code> may be uninstalled. This method assumes any sub-class may not be
* uninstalled. Sub-classes that permit themselves to be uninstalled <b>must</b> override (and
* not call) this method.
- *
+ *
* @throws UnsupportedOperationException unless this class is exactly <code>PlainConsole</code>
*/
@Override
@@ -71,4 +71,9 @@
}
}
+ @Override
+ public Charset getEncodingCharset() {
+ return encodingCharset;
+ }
+
}
diff --git a/src/org/python/core/PyFile.java b/src/org/python/core/PyFile.java
--- a/src/org/python/core/PyFile.java
+++ b/src/org/python/core/PyFile.java
@@ -48,6 +48,9 @@
@ExposedGet(doc = BuiltinDocs.file_encoding_doc)
public String encoding;
+ @ExposedGet(doc = BuiltinDocs.file_errors_doc)
+ public String errors;
+
/** Indicator dictating whether a space should be written to this
* file on the next print statement (not currently implemented in
* print ) */
@@ -170,6 +173,18 @@
}
/**
+ * Set the strings defining the encoding and error handling policy. Setting these strings
+ * affects behaviour of the {@link #writelines(PyObject)} when passed a {@link PyUnicode} value.
+ *
+ * @param encoding the <code>encoding</code> property of <code>file</code>.
+ * @param errors the <code>errors</code> property of <code>file</code> (or <code>null</code>).
+ */
+ void setEncoding(String encoding, String errors) {
+ this.encoding = encoding;
+ this.errors = errors;
+ }
+
+ /**
* Wrap the given RawIOBase with a BufferedIOBase according to the
* mode and given bufsize.
*
@@ -446,13 +461,13 @@
*
* @param obj to write
* @param message for TypeError if raised (or null for default message)
- * @return bytes representing tha value (as a String in the Jython convention)
+ * @return bytes representing the value (as a String in the Jython convention)
*/
private String asWritable(PyObject obj, String message) {
if (obj instanceof PyUnicode) {
- // By convention, use platform default encoding to bytes
- return ((PyUnicode)obj).encode();
+ // Unicode must be encoded into bytes (null arguments here invoke the default values)
+ return ((PyUnicode)obj).encode(encoding, errors);
} else if (obj instanceof PyString) {
// Take a short cut
diff --git a/src/org/python/core/PySystemState.java b/src/org/python/core/PySystemState.java
--- a/src/org/python/core/PySystemState.java
+++ b/src/org/python/core/PySystemState.java
@@ -53,6 +53,8 @@
public static final String PYTHON_CACHEDIR = "python.cachedir";
public static final String PYTHON_CACHEDIR_SKIP = "python.cachedir.skip";
public static final String PYTHON_CONSOLE_ENCODING = "python.console.encoding";
+ public static final String PYTHON_IO_ENCODING = "python.io.encoding";
+ public static final String PYTHON_IO_ERRORS = "python.io.errors";
protected static final String CACHEDIR_DEFAULT_NAME = "cachedir";
public static final String JYTHON_JAR = "jython.jar";
@@ -256,18 +258,25 @@
}
}
+ /**
+ * Initialise the encoding of <code>sys.stdin</code>, <code>sys.stdout</code>, and
+ * <code>sys.stderr</code>, and their error handling policy, from registry variables.
+ * Under the console app util.jython, values reflect PYTHONIOENCODING if not overridden.
+ * Note that the encoding must name a Python codec, as in <code>codecs.encode()</code>.
+ */
private void initEncoding() {
- String encoding = registry.getProperty(PYTHON_CONSOLE_ENCODING);
- if (encoding == null) {
- return;
+ // Two registry variables, counterparts to PYTHONIOENCODING = [encoding][:errors]
+ String encoding = registry.getProperty(PYTHON_IO_ENCODING);
+ String errors = registry.getProperty(PYTHON_IO_ERRORS);
+
+ if (encoding==null) {
+ // We still don't have an explicit selection for this: match the console.
+ encoding = Py.getConsole().getEncodingCharset().name();
}
- for (PyFile stdStream : new PyFile[] {(PyFile)this.stdin, (PyFile)this.stdout,
- (PyFile)this.stderr}) {
- if (stdStream.isatty()) {
- stdStream.encoding = encoding;
- }
- }
+ ((PyFile)stdin).setEncoding(encoding, errors);
+ ((PyFile)stdout).setEncoding(encoding, errors);
+ ((PyFile)stderr).setEncoding(encoding, "backslashreplace");
}
// might be nice to have something general here, but for now these
@@ -683,6 +692,8 @@
} catch (SecurityException e) {
// Continue
}
+
+ // Now the post properties (possibly set by custom JythonInitializer).
registry.putAll(postProperties);
if (standalone) {
// set default standalone property (if not yet set)
@@ -690,24 +701,34 @@
registry.put(PYTHON_CACHEDIR_SKIP, "true");
}
}
+
+ /*
+ * The console encoding is the one used by line-editing consoles to decode on the OS side and
+ * encode on the Python side. It must be a Java codec name, so any relationship to
+ * python.io.encoding is dubious.
+ */
if (!registry.containsKey(PYTHON_CONSOLE_ENCODING)) {
String encoding = getPlatformEncoding();
if (encoding != null) {
registry.put(PYTHON_CONSOLE_ENCODING, encoding);
}
}
+
// Set up options from registry
Options.setFromRegistry();
}
/**
- * @return the encoding of the underlying platform; can be <code>null</code>
+ * Return the encoding of the underlying platform, if we can work it out by any means at all.
+ *
+ * @return the encoding of the underlying platform
*/
private static String getPlatformEncoding() {
// first try to grab the Console encoding
String encoding = getConsoleEncoding();
if (encoding == null) {
try {
+ // Not quite the console encoding (differs on Windows)
encoding = System.getProperty("file.encoding");
} catch (SecurityException se) {
// ignore, can't do anything about it
@@ -722,7 +743,7 @@
private static String getConsoleEncoding() {
String encoding = null;
try {
- Method encodingMethod = Console.class.getDeclaredMethod("encoding");
+ Method encodingMethod = java.io.Console.class.getDeclaredMethod("encoding");
encodingMethod.setAccessible(true); // private static method
encoding = (String)encodingMethod.invoke(Console.class);
} catch (Exception e) {
@@ -731,6 +752,12 @@
return encoding;
}
+ /**
+ * Merge the contents of a property file into the registry without overriding any values already
+ * set there.
+ *
+ * @param file
+ */
private static void addRegistryFile(File file) {
if (file.exists()) {
if (!file.isDirectory()) {
@@ -922,9 +949,6 @@
}
Py.initClassExceptions(getDefaultBuiltins());
- // defaultSystemState can't init its own encoding, see its constructor
- Py.defaultSystemState.initEncoding();
-
// Make sure that Exception classes have been loaded
new PySyntaxError("", 1, 1, "", "");
@@ -1077,7 +1101,7 @@
Class<?> consoleClass = Class.forName(consoleName);
// Ensure it can be cast to the interface type of all consoles
- if (! consoleType.isAssignableFrom(consoleClass)) {
+ if (!consoleType.isAssignableFrom(consoleClass)) {
throw new ClassCastException();
}
diff --git a/src/org/python/core/StdoutWrapper.java b/src/org/python/core/StdoutWrapper.java
--- a/src/org/python/core/StdoutWrapper.java
+++ b/src/org/python/core/StdoutWrapper.java
@@ -103,8 +103,9 @@
private String printToFile(PyFile file, PyObject o) {
String s;
- if (o instanceof PyUnicode && file.encoding != null) {
- s = ((PyUnicode)o).encode(file.encoding, "strict");
+ if (o instanceof PyUnicode) {
+ // Use the encoding and policy defined for the stream. (Each may be null.)
+ s = ((PyUnicode)o).encode(file.encoding, file.errors);
} else {
s = o.__str__().toString();
}
diff --git a/src/org/python/util/jython.java b/src/org/python/util/jython.java
--- a/src/org/python/util/jython.java
+++ b/src/org/python/util/jython.java
@@ -55,8 +55,7 @@
+ "-c cmd : program passed in as string (terminates option list)\n"
// + "-d : debug output from parser (also PYTHONDEBUG=x)\n"
+ "-Dprop=v : Set the property `prop' to value `v'\n"
- // + "-E : ignore environment variables (such as PYTHONPATH)\n"
- + "-C codec : Use a different codec when reading from the console.\n"
+ + "-E : ignore environment variables (such as JYTHONPATH)\n"
+ "-h : print this help message and exit (also --help)\n"
+ "-i : inspect interactively after running script\n"
// + ", (also PYTHONINSPECT=x)\n"
@@ -83,9 +82,11 @@
+ "file : program read from script file\n"
+ "- : program read from stdin (default; interactive mode if a tty)\n"
+ "arg ... : arguments passed to program in sys.argv[1:]\n" + "\n"
- + "Other environment variables:\n" + "JYTHONPATH: '" + File.pathSeparator
+ + "Other environment variables:\n" //
+ + "JYTHONPATH: '" + File.pathSeparator
+ "'-separated list of directories prefixed to the default module\n"
- + " search path. The result is sys.path.";
+ + " search path. The result is sys.path.\n"
+ + "PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.";
public static boolean shouldRestart;
@@ -94,7 +95,7 @@
* root of the JAR archive. Note that the __name__ is set to the base name of the JAR file and
* not to "__main__" (for historic reasons). This method do NOT handle exceptions. the caller
* SHOULD handle any (Py)Exceptions thrown by the code.
- *
+ *
* @param filename The path to the filename to run.
*/
public static void runJar(String filename) {
@@ -211,16 +212,22 @@
// Get system properties (or empty set if we're prevented from accessing them)
Properties preProperties = PySystemState.getBaseProperties();
+ // Read environment variable PYTHONIOENCODING into properties (registry)
+ String pythonIoEncoding = getenv("PYTHONIOENCODING");
+ if (pythonIoEncoding != null) {
+ String[] spec = splitString(pythonIoEncoding, ':', 2);
+ // Note that if encoding or errors is blank (=null), the registry value wins.
+ addDefault(preProperties, PySystemState.PYTHON_IO_ENCODING, spec[0]);
+ addDefault(preProperties, PySystemState.PYTHON_IO_ERRORS, spec[1]);
+ }
+
// Decide if System.in is interactive
if (!opts.fixInteractive || opts.interactive) {
// The options suggest System.in is interactive: but only if isatty() agrees
opts.interactive = Py.isInteractive();
if (opts.interactive) {
// Set the default console type if nothing else has
- String consoleClassName = preProperties.getProperty("python.console");
- if (consoleClassName==null) {
- preProperties.setProperty("python.console", PYTHON_CONSOLE_CLASS);
- }
+ addDefault(preProperties, "python.console", PYTHON_CONSOLE_CLASS);
}
}
@@ -230,7 +237,9 @@
PyList warnoptions = new PyList();
addWarnings(opts.warnoptions, warnoptions);
- addWarnings(warnOptionsFromEnv(), warnoptions);
+ if (!Options.ignore_environment) {
+ addWarnings(warnOptionsFromEnv(), warnoptions);
+ }
systemState.setWarnoptions(warnoptions);
// Make sure warnings module is loaded if there are warning options
@@ -378,24 +387,18 @@
}
if (opts.fixInteractive || (opts.filename == null && opts.command == null)) {
- if (opts.encoding == null) {
- opts.encoding = PySystemState.registry.getProperty("python.console.encoding");
- }
- if (opts.encoding != null) {
- if (!Charset.isSupported(opts.encoding)) {
- System.err.println(opts.encoding
- + " is not a supported encoding on this JVM, so it can't "
- + "be used in python.console.encoding.");
- System.exit(1);
- }
- interp.cflags.encoding = opts.encoding;
- }
+ // Go interactive with the console: the parser needs to know the encoding.
+ String encoding = Py.getConsole().getEncodingCharset().name();
+
+ // Run the interpreter interactively
try {
+ interp.cflags.encoding = encoding;
interp.interact(null, null);
} catch (Throwable t) {
Py.printException(t);
}
}
+
interp.cleanup();
}
@@ -414,9 +417,79 @@
// continue
}
}
+
+ /**
+ * Return an array of trimmed strings by splitting the argument at each occurrence of a
+ * separator character. (Helper for configuration variable processing.) Segments of zero length
+ * after trimming emerge as <code>null</code>. If there are more than the specified number of
+ * segments the last element of the array contains all of the source string after the
+ * <code>(n-1)</code>th occurrence of <code>sep</code>.
+ *
+ * @param spec to split
+ * @param sep character on which to split
+ * @param n number of parts to split into
+ * @return <code>n</code>-element array of strings (or <code>null</code>s)
+ */
+ private static String[] splitString(String spec, char sep, int n) {
+ String[] list = new String[n];
+ int p = 0, i = 0, L = spec.length();
+ while (p < L) {
+ int c = spec.indexOf(sep, p);
+ if (c < 0 || i >= n - 1) {
+ // No more seps, or no more space: i.th piece is the rest of spec.
+ c = L;
+ }
+ String s = spec.substring(p, c).trim();
+ list[i++] = (s.length() > 0) ? s : null;
+ p = c + 1;
+ }
+ return list;
+ }
+
+ /**
+ * If the key is not currently present and the passed value is not <code>null</code>, sets the
+ * <code>key</code> to the <code>value</code> in the given <code>Properties</code> object. Thus,
+ * it provides a default value for a subsequent <code>getProperty()</code>.
+ *
+ * @param registry to be (possibly) updated
+ * @param key at which to set value
+ * @param value to set (or <code>null</code> for no setting)
+ * @return true iff a value was set
+ */
+ private static boolean addDefault(Properties registry, String key, String value) {
+ // Set value at key if nothing else has set it
+ if (value == null || registry.containsKey(key)) {
+ return false;
+ } else {
+ registry.setProperty(key, value);
+ return true;
+ }
+ }
+
+ /**
+ * Get the value of an environment variable, if we are allowed to and it exists; otherwise
+ * return <code>null</code>. We are allowed to access the environment variable if the -E flag
+ * was not given and the application has permission to read environment variables. The -E flag
+ * is reflected in {@link Options#ignore_environment}, and will be set automatically if it turns
+ * out we do not have permission.
+ *
+ * @param varname name to access in the environment
+ * @return the value or <code>null</code>.
+ */
+ private static String getenv(String varname) {
+ if (!Options.ignore_environment) {
+ try {
+ return System.getenv(varname);
+ } catch (SecurityException e) {
+ // We're not allowed to access them after all
+ Options.ignore_environment = true;
+ }
+ }
+ return null;
+ }
+
}
-
class CommandLineOptions {
public String filename;
@@ -515,12 +588,8 @@
} else {
return argumentExpected(arg);
}
- } else if (arg.equals("-C")) {
- encoding = args[++index];
- setProperty("python.console.encoding", encoding);
} else if (arg.equals("-E")) {
- // XXX: accept -E (ignore environment variables) to be compatible with
- // CPython. do nothing for now (we could ignore the registry)
+ // -E (ignore environment variables)
Options.ignore_environment = true;
} else if (arg.startsWith("-D")) {
String key = null;
--
Repository URL: http://hg.python.org/jython
More information about the Jython-checkins
mailing list