[Python-3000-checkins] r66951 - in python/branches/py3k: Lib/test/test_pep3120.py Misc/NEWS Parser/tokenizer.c Parser/tokenizer.h Python/ast.c

brett.cannon python-3000-checkins at python.org
Fri Oct 17 05:38:51 CEST 2008


Author: brett.cannon
Date: Fri Oct 17 05:38:50 2008
New Revision: 66951

Log:
Latin-1 source code was not being properly decoded when passed through
compile(). This was due to special-casing left over from before UTF-8 became
the default source encoding.

Closes issue #3574. Thanks to Victor Stinner for help with the patch.


Modified:
   python/branches/py3k/Lib/test/test_pep3120.py
   python/branches/py3k/Misc/NEWS
   python/branches/py3k/Parser/tokenizer.c
   python/branches/py3k/Parser/tokenizer.h
   python/branches/py3k/Python/ast.c

Modified: python/branches/py3k/Lib/test/test_pep3120.py
==============================================================================
Binary files. No diff available.

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Fri Oct 17 05:38:50 2008
@@ -15,6 +15,8 @@
 Core and Builtins
 -----------------
 
+- Issue #3574: compile() incorrectly handled source code encoded as Latin-1.
+
 - Issues #2384 and #3975: Tracebacks were not correctly printed when the
   source file contains a ``coding:`` header: the wrong line was displayed, and
   the encoding was not respected.

Modified: python/branches/py3k/Parser/tokenizer.c
==============================================================================
--- python/branches/py3k/Parser/tokenizer.c	(original)
+++ python/branches/py3k/Parser/tokenizer.c	Fri Oct 17 05:38:50 2008
@@ -135,6 +135,7 @@
 	tok->decoding_state = STATE_INIT;
 	tok->decoding_erred = 0;
 	tok->read_coding_spec = 0;
+	tok->enc = NULL;
 	tok->encoding = NULL;
         tok->cont_line = 0;
 #ifndef PGEN
@@ -274,8 +275,7 @@
 		tok->read_coding_spec = 1;
 		if (tok->encoding == NULL) {
 			assert(tok->decoding_state == STATE_RAW);
-			if (strcmp(cs, "utf-8") == 0 ||
-			    strcmp(cs, "iso-8859-1") == 0) {
+			if (strcmp(cs, "utf-8") == 0) {
 				tok->encoding = cs;
 			} else {
 				r = set_readline(tok, cs);

Modified: python/branches/py3k/Parser/tokenizer.h
==============================================================================
--- python/branches/py3k/Parser/tokenizer.h	(original)
+++ python/branches/py3k/Parser/tokenizer.h	Fri Oct 17 05:38:50 2008
@@ -49,14 +49,14 @@
 	enum decoding_state decoding_state;
 	int decoding_erred;	/* whether erred in decoding  */
 	int read_coding_spec;	/* whether 'coding:...' has been read  */
-	char *encoding;
+	char *encoding;         /* Source encoding. */
 	int cont_line;          /* whether we are in a continuation line. */
 	const char* line_start;	/* pointer to start of current line */
 #ifndef PGEN
 	PyObject *decoding_readline; /* codecs.open(...).readline */
 	PyObject *decoding_buffer;
 #endif
-	const char* enc;
+	const char* enc;        /* Encoding for the current str. */
 	const char* str;
 };
 

Modified: python/branches/py3k/Python/ast.c
==============================================================================
--- python/branches/py3k/Python/ast.c	(original)
+++ python/branches/py3k/Python/ast.c	Fri Oct 17 05:38:50 2008
@@ -3160,9 +3160,6 @@
     if (encoding == NULL) {
         buf = (char *)s;
         u = NULL;
-    } else if (strcmp(encoding, "iso-8859-1") == 0) {
-        buf = (char *)s;
-        u = NULL;
     } else {
         /* check for integer overflow */
         if (len > PY_SIZE_MAX / 4)
@@ -3275,8 +3272,7 @@
         }
     }
     need_encoding = (!*bytesmode && c->c_encoding != NULL &&
-                     strcmp(c->c_encoding, "utf-8") != 0 &&
-                     strcmp(c->c_encoding, "iso-8859-1") != 0);
+                     strcmp(c->c_encoding, "utf-8") != 0);
     if (rawmode || strchr(s, '\\') == NULL) {
         if (need_encoding) {
             PyObject *v, *u = PyUnicode_DecodeUTF8(s, len, NULL);


More information about the Python-3000-checkins mailing list