[Python-checkins] cpython: Issue #10785: Store the filename as Unicode in the Python parser.
victor.stinner
python-checkins at python.org
Tue Apr 5 01:48:20 CEST 2011
http://hg.python.org/cpython/rev/6e9dc970ac0e
changeset: 69147:6e9dc970ac0e
user: Victor Stinner <victor.stinner at haypocalc.com>
date: Tue Apr 05 00:39:01 2011 +0200
summary:
Issue #10785: Store the filename as Unicode in the Python parser.
files:
Include/parsetok.h | 9 +++++-
Makefile.pre.in | 7 +++--
Misc/NEWS | 2 +
Modules/parsermodule.c | 1 +
Parser/parsetok.c | 32 ++++++++++++++++++-----
Parser/parsetok_pgen.c | 2 +
Parser/tokenizer.c | 35 ++++++++++++++++---------
Parser/tokenizer.h | 8 +++++-
Python/pythonrun.c | 40 ++++++++++++++++++------------
9 files changed, 94 insertions(+), 42 deletions(-)
diff --git a/Include/parsetok.h b/Include/parsetok.h
--- a/Include/parsetok.h
+++ b/Include/parsetok.h
@@ -9,7 +9,10 @@
typedef struct {
int error;
- const char *filename; /* decoded from the filesystem encoding */
+#ifndef PGEN
+ /* The filename is useless for pgen, see comment in tok_state structure */
+ PyObject *filename;
+#endif
int lineno;
int offset;
char *text; /* UTF-8-encoded string */
@@ -66,8 +69,10 @@
perrdetail *err_ret,
int *flags);
-/* Note that he following function is defined in pythonrun.c not parsetok.c. */
+/* Note that the following functions are defined in pythonrun.c,
+ not in parsetok.c */
PyAPI_FUNC(void) PyParser_SetError(perrdetail *);
+PyAPI_FUNC(void) PyParser_ClearError(perrdetail *);
#ifdef __cplusplus
}
diff --git a/Makefile.pre.in b/Makefile.pre.in
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -238,14 +238,13 @@
Parser/listnode.o \
Parser/node.o \
Parser/parser.o \
- Parser/parsetok.o \
Parser/bitset.o \
Parser/metagrammar.o \
Parser/firstsets.o \
Parser/grammar.o \
Parser/pgen.o
-PARSER_OBJS= $(POBJS) Parser/myreadline.o Parser/tokenizer.o
+PARSER_OBJS= $(POBJS) Parser/myreadline.o Parser/parsetok.o Parser/tokenizer.o
PGOBJS= \
Objects/obmalloc.o \
@@ -254,10 +253,12 @@
Python/pyctype.o \
Parser/tokenizer_pgen.o \
Parser/printgrammar.o \
+ Parser/parsetok_pgen.o \
Parser/pgenmain.o
PARSER_HEADERS= \
Parser/parser.h \
+ Include/parsetok.h \
Parser/tokenizer.h
PGENOBJS= $(PGENMAIN) $(POBJS) $(PGOBJS)
@@ -593,6 +594,7 @@
Parser/metagrammar.o: $(srcdir)/Parser/metagrammar.c
Parser/tokenizer_pgen.o: $(srcdir)/Parser/tokenizer.c
+Parser/parsetok_pgen.o: $(srcdir)/Parser/parsetok.c
Parser/pgenmain.o: $(srcdir)/Include/parsetok.h
@@ -700,7 +702,6 @@
Include/objimpl.h \
Include/opcode.h \
Include/osdefs.h \
- Include/parsetok.h \
Include/patchlevel.h \
Include/pgen.h \
Include/pgenheaders.h \
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,8 @@
Core and Builtins
-----------------
+- Issue #10785: Store the filename as Unicode in the Python parser.
+
- Issue #11619: _PyImport_LoadDynamicModule() doesn't encode the path to bytes
on Windows.
diff --git a/Modules/parsermodule.c b/Modules/parsermodule.c
--- a/Modules/parsermodule.c
+++ b/Modules/parsermodule.c
@@ -584,6 +584,7 @@
else
PyParser_SetError(&err);
}
+ PyParser_ClearError(&err);
return (res);
}
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -13,7 +13,7 @@
/* Forward */
static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
-static void initerr(perrdetail *err_ret, const char* filename);
+static int initerr(perrdetail *err_ret, const char* filename);
/* Parse input coming from a string. Return error code, print some errors. */
node *
@@ -48,7 +48,8 @@
struct tok_state *tok;
int exec_input = start == file_input;
- initerr(err_ret, filename);
+ if (initerr(err_ret, filename) < 0)
+ return NULL;
if (*flags & PyPARSE_IGNORE_COOKIE)
tok = PyTokenizer_FromUTF8(s, exec_input);
@@ -59,7 +60,10 @@
return NULL;
}
- tok->filename = filename ? filename : "<string>";
+#ifndef PGEN
+ Py_INCREF(err_ret->filename);
+ tok->filename = err_ret->filename;
+#endif
return parsetok(tok, g, start, err_ret, flags);
}
@@ -90,13 +94,17 @@
{
struct tok_state *tok;
- initerr(err_ret, filename);
+ if (initerr(err_ret, filename) < 0)
+ return NULL;
if ((tok = PyTokenizer_FromFile(fp, (char *)enc, ps1, ps2)) == NULL) {
err_ret->error = E_NOMEM;
return NULL;
}
- tok->filename = filename;
+#ifndef PGEN
+ Py_INCREF(err_ret->filename);
+ tok->filename = err_ret->filename;
+#endif
return parsetok(tok, g, start, err_ret, flags);
}
@@ -267,14 +275,24 @@
return n;
}
-static void
+static int
initerr(perrdetail *err_ret, const char *filename)
{
err_ret->error = E_OK;
- err_ret->filename = filename;
err_ret->lineno = 0;
err_ret->offset = 0;
err_ret->text = NULL;
err_ret->token = -1;
err_ret->expected = -1;
+#ifndef PGEN
+ if (filename)
+ err_ret->filename = PyUnicode_DecodeFSDefault(filename);
+ else
+ err_ret->filename = PyUnicode_FromString("<string>");
+ if (err_ret->filename == NULL) {
+ err_ret->error = E_ERROR;
+ return -1;
+ }
+#endif
+ return 0;
}
diff --git a/Parser/parsetok_pgen.c b/Parser/parsetok_pgen.c
new file mode 100644
--- /dev/null
+++ b/Parser/parsetok_pgen.c
@@ -0,0 +1,2 @@
+#define PGEN
+#include "parsetok.c"
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -128,7 +128,6 @@
tok->prompt = tok->nextprompt = NULL;
tok->lineno = 0;
tok->level = 0;
- tok->filename = NULL;
tok->altwarning = 1;
tok->alterror = 1;
tok->alttabsize = 1;
@@ -140,6 +139,7 @@
tok->encoding = NULL;
tok->cont_line = 0;
#ifndef PGEN
+ tok->filename = NULL;
tok->decoding_readline = NULL;
tok->decoding_buffer = NULL;
#endif
@@ -545,7 +545,6 @@
{
char *line = NULL;
int badchar = 0;
- PyObject *filename;
for (;;) {
if (tok->decoding_state == STATE_NORMAL) {
/* We already have a codec associated with
@@ -586,16 +585,12 @@
if (badchar) {
/* Need to add 1 to the line number, since this line
has not been counted, yet. */
- filename = PyUnicode_DecodeFSDefault(tok->filename);
- if (filename != NULL) {
- PyErr_Format(PyExc_SyntaxError,
- "Non-UTF-8 code starting with '\\x%.2x' "
- "in file %U on line %i, "
- "but no encoding declared; "
- "see http://python.org/dev/peps/pep-0263/ for details",
- badchar, filename, tok->lineno + 1);
- Py_DECREF(filename);
- }
+ PyErr_Format(PyExc_SyntaxError,
+ "Non-UTF-8 code starting with '\\x%.2x' "
+ "in file %U on line %i, "
+ "but no encoding declared; "
+ "see http://python.org/dev/peps/pep-0263/ for details",
+ badchar, tok->filename, tok->lineno + 1);
return error_ret(tok);
}
#endif
@@ -853,6 +848,7 @@
#ifndef PGEN
Py_XDECREF(tok->decoding_readline);
Py_XDECREF(tok->decoding_buffer);
+ Py_XDECREF(tok->filename);
#endif
if (tok->fp != NULL && tok->buf != NULL)
PyMem_FREE(tok->buf);
@@ -1247,8 +1243,13 @@
return 1;
}
if (tok->altwarning) {
- PySys_WriteStderr("%s: inconsistent use of tabs and spaces "
+#ifdef PGEN
+ PySys_WriteStderr("inconsistent use of tabs and spaces "
+ "in indentation\n");
+#else
+ PySys_FormatStderr("%U: inconsistent use of tabs and spaces "
"in indentation\n", tok->filename);
+#endif
tok->altwarning = 0;
}
return 0;
@@ -1718,6 +1719,11 @@
fclose(fp);
return NULL;
}
+#ifndef PGEN
+ tok->filename = PyUnicode_FromString("<string>");
+ if (tok->filename == NULL)
+ goto error;
+#endif
while (tok->lineno < 2 && tok->done == E_OK) {
PyTokenizer_Get(tok, &p_start, &p_end);
}
@@ -1727,6 +1733,9 @@
if (encoding)
strcpy(encoding, tok->encoding);
}
+#ifndef PGEN
+error:
+#endif
PyTokenizer_Free(tok);
return encoding;
}
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -40,7 +40,13 @@
int level; /* () [] {} Parentheses nesting level */
/* Used to allow free continuations inside them */
/* Stuff for checking on different tab sizes */
- const char *filename; /* encoded to the filesystem encoding */
+#ifndef PGEN
+ /* pgen doesn't have access to Python codecs, it cannot decode the input
+ filename. The bytes filename might be kept, but it is only used by
+ indenterror() and it is not really needed: pgen only compiles one file
+ (Grammar/Grammar). */
+ PyObject *filename;
+#endif
int altwarning; /* Issue warning if alternate tabs don't match */
int alterror; /* Issue error if alternate tabs don't match */
int alttabsize; /* Alternate tab spacing */
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -62,6 +62,7 @@
static PyObject *run_pyc_file(FILE *, const char *, PyObject *, PyObject *,
PyCompilerFlags *);
static void err_input(perrdetail *);
+static void err_free(perrdetail *);
static void initsigs(void);
static void call_py_exitfuncs(void);
static void wait_for_thread_shutdown(void);
@@ -1887,12 +1888,13 @@
flags->cf_flags |= iflags & PyCF_MASK;
mod = PyAST_FromNode(n, flags, filename, arena);
PyNode_Free(n);
- return mod;
}
else {
err_input(&err);
- return NULL;
+ mod = NULL;
}
+ err_free(&err);
+ return mod;
}
mod_ty
@@ -1917,14 +1919,15 @@
flags->cf_flags |= iflags & PyCF_MASK;
mod = PyAST_FromNode(n, flags, filename, arena);
PyNode_Free(n);
- return mod;
}
else {
err_input(&err);
if (errcode)
*errcode = err.error;
- return NULL;
+ mod = NULL;
}
+ err_free(&err);
+ return mod;
}
/* Simplified interface to parsefile -- return node or set exception */
@@ -1938,6 +1941,7 @@
start, NULL, NULL, &err, flags);
if (n == NULL)
err_input(&err);
+ err_free(&err);
return n;
}
@@ -1952,6 +1956,7 @@
start, &err, flags);
if (n == NULL)
err_input(&err);
+ err_free(&err);
return n;
}
@@ -1964,6 +1969,7 @@
&_PyParser_Grammar, start, &err, flags);
if (n == NULL)
err_input(&err);
+ err_free(&err);
return n;
}
@@ -1977,11 +1983,23 @@
even parser modules. */
void
+PyParser_ClearError(perrdetail *err)
+{
+ err_free(err);
+}
+
+void
PyParser_SetError(perrdetail *err)
{
err_input(err);
}
+static void
+err_free(perrdetail *err)
+{
+ Py_CLEAR(err->filename);
+}
+
/* Set the error appropriate to the given input error code (see errcode.h) */
static void
@@ -1989,7 +2007,6 @@
{
PyObject *v, *w, *errtype, *errtext;
PyObject *msg_obj = NULL;
- PyObject *filename;
char *msg = NULL;
errtype = PyExc_SyntaxError;
@@ -2075,17 +2092,8 @@
errtext = PyUnicode_DecodeUTF8(err->text, strlen(err->text),
"replace");
}
- if (err->filename != NULL)
- filename = PyUnicode_DecodeFSDefault(err->filename);
- else {
- Py_INCREF(Py_None);
- filename = Py_None;
- }
- if (filename != NULL)
- v = Py_BuildValue("(NiiN)", filename,
- err->lineno, err->offset, errtext);
- else
- v = NULL;
+ v = Py_BuildValue("(OiiN)", err->filename,
+ err->lineno, err->offset, errtext);
if (v != NULL) {
if (msg_obj)
w = Py_BuildValue("(OO)", msg_obj, v);
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins
mailing list