[Python-checkins] python/dist/src/Parser tokenizer.c, 2.76, 2.76.2.1

Tue Jul 12 23:58:40 CEST 2005

Update of /cvsroot/python/python/dist/src/Parser
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv28082/Parser

Modified Files:
      Tag: release24-maint
	tokenizer.c 
Log Message:
Backport checkin:
Apply SF patch #1101726: Fix buffer overrun in tokenizer.c when a source file
with a PEP 263 encoding declaration results in long decoded line.


Index: tokenizer.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Parser/tokenizer.c,v
retrieving revision 2.76
retrieving revision 2.76.2.1
diff -u -d -r2.76 -r2.76.2.1

--- tokenizer.c	4 Aug 2004 17:36:41 -0000	2.76
+++ tokenizer.c	12 Jul 2005 21:58:38 -0000	2.76.2.1
@@ -334,7 +334,19 @@
 }
 
 /* Read a line of text from TOK into S, using the stream in TOK.
-   Return NULL on failure, else S.  */
+   Return NULL on failure, else S.
+   
+   On entry, tok->decoding_buffer will be one of:
+     1) NULL: need to call tok->decoding_readline to get a new line
+     2) PyUnicodeObject *: decoding_feof has called tok->decoding_readline and
+           stored the result in tok->decoding_buffer
+     3) PyStringObject *: previous call to fp_readl did not have enough room
+           (in the s buffer) to copy entire contents of the line read
+           by tok->decoding_readline.  tok->decoding_buffer has the overflow.
+           In this case, fp_readl is called in a loop (with an expanded buffer)
+           until the buffer ends with a '\n' (or until the end of the file is 
+           reached): see tok_nextc and its calls to decoding_fgets.
+*/
 
 static char *
 fp_readl(char *s, int size, struct tok_state *tok)
@@ -344,32 +356,45 @@
 	Py_FatalError("fp_readl should not be called in this build.");
 	return NULL; /* Keep compiler happy (not reachable) */
 #else
-	PyObject* utf8;
+	PyObject* utf8 = NULL;
 	PyObject* buf = tok->decoding_buffer;
+	char *str;
+	int utf8len;
+
+	/* Ask for one less byte so we can terminate it */
+	assert(size > 0);
+	size--;
+
 	if (buf == NULL) {
-		/* Ask for one less byte so we can terminate it */
-		PyObject *args = Py_BuildValue("(i)", size-1);
-		if (args == NULL)
-			return error_ret(tok);
-		buf = PyObject_Call(tok->decoding_readline, args, NULL);
-		Py_DECREF(args);
+		buf = PyObject_CallObject(tok->decoding_readline, NULL);
 		if (buf == NULL)
 			return error_ret(tok);
 	} else {
 		tok->decoding_buffer = NULL;
+		if (PyString_CheckExact(buf))
+			utf8 = buf;
 	}
-	utf8 = PyUnicode_AsUTF8String(buf);
-	Py_DECREF(buf);
-	if (utf8 == NULL)
-		return error_ret(tok);
-	else {
-		const char* str = PyString_AsString(utf8);
-		assert(strlen(str) < (size_t)size); /* XXX */
-		strcpy(s, str);
-		Py_DECREF(utf8);
-		if (s[0] == '\0') return NULL; /* EOF */
-		return s;
+	if (utf8 == NULL) {
+		utf8 = PyUnicode_AsUTF8String(buf);
+		Py_DECREF(buf);
+		if (utf8 == NULL)
+			return error_ret(tok);
+	}
+	str = PyString_AsString(utf8);
+	utf8len = PyString_GET_SIZE(utf8);
+	if (utf8len > size) {
+		tok->decoding_buffer = PyString_FromStringAndSize(str+size, utf8len-size);
+		if (tok->decoding_buffer == NULL) {
+			Py_DECREF(utf8);
+			return error_ret(tok);
+		}
+		utf8len = size;
 	}
+	memcpy(s, str, utf8len);
+	s[utf8len] = '\0';
+	Py_DECREF(utf8);
+	if (utf8len == 0) return NULL; /* EOF */
+	return s;
 #endif
 }
 
@@ -491,14 +516,7 @@
 	} else {
 		PyObject* buf = tok->decoding_buffer;
 		if (buf == NULL) {
-			PyObject *args = PyTuple_New(0);
-			if (args == NULL) {
-				error_ret(tok);
-				return 1;
-			}
-			buf = PyObject_Call(tok->decoding_readline,
-					    args, NULL);
-			Py_DECREF(args);
+			buf = PyObject_CallObject(tok->decoding_readline, NULL);
 			if (buf == NULL) {
 				error_ret(tok);
 				return 1;