[Python-checkins] python/dist/src/Python bltinmodule.c,2.261,2.262 compile.c,2.249,2.250 graminit.c,2.33,2.34 pythonrun.c,2.164,2.165

loewis@users.sourceforge.net loewis@users.sourceforge.net
Sun, 04 Aug 2002 10:29:55 -0700


Update of /cvsroot/python/python/dist/src/Python
In directory usw-pr-cvs1:/tmp/cvs-serv1805/Python

Modified Files:
	bltinmodule.c compile.c graminit.c pythonrun.c 
Log Message:
Patch #534304: Implement phase 1 of PEP 263.


Index: bltinmodule.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Python/bltinmodule.c,v
retrieving revision 2.261
retrieving revision 2.262
diff -C2 -d -r2.261 -r2.262
*** bltinmodule.c	30 Jun 2002 15:26:10 -0000	2.261
--- bltinmodule.c	4 Aug 2002 17:29:52 -0000	2.262
***************
*** 1366,1373 ****
  			}
  		}
- 		PyMem_FREE(s);
- 		return result;
- 	}
- 	if (v != NULL) {
  		f = PySys_GetObject("stdout");
  		if (f == NULL) {
--- 1366,1369 ----
***************
*** 1375,1382 ****
--- 1371,1389 ----
  			return NULL;
  		}
+ 		PyFile_SoftSpace(f, 0);
+ 		PyMem_FREE(s);
+ 		return result;
+ 	}
+ 	f = PySys_GetObject("stdout");
+ 	if (f == NULL) {
+ 		PyErr_SetString(PyExc_RuntimeError, "lost sys.stdout");
+ 		return NULL;
+ 	}
+ 	if (v != NULL) {
  		if (Py_FlushLine() != 0 ||
  		    PyFile_WriteObject(v, f, Py_PRINT_RAW) != 0)
  			return NULL;
  	}
+ 	PyFile_SoftSpace(f, 0);
  	f = PySys_GetObject("stdin");
  	if (f == NULL) {

Index: compile.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Python/compile.c,v
retrieving revision 2.249
retrieving revision 2.250
diff -C2 -d -r2.249 -r2.250
*** compile.c	4 Aug 2002 06:28:21 -0000	2.249
--- compile.c	4 Aug 2002 17:29:52 -0000	2.250
***************
*** 486,489 ****
--- 486,490 ----
  	struct symtable *c_symtable; /* pointer to module symbol table */
          PyFutureFeatures *c_future; /* pointer to module's __future__ */
+ 	char *c_encoding;	/* source encoding (a borrowed reference) */
  };
  
***************
*** 1183,1186 ****
--- 1184,1204 ----
  
  static PyObject *
+ decode_utf8(char **sPtr, char *end, char* encoding)
+ {
+ 	PyObject *u, *v;
+ 	char *s, *t;
+ 	t = s = *sPtr;
+ 	/* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
+ 	while (s < end && (*s & 0x80)) s++;
+ 	*sPtr = s;
+ 	u = PyUnicode_DecodeUTF8(t, s - t, NULL);
+ 	if (u == NULL)
+ 		return NULL;
+ 	v = PyUnicode_AsEncodedString(u, encoding, NULL);
+ 	Py_DECREF(u);
+ 	return v;
+ }
+ 
+ static PyObject *
  parsestr(struct compiling *com, char *s)
  {
***************
*** 1194,1197 ****
--- 1212,1217 ----
  	int quote = first;
  	int rawmode = 0;
+ 	char* encoding = ((com == NULL) ? NULL : com->c_encoding);
+ 	int need_encoding;
  	int unicode = 0;
  
***************
*** 1231,1240 ****
  #ifdef Py_USING_UNICODE
  	if (unicode || Py_UnicodeFlag) {
  		if (rawmode)
! 			v = PyUnicode_DecodeRawUnicodeEscape(
! 				 s, len, NULL);
  		else
! 			v = PyUnicode_DecodeUnicodeEscape(
! 				s, len, NULL);
  		if (v == NULL)
  			PyErr_SyntaxLocation(com->c_filename, com->c_lineno);
--- 1251,1305 ----
  #ifdef Py_USING_UNICODE
  	if (unicode || Py_UnicodeFlag) {
+ 		PyObject *u, *w;
+ 		if (encoding == NULL) {
+ 			buf = s;
+ 			u = NULL;
+ 		} else if (strcmp(encoding, "iso-8859-1") == 0) {
+ 			buf = s;
+ 			u = NULL;
+ 		} else {
+ 			/* "\XX" may become "\u005c\uHHLL" (12 bytes) */
+ 			u = PyString_FromStringAndSize((char *)NULL, len * 4);
+ 			if (u == NULL)
+ 				return NULL;
+ 			p = buf = PyString_AsString(u);
+ 			end = s + len;
+ 			while (s < end) {
+ 				if (*s == '\\') {
+ 					*p++ = *s++;
+ 					if (*s & 0x80) {
+ 						strcpy(p, "u005c");
+ 						p += 5;
+ 					}
+ 				}
+ 				if (*s & 0x80) { /* XXX inefficient */
+ 					char *r;
+ 					int rn, i;
+ 					w = decode_utf8(&s, end, "utf-16-be");
+ 					if (w == NULL) {
+ 						Py_DECREF(u);
+ 						return NULL;
+ 					}
+ 					r = PyString_AsString(w);
+ 					rn = PyString_Size(w);
+ 					assert(rn % 2 == 0);
+ 					for (i = 0; i < rn; i += 2) {
+ 						sprintf(p, "\\u%02x%02x",
+ 							r[i + 0] & 0xFF,
+ 							r[i + 1] & 0xFF);
+ 						p += 6;
+ 					}
+ 					Py_DECREF(w);
+ 				} else {
+ 					*p++ = *s++;
+ 				}
+ 			}
+ 			len = p - buf;
+ 		}
  		if (rawmode)
! 			v = PyUnicode_DecodeRawUnicodeEscape(buf, len, NULL);
  		else
! 			v = PyUnicode_DecodeUnicodeEscape(buf, len, NULL);
! 		Py_XDECREF(u);
  		if (v == NULL)
  			PyErr_SyntaxLocation(com->c_filename, com->c_lineno);
***************
*** 1243,1249 ****
  	}
  #endif
! 	if (rawmode || strchr(s, '\\') == NULL)
! 		return PyString_FromStringAndSize(s, len);
! 	v = PyString_FromStringAndSize((char *)NULL, len);
  	if (v == NULL)
  		return NULL;
--- 1308,1328 ----
  	}
  #endif
! 	need_encoding = (encoding != NULL &&
! 			 strcmp(encoding, "utf-8") != 0 &&
! 			 strcmp(encoding, "iso-8859-1") != 0);
! 	if (rawmode || strchr(s, '\\') == NULL) {
! 		if (need_encoding) {
! 			PyObject* u = PyUnicode_DecodeUTF8(s, len, NULL);
! 			if (u == NULL)
! 				return NULL;
! 			v = PyUnicode_AsEncodedString(u, encoding, NULL);
! 			Py_DECREF(u);
! 			return v;
! 		} else {
! 			return PyString_FromStringAndSize(s, len);
! 		}
! 	}
! 	v = PyString_FromStringAndSize((char *)NULL, /* XXX 4 is enough? */
! 				       need_encoding ? len * 4 : len);
  	if (v == NULL)
  		return NULL;
***************
*** 1252,1256 ****
  	while (s < end) {
  		if (*s != '\\') {
! 			*p++ = *s++;
  			continue;
  		}
--- 1331,1349 ----
  	while (s < end) {
  		if (*s != '\\') {
! 		  ORDINAL: 
! 			if (need_encoding && (*s & 0x80)) {
! 				char *r;
! 				int rn;
! 				PyObject* w = decode_utf8(&s, end, encoding);
! 				if (w == NULL)
! 					return NULL;
! 				r = PyString_AsString(w);
! 				rn = PyString_Size(w);
! 				memcpy(p, r, rn);
! 				p += rn;
! 				Py_DECREF(w);
! 			} else {
! 				*p++ = *s++;
! 			}
  			continue;
  		}
***************
*** 1321,1326 ****
  		default:
  			*p++ = '\\';
! 			*p++ = s[-1];
! 			break;
  		}
  	}
--- 1414,1419 ----
  		default:
  			*p++ = '\\';
! 			s--;
! 			goto ORDINAL;
  		}
  	}
***************
*** 4150,4153 ****
--- 4243,4252 ----
  	if (!com_init(&sc, filename))
  		return NULL;
+ 	if (TYPE(n) == encoding_decl) {
+ 		sc.c_encoding = STR(n);
+ 		n = CHILD(n, 0);
+ 	} else {
+ 		sc.c_encoding = NULL;
+ 	}
  	if (base) {
  		sc.c_private = base->c_private;
***************
*** 4158,4161 ****
--- 4257,4264 ----
  			sc.c_nested = 1;
  		sc.c_flags |= base->c_flags & PyCF_MASK;
+ 		if (base->c_encoding != NULL) {
+ 			assert(sc.c_encoding == NULL);
+ 			sc.c_encoding = base->c_encoding;
+ 		}
  	} else {
  		sc.c_private = NULL;

Index: graminit.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Python/graminit.c,v
retrieving revision 2.33
retrieving revision 2.34
diff -C2 -d -r2.33 -r2.34
*** graminit.c	24 May 2002 15:47:06 -0000	2.33
--- graminit.c	4 Aug 2002 17:29:52 -0000	2.34
***************
*** 1464,1468 ****
  	{2, arcs_66_1},
  };
! static dfa dfas[67] = {
  	{256, "single_input", 0, 3, states_0,
  	 "\004\030\001\000\000\000\124\360\213\011\162\000\002\000\140\210\244\005\001"},
--- 1464,1478 ----
  	{2, arcs_66_1},
  };
! static arc arcs_67_0[1] = {
! 	{12, 1},
! };
! static arc arcs_67_1[1] = {
! 	{0, 1},
! };
! static state states_67[2] = {
! 	{1, arcs_67_0},
! 	{1, arcs_67_1},
! };
! static dfa dfas[68] = {
  	{256, "single_input", 0, 3, states_0,
  	 "\004\030\001\000\000\000\124\360\213\011\162\000\002\000\140\210\244\005\001"},
***************
*** 1599,1604 ****
  	{322, "testlist1", 0, 2, states_66,
  	 "\000\020\001\000\000\000\000\000\000\000\000\000\002\000\140\210\244\005\000"},
  };
! static label labels[148] = {
  	{0, "EMPTY"},
  	{256, 0},
--- 1609,1616 ----
  	{322, "testlist1", 0, 2, states_66,
  	 "\000\020\001\000\000\000\000\000\000\000\000\000\002\000\140\210\244\005\000"},
+ 	{323, "encoding_decl", 0, 2, states_67,
+ 	 "\000\020\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"},
  };
! static label labels[149] = {
  	{0, "EMPTY"},
  	{256, 0},
***************
*** 1749,1757 ****
  	{319, 0},
  	{321, 0},
  };
  grammar _PyParser_Grammar = {
! 	67,
  	dfas,
! 	{148, labels},
  	256
  };
--- 1761,1770 ----
  	{319, 0},
  	{321, 0},
+ 	{323, 0},
  };
  grammar _PyParser_Grammar = {
! 	68,
  	dfas,
! 	{149, labels},
  	256
  };

Index: pythonrun.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Python/pythonrun.c,v
retrieving revision 2.164
retrieving revision 2.165
diff -C2 -d -r2.164 -r2.165
*** pythonrun.c	9 Jul 2002 18:22:55 -0000	2.164
--- pythonrun.c	4 Aug 2002 17:29:52 -0000	2.165
***************
*** 1222,1225 ****
--- 1222,1226 ----
  {
  	PyObject *v, *w, *errtype;
+ 	PyObject* u = NULL;
  	char *msg = NULL;
  	errtype = PyExc_SyntaxError;
***************
*** 1273,1276 ****
--- 1274,1288 ----
  		msg = "too many levels of indentation";
  		break;
+ 	case E_DECODE: {	/* XXX */
+ 		PyThreadState* tstate = PyThreadState_Get();
+ 		PyObject* value = tstate->curexc_value;
+ 		if (value != NULL) {
+ 			u = PyObject_Repr(value);
+ 			if (u != NULL) {
+ 				msg = PyString_AsString(u);
+ 				break;
+ 			}
+ 		}
+ 	}
  	default:
  		fprintf(stderr, "error=%d\n", err->error);
***************
*** 1279,1282 ****
--- 1291,1295 ----
  	}
  	w = Py_BuildValue("(sO)", msg, v);
+ 	Py_XDECREF(u);
  	Py_XDECREF(v);
  	PyErr_SetObject(errtype, w);