[Python-checkins] python/dist/src/Parser tokenizer.c,2.75,2.76

perky at users.sourceforge.net perky at users.sourceforge.net
Wed Aug 4 19:36:43 CEST 2004


Update of /cvsroot/python/python/dist/src/Parser
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3131/Parser

Modified Files:
	tokenizer.c 
Log Message:
SF #941229: Decode source code with sys.stdin.encoding in interactive
modes like non-interactive modes.  This allows for non-latin-1 users
to write unicode strings directly and sets Japanese users free from
weird manual escaping <wink> in shift_jis environments.
(Reviewed by Martin v. Loewis)


Index: tokenizer.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Parser/tokenizer.c,v
retrieving revision 2.75
retrieving revision 2.76
diff -C2 -d -r2.75 -r2.76
*** tokenizer.c	2 Aug 2004 06:09:55 -0000	2.75
--- tokenizer.c	4 Aug 2004 17:36:41 -0000	2.76
***************
*** 652,655 ****
--- 652,712 ----
  }
  
+ #if !defined(PGEN) && defined(Py_USING_UNICODE)
+ static int	/* 0: *inp converted or left as-is; -1: out of memory, tok->done = E_NOMEM */
+ tok_stdin_decode(struct tok_state *tok, char **inp)	/* *inp: NUL-terminated buffer, may be replaced */
+ {
+ 	PyObject *enc, *sysstdin, *decoded, *utf8;
+ 	const char *encoding;
+ 	char *converted;
+ 
+ 	if (PySys_GetFile((char *)"stdin", NULL) != stdin)	/* only act when sys.stdin maps to C stdin */
+ 		return 0;
+ 	sysstdin = PySys_GetObject("stdin");
+ 	if (sysstdin == NULL || !PyFile_Check(sysstdin))	/* no file object: nothing to decode with */
+ 		return 0;
+ 
+ 	enc = ((PyFileObject *)sysstdin)->f_encoding;	/* encoding recorded on the file object */
+ 	if (enc == NULL || !PyString_Check(enc))	/* unset or not a string: leave *inp untouched */
+ 		return 0;
+ 	Py_INCREF(enc);	/* keep enc alive while `encoding` points into it */
+ 
+ 	encoding = PyString_AsString(enc);
+ 	decoded = PyUnicode_Decode(*inp, strlen(*inp), encoding, NULL);	/* bytes -> unicode */
+ 	if (decoded == NULL)
+ 		goto error_clear;	/* decode failure is not fatal; see error_clear */
+ 
+ 	utf8 = PyUnicode_AsEncodedString(decoded, "utf-8", NULL);	/* unicode -> UTF-8 bytes */
+ 	Py_DECREF(decoded);
+ 	if (utf8 == NULL)
+ 		goto error_clear;
+ 
+ 	converted = new_string(PyString_AsString(utf8), PyString_Size(utf8));	/* private copy of the UTF-8 bytes */
+ 	Py_DECREF(utf8);
+ 	if (converted == NULL)
+ 		goto error_nomem;
+ 
+ 	PyMem_FREE(*inp);	/* release the caller's original buffer... */
+ 	*inp = converted;	/* ...and hand back the UTF-8 copy in its place */
+ 	if (tok->encoding != NULL)
+ 		PyMem_DEL(tok->encoding);	/* drop any previously recorded encoding name */
+ 	tok->encoding = new_string(encoding, strlen(encoding));	/* record the encoding name on the tokenizer */
+ 	if (tok->encoding == NULL)
+ 		goto error_nomem;	/* note: *inp has already been replaced at this point */
+ 
+ 	Py_DECREF(enc);
+ 	return 0;
+ 
+ error_nomem:	/* allocation failed: report through tok->done and return -1 */
+ 	Py_DECREF(enc);
+ 	tok->done = E_NOMEM;
+ 	return -1;
+ 
+ error_clear:	/* reached only before *inp is modified, so the input stays undecoded */
+ 	/* Fall back to iso-8859-1 for backward compatibility */
+ 	Py_DECREF(enc);
+ 	PyErr_Clear();	/* NOTE(review): clears whatever exception is pending, not only decode errors */
+ 	return 0;
+ }
+ #endif
  
  /* Get next char, updating state; error code goes into tok->done */
***************
*** 691,694 ****
--- 748,755 ----
  				tok->done = E_EOF;
  			}
+ #if !defined(PGEN) && defined(Py_USING_UNICODE)
+ 			else if (tok_stdin_decode(tok, &new) != 0)
+ 				PyMem_FREE(new);
+ #endif
  			else if (tok->start != NULL) {
  				size_t start = tok->start - tok->buf;



More information about the Python-checkins mailing list