[Python-3000-checkins] r67190 - in python/branches/py3k: Lib/test/test_cmd_line.py Misc/NEWS Modules/main.c Python/import.c

amaury.forgeotdarc python-3000-checkins at python.org
Wed Nov 12 00:05:00 CET 2008


Author: amaury.forgeotdarc
Date: Wed Nov 12 00:04:59 2008
New Revision: 67190

Log:
#3705: Command-line arguments were not correctly decoded when the
terminal does not use UTF8. 

Now the code propagates the Unicode string as far as possible, and avoids
the conversion to char* which implicitly uses UTF-8.

Reviewed by Benjamin.


Modified:
   python/branches/py3k/Lib/test/test_cmd_line.py
   python/branches/py3k/Misc/NEWS
   python/branches/py3k/Modules/main.c
   python/branches/py3k/Python/import.c

Modified: python/branches/py3k/Lib/test/test_cmd_line.py
==============================================================================
--- python/branches/py3k/Lib/test/test_cmd_line.py	(original)
+++ python/branches/py3k/Lib/test/test_cmd_line.py	Wed Nov 12 00:04:59 2008
@@ -135,6 +135,12 @@
             self.exit_code('-c', 'pass'),
             0)
 
+        # Test handling of non-ascii data
+        command = "assert(ord('\xe9') == 0xe9)"
+        self.assertEqual(
+            self.exit_code('-c', command),
+            0)
+
 
 def test_main():
     test.support.run_unittest(CmdLineTest)

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Wed Nov 12 00:04:59 2008
@@ -13,6 +13,9 @@
 Core and Builtins
 -----------------
 
+- Issue #3705: Command-line arguments were not correctly decoded when the
+  terminal does not use UTF8.
+
 Library
 -------
 

Modified: python/branches/py3k/Modules/main.c
==============================================================================
--- python/branches/py3k/Modules/main.c	(original)
+++ python/branches/py3k/Modules/main.c	Wed Nov 12 00:04:59 2008
@@ -287,7 +287,7 @@
 {
 	int c;
 	int sts;
-	char *command = NULL;
+	wchar_t *command = NULL;
 	wchar_t *filename = NULL;
 	wchar_t *module = NULL;
 	FILE *fp = stdin;
@@ -299,7 +299,6 @@
 	int version = 0;
 	int saw_unbuffered_flag = 0;
 	PyCompilerFlags cf;
-	char *oldloc;
 
 	cf.cf_flags = 0;
 
@@ -310,30 +309,19 @@
 
 	while ((c = _PyOS_GetOpt(argc, argv, PROGRAM_OPTS)) != EOF) {
 		if (c == 'c') {
-			size_t r1, r2;
-			oldloc = setlocale(LC_ALL, NULL);
-			setlocale(LC_ALL, "");
-			r1 = wcslen(_PyOS_optarg);
-			r2 = wcstombs(NULL, _PyOS_optarg, r1);
-			if (r2 == (size_t) -1)
-				Py_FatalError(
-				   "cannot convert character encoding of -c argument");
-			if (r2 > r1)
-				r1 = r2;
-			r1 += 2;
+			size_t len;
 			/* -c is the last option; following arguments
 			   that look like options are left for the
 			   command to interpret. */
-			command = (char *)malloc(r1);
+
+			len = wcslen(_PyOS_optarg) + 1 + 1;
+			command = (wchar_t *)malloc(sizeof(wchar_t) * len);
 			if (command == NULL)
 				Py_FatalError(
 				   "not enough memory to copy -c argument");
-			r2 = wcstombs(command, _PyOS_optarg, r1);
-			if (r2 > r1-1)
-				Py_FatalError(
-				    "not enough memory to copy -c argument");
-			strcat(command, "\n");
-			setlocale(LC_ALL, oldloc);
+			wcscpy(command, _PyOS_optarg);
+			command[len - 2] = '\n';
+			command[len - 1] = 0;
 			break;
 		}
 
@@ -543,8 +531,18 @@
 	}
 
 	if (command) {
-		sts = PyRun_SimpleStringFlags(command, &cf) != 0;
+		PyObject *commandObj = PyUnicode_FromWideChar(
+		    command, wcslen(command));
 		free(command);
+		if (commandObj != NULL) {
+			sts = PyRun_SimpleStringFlags(
+				_PyUnicode_AsString(commandObj), &cf) != 0;
+		}
+		else {
+			PyErr_Print();
+			sts = 1;
+		}
+		Py_DECREF(commandObj);
 	} else if (module) {
 		sts = RunModule(module, 1);
 	}

Modified: python/branches/py3k/Python/import.c
==============================================================================
--- python/branches/py3k/Python/import.c	(original)
+++ python/branches/py3k/Python/import.c	Wed Nov 12 00:04:59 2008
@@ -2793,6 +2793,7 @@
 {
 	extern int fclose(FILE *);
 	PyObject *fob, *ret;
+	PyObject *pathobj;
 	struct filedescr *fdp;
 	char pathname[MAXPATHLEN+1];
 	FILE *fp = NULL;
@@ -2836,9 +2837,9 @@
 		fob = Py_None;
 		Py_INCREF(fob);
 	}
-	ret = Py_BuildValue("Os(ssi)",
-		      fob, pathname, fdp->suffix, fdp->mode, fdp->type);
-	Py_DECREF(fob);
+	pathobj = PyUnicode_DecodeFSDefault(pathname);
+	ret = Py_BuildValue("NN(ssi)",
+		      fob, pathobj, fdp->suffix, fdp->mode, fdp->type);
 	PyMem_FREE(found_encoding);
 
 	return ret;
@@ -2849,7 +2850,9 @@
 {
 	char *name;
 	PyObject *path = NULL;
-	if (!PyArg_ParseTuple(args, "s|O:find_module", &name, &path))
+	if (!PyArg_ParseTuple(args, "es|O:find_module",
+	                      Py_FileSystemDefaultEncoding, &name,
+	                      &path))
 		return NULL;
 	return call_find_module(name, path);
 }


More information about the Python-3000-checkins mailing list