[Python-checkins] r79723 - in python/trunk: Misc/NEWS Parser/tokenizer.c

benjamin.peterson python-checkins at python.org
Sun Apr 4 00:48:51 CEST 2010


Author: benjamin.peterson
Date: Sun Apr  4 00:48:51 2010
New Revision: 79723

Log:
ensure that the locale does not affect the tokenization of identifiers

Modified:
   python/trunk/Misc/NEWS
   python/trunk/Parser/tokenizer.c

Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS	(original)
+++ python/trunk/Misc/NEWS	Sun Apr  4 00:48:51 2010
@@ -12,6 +12,8 @@
 Core and Builtins
 -----------------
 
+- Ensure that tokenization of identifiers is not affected by locale.
+
 - Issue #1222585: Added LDCXXSHARED for C++ support. Patch by Arfrever.
 
 - Raise a TypeError when trying to delete a T_STRING_INPLACE struct member.

Modified: python/trunk/Parser/tokenizer.c
==============================================================================
--- python/trunk/Parser/tokenizer.c	(original)
+++ python/trunk/Parser/tokenizer.c	Sun Apr  4 00:48:51 2010
@@ -93,6 +93,21 @@
 };
 
 
+/* Ensure that the locale does not interfere with tokenization. */
+
+static int
+ascii_isalpha(int c)
+{
+	return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
+}
+
+static int
+ascii_isalnum(int c)
+{
+	return ascii_isalpha(c) || ('0' <= c && c <= '9');
+}
+
+
 /* Create and initialize a new tok_state structure */
 
 static struct tok_state *
@@ -230,7 +245,7 @@
 			} while (t[0] == '\x20' || t[0] == '\t');
 
 			begin = t;
-			while (isalnum(Py_CHARMASK(t[0])) ||
+			while (ascii_isalnum(Py_CHARMASK(t[0])) ||
 			       t[0] == '-' || t[0] == '_' || t[0] == '.')
 				t++;
 
@@ -1185,7 +1200,6 @@
 	return 0;
 }
 
-
 /* Get next token, after space stripping etc. */
 
 static int
@@ -1341,7 +1355,7 @@
 	}
 
 	/* Identifier (most frequent token!) */
-	if (isalpha(c) || c == '_') {
+	if (ascii_isalpha(c) || c == '_') {
 		/* Process r"", u"" and ur"" */
 		switch (c) {
 		case 'b':
@@ -1367,7 +1381,7 @@
 				goto letter_quote;
 			break;
 		}
-		while (isalnum(c) || c == '_') {
+		while (ascii_isalnum(c) || c == '_') {
 			c = tok_nextc(tok);
 		}
 		tok_backup(tok, c);


More information about the Python-checkins mailing list