[Python-3000-checkins] r59007 - python/branches/py3k/Parser/tokenizer.c

guido.van.rossum python-3000-checkins at python.org
Fri Nov 16 01:51:45 CET 2007


Author: guido.van.rossum
Date: Fri Nov 16 01:51:45 2007
New Revision: 59007

Modified:
   python/branches/py3k/Parser/tokenizer.c
Log:
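Cleanup of tokenizer.c: replace the string-prefix switch with plain if
tests for b"", r"" and br"", rewrite the quoted-string scanner in terms of
quote_size/end_quote_size, and strip trailing whitespace.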


Modified: python/branches/py3k/Parser/tokenizer.c
==============================================================================
--- python/branches/py3k/Parser/tokenizer.c	(original)
+++ python/branches/py3k/Parser/tokenizer.c	Fri Nov 16 01:51:45 2007
@@ -1269,30 +1269,24 @@
 	/* Identifier (most frequent token!) */
 	nonascii = 0;
 	if (is_potential_identifier_start(c)) {
-		/* Process r"", u"" and ur"" */
-		switch (c) {
-		case 'r':
-		case 'R':
+		/* Process b"", r"" and br"" */
+		if (c == 'b' || c == 'B') {
 			c = tok_nextc(tok);
 			if (c == '"' || c == '\'')
 				goto letter_quote;
-			break;
-		case 'b':
-		case 'B':
+		}
+		if (c == 'r' || c == 'R') {
 			c = tok_nextc(tok);
-			if (c == 'r' || c == 'R')
-				c = tok_nextc(tok);
 			if (c == '"' || c == '\'')
 				goto letter_quote;
-			break;
-		}
+	    }
 		while (is_potential_identifier_char(c)) {
 			if (c >= 128)
 				nonascii = 1;
 			c = tok_nextc(tok);
 		}
 		tok_backup(tok, c);
-		if (nonascii && 
+		if (nonascii &&
 		    !verify_identifier(tok->start, tok->cur)) {
 			tok->done = E_IDENTIFIER;
 			return ERRORTOKEN;
@@ -1322,7 +1316,7 @@
 			c = tok_nextc(tok);
 			if (c == '.') {
 				*p_start = tok->start;
-				*p_end = tok->cur; 
+				*p_end = tok->cur;
 				return ELLIPSIS;
 			} else {
 				tok_backup(tok, c);
@@ -1436,55 +1430,47 @@
   letter_quote:
 	/* String */
 	if (c == '\'' || c == '"') {
-		Py_ssize_t quote2 = tok->cur - tok->start + 1;
-		int quote = c;
-		int triple = 0;
-		int tripcount = 0;
-		for (;;) {
-			c = tok_nextc(tok);
-			if (c == '\n') {
-				if (!triple) {
-					tok->done = E_EOLS;
-					tok_backup(tok, c);
-					return ERRORTOKEN;
-				}
-				tripcount = 0;
-                                tok->cont_line = 1; /* multiline string. */
-			}
-			else if (c == EOF) {
-				if (triple)
-					tok->done = E_EOFS;
-				else
-					tok->done = E_EOLS;
-				tok->cur = tok->inp;
-				return ERRORTOKEN;
-			}
-			else if (c == quote) {
-				tripcount++;
-				if (tok->cur - tok->start == quote2) {
-					c = tok_nextc(tok);
-					if (c == quote) {
-						triple = 1;
-						tripcount = 0;
-						continue;
-					}
-					tok_backup(tok, c);
-				}
-				if (!triple || tripcount == 3)
-					break;
-			}
-			else if (c == '\\') {
-				tripcount = 0;
-				c = tok_nextc(tok);
-				if (c == EOF) {
-					tok->done = E_EOLS;
-					tok->cur = tok->inp;
-					return ERRORTOKEN;
-				}
-			}
+ 		int quote = c;
+		int quote_size = 1;             /* 1 or 3 */
+		int end_quote_size = 0;
+
+		/* Find the quote size and start of string */
+		c = tok_nextc(tok);
+		if (c == quote) {
+ 			c = tok_nextc(tok);
+			if (c == quote)
+				quote_size = 3;
 			else
-				tripcount = 0;
+				end_quote_size = 1;     /* empty string found */
 		}
+		if (c != quote)
+		    tok_backup(tok, c);
+
+		/* Get rest of string */
+		while (end_quote_size != quote_size) {
+ 			c = tok_nextc(tok);
+  			if (c == EOF) {
+				if (quote_size == 3)
+ 					tok->done = E_EOFS;
+ 				else
+ 					tok->done = E_EOLS;
+ 				tok->cur = tok->inp;
+ 				return ERRORTOKEN;
+ 			}
+ 			if (quote_size == 1 && c == '\n') {
+ 			    tok->done = E_EOLS;
+ 			    tok->cur = tok->inp;
+ 			    return ERRORTOKEN;
+ 			}
+ 			if (c == quote)
+ 			    end_quote_size += 1;
+ 			else {
+ 			    end_quote_size = 0;
+ 			    if (c == '\\')
+ 			        c = tok_nextc(tok);  /* skip escaped char */
+ 			}
+ 		}
+
 		*p_start = tok->start;
 		*p_end = tok->cur;
 		return STRING;
@@ -1619,7 +1605,7 @@
 /* Get -*- encoding -*- from a Python file.
 
    PyTokenizer_FindEncoding returns NULL when it can't find the encoding in
-   the first or second line of the file (in which case the encoding 
+   the first or second line of the file (in which case the encoding
    should be assumed to be PyUnicode_GetDefaultEncoding()).
 
    The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed


More information about the Python-3000-checkins mailing list