[Python-3000-checkins] r60443 - in python/branches/py3k: Doc/tutorial/errors.rst Include/unicodeobject.h Lib/_abcoll.py Objects/unicodeobject.c

christian.heimes python-3000-checkins at python.org
Wed Jan 30 12:58:23 CET 2008


Author: christian.heimes
Date: Wed Jan 30 12:58:22 2008
New Revision: 60443

Modified:
   python/branches/py3k/   (props changed)
   python/branches/py3k/Doc/tutorial/errors.rst
   python/branches/py3k/Include/unicodeobject.h
   python/branches/py3k/Lib/_abcoll.py
   python/branches/py3k/Objects/unicodeobject.c
Log:
Merged revisions 60408-60440 via svnmerge from 
svn+ssh://pythondev@svn.python.org/python/trunk

........
  r60425 | raymond.hettinger | 2008-01-29 20:52:09 +0100 (Tue, 29 Jan 2008) | 1 line
  
  CallMethod is faster with a NULL third-argument than with an empty format string.
........
  r60431 | raymond.hettinger | 2008-01-30 01:01:07 +0100 (Wed, 30 Jan 2008) | 1 line
  
  Add isdisjoint() to the Set/MutableSet ABCs.
........
  r60432 | raymond.hettinger | 2008-01-30 01:08:31 +0100 (Wed, 30 Jan 2008) | 1 line
  
  MutableSets support a remove() method.
........
  r60433 | raymond.hettinger | 2008-01-30 01:51:58 +0100 (Wed, 30 Jan 2008) | 1 line
  
  Demonstrate new except/as syntax.
........
  r60440 | christian.heimes | 2008-01-30 12:32:37 +0100 (Wed, 30 Jan 2008) | 1 line
  
  Patch #1970 by Antoine Pitrou: Speedup unicode whitespace and linebreak detection. The speedup is about 25% for split() (571 / 457 usec) and 35% (175 / 127 usec )for splitlines()
........


Modified: python/branches/py3k/Doc/tutorial/errors.rst
==============================================================================
--- python/branches/py3k/Doc/tutorial/errors.rst	(original)
+++ python/branches/py3k/Doc/tutorial/errors.rst	Wed Jan 30 12:58:22 2008
@@ -131,8 +131,8 @@
        f = open('myfile.txt')
        s = f.readline()
        i = int(s.strip())
-   except IOError as e:
-       print("I/O error(%s): %s" % (e.errno, e.strerror))
+   except IOError as (errno, strerror):
+       print "I/O error(%s): %s" % (errno, strerror)
    except ValueError:
        print("Could not convert data to an integer.")
    except:

Modified: python/branches/py3k/Include/unicodeobject.h
==============================================================================
--- python/branches/py3k/Include/unicodeobject.h	(original)
+++ python/branches/py3k/Include/unicodeobject.h	Wed Jan 30 12:58:22 2008
@@ -358,7 +358,14 @@
 
 #else
 
-#define Py_UNICODE_ISSPACE(ch) _PyUnicode_IsWhitespace(ch)
+/* Since splitting on whitespace is an important use case, and whitespace
+   in most situations is solely ASCII whitespace, we optimize for the common
+   case by using a quick look-up table with an inlined check.
+ */
+extern const unsigned char _Py_ascii_whitespace[];
+
+#define Py_UNICODE_ISSPACE(ch) \
+	((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
 
 #define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
 #define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)

Modified: python/branches/py3k/Lib/_abcoll.py
==============================================================================
--- python/branches/py3k/Lib/_abcoll.py	(original)
+++ python/branches/py3k/Lib/_abcoll.py	Wed Jan 30 12:58:22 2008
@@ -211,6 +211,12 @@
             return NotImplemented
         return self._from_iterable(value for value in other if value in self)
 
+    def isdisjoint(self, other):
+        for value in other:
+            if value in self:
+                return False
+        return True
+
     def __or__(self, other):
         if not isinstance(other, Iterable):
             return NotImplemented
@@ -278,6 +284,12 @@
         """Return True if it was deleted, False if not there."""
         raise NotImplementedError
 
+    def remove(self, value):
+        """Remove an element. If not a member, raise a KeyError."""
+        if value not in self:
+            raise KeyError(value)
+        self.discard(value)
+
     def pop(self):
         """Return the popped value.  Raise KeyError if empty."""
         it = iter(self)

Modified: python/branches/py3k/Objects/unicodeobject.c
==============================================================================
--- python/branches/py3k/Objects/unicodeobject.c	(original)
+++ python/branches/py3k/Objects/unicodeobject.c	Wed Jan 30 12:58:22 2008
@@ -125,6 +125,64 @@
 */
 static const char unicode_default_encoding[] = "utf-8";
 
+/* Fast detection of the most frequent whitespace characters */
+const unsigned char _Py_ascii_whitespace[] = {
+	0, 0, 0, 0, 0, 0, 0, 0,
+//     case 0x0009: /* HORIZONTAL TABULATION */
+//     case 0x000A: /* LINE FEED */
+//     case 0x000B: /* VERTICAL TABULATION */
+//     case 0x000C: /* FORM FEED */
+//     case 0x000D: /* CARRIAGE RETURN */
+	0, 1, 1, 1, 1, 1, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+//     case 0x001C: /* FILE SEPARATOR */
+//     case 0x001D: /* GROUP SEPARATOR */
+//     case 0x001E: /* RECORD SEPARATOR */
+//     case 0x001F: /* UNIT SEPARATOR */
+	0, 0, 0, 0, 1, 1, 1, 1,
+//     case 0x0020: /* SPACE */
+	1, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* Same for linebreaks */
+static unsigned char ascii_linebreak[] = {
+	0, 0, 0, 0, 0, 0, 0, 0,
+//         0x000A, /* LINE FEED */
+//         0x000D, /* CARRIAGE RETURN */
+	0, 0, 1, 0, 0, 1, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+//         0x001C, /* FILE SEPARATOR */
+//         0x001D, /* GROUP SEPARATOR */
+//         0x001E, /* RECORD SEPARATOR */
+	0, 0, 0, 0, 1, 1, 1, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0
+};
+
+
 Py_UNICODE
 PyUnicode_GetMax(void)
 {
@@ -151,8 +209,9 @@
 
 #define BLOOM(mask, ch) ((mask & (1 << ((ch) & 0x1F))))
 
-#define BLOOM_LINEBREAK(ch)\
-    (BLOOM(bloom_linebreak, (ch)) && Py_UNICODE_ISLINEBREAK((ch)))
+#define BLOOM_LINEBREAK(ch) \
+    ((ch) < 128U ? ascii_linebreak[(ch)] : \
+    (BLOOM(bloom_linebreak, (ch)) && Py_UNICODE_ISLINEBREAK(ch)))
 
 Py_LOCAL_INLINE(BLOOM_MASK) make_bloom_mask(Py_UNICODE* ptr, Py_ssize_t len)
 {
@@ -5602,25 +5661,26 @@
     register Py_ssize_t j;
     Py_ssize_t len = self->length;
     PyObject *str;
+    register const Py_UNICODE *buf = self->str;
 
     for (i = j = 0; i < len; ) {
 	/* find a token */
-	while (i < len && Py_UNICODE_ISSPACE(self->str[i]))
+	while (i < len && Py_UNICODE_ISSPACE(buf[i]))
 	    i++;
 	j = i;
-	while (i < len && !Py_UNICODE_ISSPACE(self->str[i]))
+	while (i < len && !Py_UNICODE_ISSPACE(buf[i]))
 	    i++;
 	if (j < i) {
 	    if (maxcount-- <= 0)
 		break;
-	    SPLIT_APPEND(self->str, j, i);
-	    while (i < len && Py_UNICODE_ISSPACE(self->str[i]))
+	    SPLIT_APPEND(buf, j, i);
+	    while (i < len && Py_UNICODE_ISSPACE(buf[i]))
 		i++;
 	    j = i;
 	}
     }
     if (j < len) {
-	SPLIT_APPEND(self->str, j, len);
+	SPLIT_APPEND(buf, j, len);
     }
     return list;
 
@@ -5693,18 +5753,19 @@
     register Py_ssize_t j;
     Py_ssize_t len = self->length;
     PyObject *str;
+    register const Py_UNICODE *buf = self->str;
 
     for (i = j = 0; i < len; ) {
-	if (self->str[i] == ch) {
+	if (buf[i] == ch) {
 	    if (maxcount-- <= 0)
 		break;
-	    SPLIT_APPEND(self->str, j, i);
+	    SPLIT_APPEND(buf, j, i);
 	    i = j = i + 1;
 	} else
 	    i++;
     }
     if (j <= len) {
-	SPLIT_APPEND(self->str, j, len);
+	SPLIT_APPEND(buf, j, len);
     }
     return list;
 
@@ -5753,25 +5814,26 @@
     register Py_ssize_t j;
     Py_ssize_t len = self->length;
     PyObject *str;
+    register const Py_UNICODE *buf = self->str;
 
     for (i = j = len - 1; i >= 0; ) {
 	/* find a token */
-	while (i >= 0 && Py_UNICODE_ISSPACE(self->str[i]))
+	while (i >= 0 && Py_UNICODE_ISSPACE(buf[i]))
 	    i--;
 	j = i;
-	while (i >= 0 && !Py_UNICODE_ISSPACE(self->str[i]))
+	while (i >= 0 && !Py_UNICODE_ISSPACE(buf[i]))
 	    i--;
 	if (j > i) {
 	    if (maxcount-- <= 0)
 		break;
-	    SPLIT_APPEND(self->str, i + 1, j + 1);
-	    while (i >= 0 && Py_UNICODE_ISSPACE(self->str[i]))
+	    SPLIT_APPEND(buf, i + 1, j + 1);
+	    while (i >= 0 && Py_UNICODE_ISSPACE(buf[i]))
 		i--;
 	    j = i;
 	}
     }
     if (j >= 0) {
-	SPLIT_APPEND(self->str, 0, j + 1);
+	SPLIT_APPEND(buf, 0, j + 1);
     }
     if (PyList_Reverse(list) < 0)
         goto onError;
@@ -5792,18 +5854,19 @@
     register Py_ssize_t j;
     Py_ssize_t len = self->length;
     PyObject *str;
+    register const Py_UNICODE *buf = self->str;
 
     for (i = j = len - 1; i >= 0; ) {
-	if (self->str[i] == ch) {
+	if (buf[i] == ch) {
 	    if (maxcount-- <= 0)
 		break;
-	    SPLIT_APPEND(self->str, i + 1, j + 1);
+	    SPLIT_APPEND(buf, i + 1, j + 1);
 	    j = i = i - 1;
 	} else
 	    i--;
     }
     if (j >= -1) {
-	SPLIT_APPEND(self->str, 0, j + 1);
+	SPLIT_APPEND(buf, 0, j + 1);
     }
     if (PyList_Reverse(list) < 0)
         goto onError;


More information about the Python-3000-checkins mailing list