[Python-checkins] cpython: Issue #21118: Optimize also str.translate() for ASCII => ASCII deletion

victor.stinner python-checkins at python.org
Sat Apr 5 14:27:30 CEST 2014


http://hg.python.org/cpython/rev/47b0c076e17d
changeset:   90157:47b0c076e17d
user:        Victor Stinner <victor.stinner at gmail.com>
date:        Sat Apr 05 14:27:07 2014 +0200
summary:
  Issue #21118: Optimize also str.translate() for ASCII => ASCII deletion

files:
  Objects/unicodeobject.c |  54 +++++++++++++++++-----------
  1 files changed, 32 insertions(+), 22 deletions(-)


diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -8561,7 +8561,8 @@
 
     if (item == Py_None) {
         /* deletion: skip fast translate */
-        goto exit;
+        translate[ch] = 0xfe;
+        return 1;
     }
 
     if (item == NULL) {
@@ -8614,12 +8615,12 @@
    translated into writer, raise an exception and return -1 on error. */
 static int
 unicode_fast_translate(PyObject *input, PyObject *mapping,
-                       _PyUnicodeWriter *writer)
-{
-    Py_UCS1 translate[128], ch, ch2;
+                       _PyUnicodeWriter *writer, int ignore)
+{
+    Py_UCS1 ascii_table[128], ch, ch2;
     Py_ssize_t len;
     Py_UCS1 *in, *end, *out;
-    int res;
+    int res = 0;
 
     if (PyUnicode_READY(input) == -1)
         return -1;
@@ -8627,7 +8628,7 @@
         return 0;
     len = PyUnicode_GET_LENGTH(input);
 
-    memset(translate, 0xff, 128);
+    memset(ascii_table, 0xff, 128);
 
     in = PyUnicode_1BYTE_DATA(input);
     end = in + len;
@@ -8636,23 +8637,32 @@
     assert(PyUnicode_GET_LENGTH(writer->buffer) == len);
     out = PyUnicode_1BYTE_DATA(writer->buffer);
 
-    for (; in < end; in++, out++) {
+    for (; in < end; in++) {
         ch = *in;
-        ch2 = translate[ch];
+        ch2 = ascii_table[ch];
         if (ch2 == 0xff) {
-            res = unicode_fast_translate_lookup(mapping, ch, translate);
-            if (res < 0)
+            int translate = unicode_fast_translate_lookup(mapping, ch,
+                                                          ascii_table);
+            if (translate < 0)
                 return -1;
-            if (res == 0) {
-                writer->pos = in - PyUnicode_1BYTE_DATA(input);
-                return 0;
-            }
-            ch2 = translate[ch];
-        }
+            if (translate == 0)
+                goto exit;
+            ch2 = ascii_table[ch];
+        }
+        if (ch2 == 0xfe) {
+            if (ignore)
+                continue;
+            goto exit;
+        }
+        assert(ch2 < 128);
         *out = ch2;
-    }
-    writer->pos = len;
-    return 1;
+        out++;
+    }
+    res = 1;
+
+exit:
+    writer->pos = out - PyUnicode_1BYTE_DATA(writer->buffer);
+    return res;
 }
 
 PyObject *
@@ -8695,7 +8705,9 @@
     if (_PyUnicodeWriter_Prepare(&writer, size, 127) == -1)
         goto onError;
 
-    res = unicode_fast_translate(input, mapping, &writer);
+    ignore = (errors != NULL && strcmp(errors, "ignore") == 0);
+
+    res = unicode_fast_translate(input, mapping, &writer, ignore);
     if (res < 0) {
         _PyUnicodeWriter_Dealloc(&writer);
         return NULL;
@@ -8703,8 +8715,6 @@
     if (res == 1)
         return _PyUnicodeWriter_Finish(&writer);
 
-    ignore = (errors != NULL && strcmp(errors, "ignore") == 0);
-
     i = writer.pos;
     while (i<size) {
         /* try to encode it */

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list