[Python-checkins] cpython: Issue #23119: Simplify setobject by inlining the special case for unicode

raymond.hettinger python-checkins at python.org
Mon Jan 26 01:13:15 CET 2015


https://hg.python.org/cpython/rev/20f54cdf351d
changeset:   94279:20f54cdf351d
user:        Raymond Hettinger <python at rcn.com>
date:        Sun Jan 25 16:12:49 2015 -0800
summary:
  Issue #23119:  Simplify setobject by inlining the special case for unicode equality testing.

files:
  Include/setobject.h  |   3 +-
  Lib/test/test_sys.py |   2 +-
  Objects/setobject.c  |  81 ++++---------------------------
  3 files changed, 13 insertions(+), 73 deletions(-)


diff --git a/Include/setobject.h b/Include/setobject.h
--- a/Include/setobject.h
+++ b/Include/setobject.h
@@ -35,7 +35,7 @@
 
 */
 
-typedef struct _setobject {
+typedef struct {
     PyObject_HEAD
 
     Py_ssize_t fill;            /* Number active and dummy entries*/
@@ -53,7 +53,6 @@
      * runtime null-tests.
      */
     setentry *table;
-    setentry *(*lookup)(struct _setobject *so, PyObject *key, Py_hash_t hash);
     Py_hash_t hash;             /* Only used by frozenset objects */
     setentry smalltable[PySet_MINSIZE];
 
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -994,7 +994,7 @@
         # frozenset
         PySet_MINSIZE = 8
         samples = [[], range(10), range(50)]
-        s = size('3n2P' + PySet_MINSIZE*'nP' + '2nP')
+        s = size('3nP' + PySet_MINSIZE*'nP' + '2nP')
         for sample in samples:
             minused = len(sample)
             if minused == 0: tmp = 1
diff --git a/Objects/setobject.c b/Objects/setobject.c
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -69,6 +69,10 @@
             PyObject *startkey = entry->key;
             if (startkey == key)
                 return entry;
+            if (PyUnicode_CheckExact(startkey)
+                && PyUnicode_CheckExact(key)
+                && unicode_eq(startkey, key))
+                return entry;
             Py_INCREF(startkey);
             cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
             Py_DECREF(startkey);
@@ -90,6 +94,10 @@
                 PyObject *startkey = entry->key;
                 if (startkey == key)
                     return entry;
+                if (PyUnicode_CheckExact(startkey)
+                    && PyUnicode_CheckExact(key)
+                    && unicode_eq(startkey, key))
+                    return entry;
                 Py_INCREF(startkey);
                 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
                 Py_DECREF(startkey);
@@ -116,68 +124,6 @@
 }
 
 /*
- * Hacked up version of set_lookkey which can assume keys are always unicode;
- * This means we can always use unicode_eq directly and not have to check to
- * see if the comparison altered the table.
- */
-static setentry *
-set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash)
-{
-    setentry *table = so->table;
-    setentry *freeslot = NULL;
-    setentry *entry;
-    size_t perturb = hash;
-    size_t mask = so->mask;
-    size_t i = (size_t)hash;
-    size_t j;
-
-    /* Make sure this function doesn't have to handle non-unicode keys,
-       including subclasses of str; e.g., one reason to subclass
-       strings is to override __eq__, and for speed we don't cater to
-       that here. */
-    if (!PyUnicode_CheckExact(key)) {                             /* unlikely */
-        so->lookup = set_lookkey;
-        return set_lookkey(so, key, hash);
-    }
-
-    entry = &table[i & mask];
-    if (entry->key == NULL)
-        return entry;
-
-    while (1) {
-        if (entry->hash == hash
-            && (entry->key == key
-                || (entry->key != dummy                           /* unlikely */
-                    && unicode_eq(entry->key, key))))             /* likely */
-            return entry;
-        if (entry->key == dummy && freeslot == NULL)
-            freeslot = entry;
-
-        for (j = 1 ; j <= LINEAR_PROBES ; j++) {
-            entry = &table[(i + j) & mask];
-            if (entry->key == NULL)
-                goto found_null;
-            if (entry->hash == hash
-                && (entry->key == key
-                    || (entry->key != dummy                       /* unlikely */
-                        && unicode_eq(entry->key, key))))         /* likely */
-                return entry;
-            if (entry->key == dummy && freeslot == NULL)
-                freeslot = entry;
-        }
-
-        perturb >>= PERTURB_SHIFT;
-        i = i * 5 + 1 + perturb;
-
-        entry = &table[i & mask];
-        if (entry->key == NULL)
-            goto found_null;
-    }
-  found_null:
-    return freeslot == NULL ? entry : freeslot;
-}
-
-/*
 Internal routine used by set_table_resize() to insert an item which is
 known to be absent from the set.  This routine also assumes that
 the set contains no deleted entries.  Besides the performance benefit,
@@ -225,8 +171,7 @@
 {
     setentry *entry;
 
-    assert(so->lookup != NULL);
-    entry = so->lookup(so, key, hash);
+    entry = set_lookkey(so, key, hash);
     if (entry == NULL)
         return -1;
     if (entry->key == NULL) {
@@ -385,7 +330,7 @@
     setentry *entry;
     PyObject *old_key;
 
-    entry = (so->lookup)(so, oldentry->key, oldentry->hash);
+    entry = set_lookkey(so, oldentry->key, oldentry->hash);
     if (entry == NULL)
         return -1;
     if (entry->key == NULL  ||  entry->key == dummy)
@@ -631,7 +576,7 @@
     PyObject *key;
     setentry *lu_entry;
 
-    lu_entry = (so->lookup)(so, entry->key, entry->hash);
+    lu_entry = set_lookkey(so, entry->key, entry->hash);
     if (lu_entry == NULL)
         return -1;
     key = lu_entry->key;
@@ -994,7 +939,6 @@
     so->used = 0;
     so->mask = PySet_MINSIZE - 1;
     so->table = so->smalltable;
-    so->lookup = set_lookkey_unicode;
     so->hash = -1;
     so->finger = 0;
     so->weakreflist = NULL;
@@ -1095,7 +1039,6 @@
 {
     Py_ssize_t t;
     setentry *u;
-    setentry *(*f)(PySetObject *so, PyObject *key, Py_ssize_t hash);
     setentry tab[PySet_MINSIZE];
     Py_hash_t h;
 
@@ -1111,8 +1054,6 @@
         a->table = a->smalltable;
     b->table = u;
 
-    f = a->lookup;   a->lookup = b->lookup;      b->lookup = f;
-
     if (a->table == a->smalltable || b->table == b->smalltable) {
         memcpy(tab, a->smalltable, sizeof(tab));
         memcpy(a->smalltable, b->smalltable, sizeof(tab));

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list