[Python-checkins] cpython: Move insertion resize logic into set_insert_key().

raymond.hettinger python-checkins at python.org
Sat Jul 4 02:21:22 CEST 2015


https://hg.python.org/cpython/rev/1fa89f685c4b
changeset:   96788:1fa89f685c4b
parent:      96785:91c5097bca2b
user:        Raymond Hettinger <python at rcn.com>
date:        Fri Jul 03 17:21:17 2015 -0700
summary:
  Move insertion resize logic into set_insert_key().

Simplifies the code a little bit and does the resize check
only when a new key is added (giving a small speed up in
the case where the key already exists).

Fixes possible bug in set_merge() where the set_insert_key()
call relies on a big resize at the start to make enough room
for the keys but is vulnerable to a comparision callback that
could cause the table to shrink in the middle of the merge.

Also, changed the resize threshold from two-thirds of the
mask+1 to just two-thirds.  The plus one offset gave no
real benefit (afterall, the two-thirds mark is just a
heuristic and isn't a precise cut-off).

files:
  Objects/setobject.c |  75 +++++++++++---------------------
  1 files changed, 27 insertions(+), 48 deletions(-)


diff --git a/Objects/setobject.c b/Objects/setobject.c
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -125,6 +125,8 @@
     }
 }
 
+static int set_table_resize(PySetObject *, Py_ssize_t);
+
 static int
 set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash)
 {
@@ -139,7 +141,7 @@
 
     entry = &table[i];
     if (entry->key == NULL)
-        goto found_null_first;
+        goto found_unused;
 
     freeslot = NULL;
     perturb = hash;
@@ -173,7 +175,7 @@
             for (j = 0 ; j < LINEAR_PROBES ; j++) {
                 entry++;
                 if (entry->hash == 0 && entry->key == NULL)
-                    goto found_null;
+                    goto found_unused_or_dummy;
                 if (entry->hash == hash) {
                     PyObject *startkey = entry->key;
                     assert(startkey != dummy);
@@ -204,30 +206,27 @@
 
         entry = &table[i];
         if (entry->key == NULL)
-            goto found_null;
+            goto found_unused_or_dummy;
     }
 
-  found_null_first:
+  found_unused_or_dummy:
+    if (freeslot == NULL)
+        goto found_unused;
+    Py_INCREF(key);
+    so->used++;
+    freeslot->key = key;
+    freeslot->hash = hash;
+    return 0;
+
+  found_unused:
     Py_INCREF(key);
     so->fill++;
     so->used++;
     entry->key = key;
     entry->hash = hash;
-    return 0;
-
-  found_null:
-    Py_INCREF(key);
-    if (freeslot == NULL) {
-        /* UNUSED */
-        so->fill++;
-    } else {
-        /* DUMMY */
-        entry = freeslot;
-    }
-    so->used++;
-    entry->key = key;
-    entry->hash = hash;
-    return 0;
+    if ((size_t)so->fill*3 < mask*2)
+        return 0;
+    return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
 
   found_active:
     return 0;
@@ -366,28 +365,15 @@
     return 0;
 }
 
-/* CAUTION: set_add_key/entry() must guarantee it won't resize the table */
-
 static int
 set_add_entry(PySetObject *so, setentry *entry)
 {
-    Py_ssize_t n_used;
-    PyObject *key = entry->key;
-    Py_hash_t hash = entry->hash;
-
-    assert(so->fill <= so->mask);  /* at least one empty slot */
-    n_used = so->used;
-    if (set_insert_key(so, key, hash))
-        return -1;
-    if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2))
-        return 0;
-    return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
+    return set_insert_key(so, entry->key, entry->hash);
 }
 
 static int
 set_add_key(PySetObject *so, PyObject *key)
 {
-    setentry entry;
     Py_hash_t hash;
 
     if (!PyUnicode_CheckExact(key) ||
@@ -396,9 +382,7 @@
         if (hash == -1)
             return -1;
     }
-    entry.key = key;
-    entry.hash = hash;
-    return set_add_entry(so, &entry);
+    return set_insert_key(so, key, hash);
 }
 
 #define DISCARD_NOTFOUND 0
@@ -631,7 +615,7 @@
      * incrementally resizing as we insert new keys.  Expect
      * that there will be no (or few) overlapping keys.
      */
-    if ((so->fill + other->used)*3 >= (so->mask+1)*2) {
+    if ((so->fill + other->used)*3 >= so->mask*2) {
        if (set_table_resize(so, (so->used + other->used)*2) != 0)
            return -1;
     }
@@ -979,16 +963,12 @@
         */
         if (dictsize == -1)
             return -1;
-        if ((so->fill + dictsize)*3 >= (so->mask+1)*2) {
+        if ((so->fill + dictsize)*3 >= so->mask*2) {
             if (set_table_resize(so, (so->used + dictsize)*2) != 0)
                 return -1;
         }
         while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
-            setentry an_entry;
-
-            an_entry.hash = hash;
-            an_entry.key = key;
-            if (set_add_entry(so, &an_entry))
+            if (set_insert_key(so, key, hash))
                 return -1;
         }
         return 0;
@@ -1583,17 +1563,16 @@
 
     if (PyDict_CheckExact(other)) {
         while (set_next(so, &pos, &entry)) {
-            setentry entrycopy;
+            PyObject *key = entry->key;
+            Py_hash_t hash = entry->hash;
             int rv;
-            entrycopy.hash = entry->hash;
-            entrycopy.key = entry->key;
-            rv = _PyDict_Contains(other, entry->key, entry->hash);
+            rv = _PyDict_Contains(other, key, hash);
             if (rv < 0) {
                 Py_DECREF(result);
                 return NULL;
             }
             if (!rv) {
-                if (set_add_entry((PySetObject *)result, &entrycopy)) {
+                if (set_insert_key((PySetObject *)result, key, hash)) {
                     Py_DECREF(result);
                     return NULL;
                 }

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list