[Python-checkins] cpython: Move insertion resize logic into set_insert_key().
raymond.hettinger
python-checkins at python.org
Sat Jul 4 02:21:22 CEST 2015
https://hg.python.org/cpython/rev/1fa89f685c4b
changeset: 96788:1fa89f685c4b
parent: 96785:91c5097bca2b
user: Raymond Hettinger <python at rcn.com>
date: Fri Jul 03 17:21:17 2015 -0700
summary:
Move insertion resize logic into set_insert_key().
Simplifies the code a little bit and does the resize check
only when a new key is added (giving a small speed up in
the case where the key already exists).
Fixes possible bug in set_merge() where the set_insert_key()
call relies on a big resize at the start to make enough room
for the keys but is vulnerable to a comparision callback that
could cause the table to shrink in the middle of the merge.
Also, changed the resize threshold from two-thirds of the
mask+1 to just two-thirds. The plus one offset gave no
real benefit (afterall, the two-thirds mark is just a
heuristic and isn't a precise cut-off).
files:
Objects/setobject.c | 75 +++++++++++---------------------
1 files changed, 27 insertions(+), 48 deletions(-)
diff --git a/Objects/setobject.c b/Objects/setobject.c
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -125,6 +125,8 @@
}
}
+static int set_table_resize(PySetObject *, Py_ssize_t);
+
static int
set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash)
{
@@ -139,7 +141,7 @@
entry = &table[i];
if (entry->key == NULL)
- goto found_null_first;
+ goto found_unused;
freeslot = NULL;
perturb = hash;
@@ -173,7 +175,7 @@
for (j = 0 ; j < LINEAR_PROBES ; j++) {
entry++;
if (entry->hash == 0 && entry->key == NULL)
- goto found_null;
+ goto found_unused_or_dummy;
if (entry->hash == hash) {
PyObject *startkey = entry->key;
assert(startkey != dummy);
@@ -204,30 +206,27 @@
entry = &table[i];
if (entry->key == NULL)
- goto found_null;
+ goto found_unused_or_dummy;
}
- found_null_first:
+ found_unused_or_dummy:
+ if (freeslot == NULL)
+ goto found_unused;
+ Py_INCREF(key);
+ so->used++;
+ freeslot->key = key;
+ freeslot->hash = hash;
+ return 0;
+
+ found_unused:
Py_INCREF(key);
so->fill++;
so->used++;
entry->key = key;
entry->hash = hash;
- return 0;
-
- found_null:
- Py_INCREF(key);
- if (freeslot == NULL) {
- /* UNUSED */
- so->fill++;
- } else {
- /* DUMMY */
- entry = freeslot;
- }
- so->used++;
- entry->key = key;
- entry->hash = hash;
- return 0;
+ if ((size_t)so->fill*3 < mask*2)
+ return 0;
+ return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
found_active:
return 0;
@@ -366,28 +365,15 @@
return 0;
}
-/* CAUTION: set_add_key/entry() must guarantee it won't resize the table */
-
static int
set_add_entry(PySetObject *so, setentry *entry)
{
- Py_ssize_t n_used;
- PyObject *key = entry->key;
- Py_hash_t hash = entry->hash;
-
- assert(so->fill <= so->mask); /* at least one empty slot */
- n_used = so->used;
- if (set_insert_key(so, key, hash))
- return -1;
- if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2))
- return 0;
- return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
+ return set_insert_key(so, entry->key, entry->hash);
}
static int
set_add_key(PySetObject *so, PyObject *key)
{
- setentry entry;
Py_hash_t hash;
if (!PyUnicode_CheckExact(key) ||
@@ -396,9 +382,7 @@
if (hash == -1)
return -1;
}
- entry.key = key;
- entry.hash = hash;
- return set_add_entry(so, &entry);
+ return set_insert_key(so, key, hash);
}
#define DISCARD_NOTFOUND 0
@@ -631,7 +615,7 @@
* incrementally resizing as we insert new keys. Expect
* that there will be no (or few) overlapping keys.
*/
- if ((so->fill + other->used)*3 >= (so->mask+1)*2) {
+ if ((so->fill + other->used)*3 >= so->mask*2) {
if (set_table_resize(so, (so->used + other->used)*2) != 0)
return -1;
}
@@ -979,16 +963,12 @@
*/
if (dictsize == -1)
return -1;
- if ((so->fill + dictsize)*3 >= (so->mask+1)*2) {
+ if ((so->fill + dictsize)*3 >= so->mask*2) {
if (set_table_resize(so, (so->used + dictsize)*2) != 0)
return -1;
}
while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
- setentry an_entry;
-
- an_entry.hash = hash;
- an_entry.key = key;
- if (set_add_entry(so, &an_entry))
+ if (set_insert_key(so, key, hash))
return -1;
}
return 0;
@@ -1583,17 +1563,16 @@
if (PyDict_CheckExact(other)) {
while (set_next(so, &pos, &entry)) {
- setentry entrycopy;
+ PyObject *key = entry->key;
+ Py_hash_t hash = entry->hash;
int rv;
- entrycopy.hash = entry->hash;
- entrycopy.key = entry->key;
- rv = _PyDict_Contains(other, entry->key, entry->hash);
+ rv = _PyDict_Contains(other, key, hash);
if (rv < 0) {
Py_DECREF(result);
return NULL;
}
if (!rv) {
- if (set_add_entry((PySetObject *)result, &entrycopy)) {
+ if (set_insert_key((PySetObject *)result, key, hash)) {
Py_DECREF(result);
return NULL;
}
--
Repository URL: https://hg.python.org/cpython
More information about the Python-checkins
mailing list