[Python-checkins] cpython: hashtable.h now supports keys of any size

victor.stinner python-checkins at python.org
Mon Mar 21 17:01:56 EDT 2016


https://hg.python.org/cpython/rev/aca4e9af1ca6
changeset:   100640:aca4e9af1ca6
user:        Victor Stinner <victor.stinner at gmail.com>
date:        Mon Mar 21 22:00:58 2016 +0100
summary:
  hashtable.h now supports keys of any size

Issue #26588: hashtable.h now supports keys of any size, not only
sizeof(void*). This makes it possible to use keys larger than sizeof(void*),
and also to use less memory for keys smaller than sizeof(void*).

files:
  Modules/_tracemalloc.c |  105 +++++++++++------
  Modules/hashtable.c    |  147 +++++++++++++++----------
  Modules/hashtable.h    |  167 ++++++++++++++++++++++------
  Python/marshal.c       |   15 +-
  4 files changed, 295 insertions(+), 139 deletions(-)


diff --git a/Modules/_tracemalloc.c b/Modules/_tracemalloc.c
--- a/Modules/_tracemalloc.c
+++ b/Modules/_tracemalloc.c
@@ -196,23 +196,38 @@
 }
 #endif
 
+static Py_uhash_t
+hashtable_hash_pyobject(size_t key_size, const void *pkey)
+{
+    PyObject *obj;
+
+    _Py_HASHTABLE_READ_KEY(key_size, pkey, obj);
+    return PyObject_Hash(obj);
+}
+
 static int
-hashtable_compare_unicode(const void *key, const _Py_hashtable_entry_t *entry)
+hashtable_compare_unicode(size_t key_size, const void *pkey,
+                          const _Py_hashtable_entry_t *entry)
 {
-    if (key != NULL && entry->key != NULL)
-        return (PyUnicode_Compare((PyObject *)key, (PyObject *)entry->key) == 0);
+    PyObject *key, *entry_key;
+
+    _Py_HASHTABLE_READ_KEY(key_size, pkey, key);
+    _Py_HASHTABLE_ENTRY_READ_KEY(key_size, entry, entry_key);
+
+    if (key != NULL && entry_key != NULL)
+        return (PyUnicode_Compare(key, entry_key) == 0);
     else
-        return key == entry->key;
+        return key == entry_key;
 }
 
 static _Py_hashtable_allocator_t hashtable_alloc = {malloc, free};
 
 static _Py_hashtable_t *
-hashtable_new(size_t data_size,
+hashtable_new(size_t key_size, size_t data_size,
               _Py_hashtable_hash_func hash_func,
               _Py_hashtable_compare_func compare_func)
 {
-    return _Py_hashtable_new_full(data_size, 0,
+    return _Py_hashtable_new_full(key_size, data_size, 0,
                                   hash_func, compare_func,
                                   NULL, NULL, NULL, &hashtable_alloc);
 }
@@ -230,20 +245,25 @@
 }
 
 static Py_uhash_t
-hashtable_hash_traceback(const void *key)
+hashtable_hash_traceback(size_t key_size, const void *pkey)
 {
-    const traceback_t *traceback = key;
+    const traceback_t *traceback;
+
+    _Py_HASHTABLE_READ_KEY(key_size, pkey, traceback);
     return traceback->hash;
 }
 
 static int
-hashtable_compare_traceback(const traceback_t *traceback1,
+hashtable_compare_traceback(size_t key_size, const void *pkey,
                             const _Py_hashtable_entry_t *he)
 {
-    const traceback_t *traceback2 = he->key;
+    traceback_t *traceback1, *traceback2;
     const frame_t *frame1, *frame2;
     int i;
 
+    _Py_HASHTABLE_READ_KEY(key_size, pkey, traceback1);
+    _Py_HASHTABLE_ENTRY_READ_KEY(key_size, he, traceback2);
+
     if (traceback1->nframe != traceback2->nframe)
         return 0;
 
@@ -312,15 +332,16 @@
     }
 
     /* intern the filename */
-    entry = _Py_hashtable_get_entry(tracemalloc_filenames, filename);
+    entry = _Py_HASHTABLE_GET_ENTRY(tracemalloc_filenames, filename);
     if (entry != NULL) {
-        filename = (PyObject *)entry->key;
+        _Py_HASHTABLE_ENTRY_READ_KEY(tracemalloc_filenames->key_size, entry,
+                                     filename);
     }
     else {
         /* tracemalloc_filenames is responsible to keep a reference
            to the filename */
         Py_INCREF(filename);
-        if (_Py_hashtable_set(tracemalloc_filenames, filename, NULL, 0) < 0) {
+        if (_Py_HASHTABLE_SET_NODATA(tracemalloc_filenames, filename) < 0) {
             Py_DECREF(filename);
 #ifdef TRACE_DEBUG
             tracemalloc_error("failed to intern the filename");
@@ -403,9 +424,10 @@
     traceback->hash = traceback_hash(traceback);
 
     /* intern the traceback */
-    entry = _Py_hashtable_get_entry(tracemalloc_tracebacks, traceback);
+    entry = _Py_HASHTABLE_GET_ENTRY(tracemalloc_tracebacks, traceback);
     if (entry != NULL) {
-        traceback = (traceback_t *)entry->key;
+        _Py_HASHTABLE_ENTRY_READ_KEY(tracemalloc_tracebacks->key_size, entry,
+                                     traceback);
     }
     else {
         traceback_t *copy;
@@ -422,7 +444,7 @@
         }
         memcpy(copy, traceback, traceback_size);
 
-        if (_Py_hashtable_set(tracemalloc_tracebacks, copy, NULL, 0) < 0) {
+        if (_Py_HASHTABLE_SET_NODATA(tracemalloc_tracebacks, copy) < 0) {
             raw_free(copy);
 #ifdef TRACE_DEBUG
             tracemalloc_error("failed to intern the traceback: putdata failed");
@@ -464,7 +486,7 @@
 {
     trace_t trace;
 
-    if (_Py_hashtable_pop(tracemalloc_traces, ptr, &trace, sizeof(trace))) {
+    if (_Py_HASHTABLE_POP(tracemalloc_traces, ptr, trace)) {
         assert(tracemalloc_traced_memory >= trace.size);
         tracemalloc_traced_memory -= trace.size;
     }
@@ -714,17 +736,23 @@
 #endif   /* TRACE_RAW_MALLOC */
 
 static int
-tracemalloc_clear_filename(_Py_hashtable_entry_t *entry, void *user_data)
+tracemalloc_clear_filename(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry,
+                           void *user_data)
 {
-    PyObject *filename = (PyObject *)entry->key;
+    PyObject *filename;
+
+    _Py_HASHTABLE_ENTRY_READ_KEY(ht->key_size, entry, filename);
     Py_DECREF(filename);
     return 0;
 }
 
 static int
-traceback_free_traceback(_Py_hashtable_entry_t *entry, void *user_data)
+traceback_free_traceback(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry,
+                         void *user_data)
 {
-    traceback_t *traceback = (traceback_t *)entry->key;
+    traceback_t *traceback;
+
+    _Py_HASHTABLE_ENTRY_READ_KEY(ht->key_size, entry, traceback);
     raw_free(traceback);
     return 0;
 }
@@ -791,21 +819,20 @@
     }
 #endif
 
-    tracemalloc_filenames = hashtable_new(0,
-                                          (_Py_hashtable_hash_func)PyObject_Hash,
+    tracemalloc_filenames = hashtable_new(sizeof(PyObject *), 0,
+                                          hashtable_hash_pyobject,
                                           hashtable_compare_unicode);
 
-    tracemalloc_tracebacks = hashtable_new(0,
-                                           (_Py_hashtable_hash_func)hashtable_hash_traceback,
-                                           (_Py_hashtable_compare_func)hashtable_compare_traceback);
+    tracemalloc_tracebacks = hashtable_new(sizeof(traceback_t *), 0,
+                                           hashtable_hash_traceback,
+                                           hashtable_compare_traceback);
 
-    tracemalloc_traces = hashtable_new(sizeof(trace_t),
+    tracemalloc_traces = hashtable_new(sizeof(void*), sizeof(trace_t),
                                        _Py_hashtable_hash_ptr,
                                        _Py_hashtable_compare_direct);
 
     if (tracemalloc_filenames == NULL || tracemalloc_tracebacks == NULL
-        || tracemalloc_traces == NULL)
-    {
+       || tracemalloc_traces == NULL) {
         PyErr_NoMemory();
         return -1;
     }
@@ -840,9 +867,9 @@
     tracemalloc_stop();
 
     /* destroy hash tables */
-    _Py_hashtable_destroy(tracemalloc_traces);
     _Py_hashtable_destroy(tracemalloc_tracebacks);
     _Py_hashtable_destroy(tracemalloc_filenames);
+    _Py_hashtable_destroy(tracemalloc_traces);
 
 #if defined(WITH_THREAD) && defined(TRACE_RAW_MALLOC)
     if (tables_lock != NULL) {
@@ -935,8 +962,9 @@
     PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &allocators.mem);
     PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &allocators.obj);
 
+    tracemalloc_clear_traces();
+
     /* release memory */
-    tracemalloc_clear_traces();
     raw_free(tracemalloc_traceback);
     tracemalloc_traceback = NULL;
 }
@@ -1065,14 +1093,15 @@
 } get_traces_t;
 
 static int
-tracemalloc_get_traces_fill(_Py_hashtable_entry_t *entry, void *user_data)
+tracemalloc_get_traces_fill(_Py_hashtable_t *traces, _Py_hashtable_entry_t *entry,
+                            void *user_data)
 {
     get_traces_t *get_traces = user_data;
     trace_t *trace;
     PyObject *tracemalloc_obj;
     int res;
 
-    trace = (trace_t *)_Py_HASHTABLE_ENTRY_DATA(entry);
+    trace = (trace_t *)_Py_HASHTABLE_ENTRY_DATA(traces, entry);
 
     tracemalloc_obj = trace_to_pyobject(trace, get_traces->tracebacks);
     if (tracemalloc_obj == NULL)
@@ -1087,9 +1116,11 @@
 }
 
 static int
-tracemalloc_pyobject_decref_cb(_Py_hashtable_entry_t *entry, void *user_data)
+tracemalloc_pyobject_decref_cb(_Py_hashtable_t *tracebacks,
+                               _Py_hashtable_entry_t *entry,
+                               void *user_data)
 {
-    PyObject *obj = (PyObject *)_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
+    PyObject *obj = (PyObject *)_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(tracebacks, entry);
     Py_DECREF(obj);
     return 0;
 }
@@ -1120,7 +1151,7 @@
 
     /* the traceback hash table is used temporarily to intern traceback tuple
        of (filename, lineno) tuples */
-    get_traces.tracebacks = hashtable_new(sizeof(PyObject *),
+    get_traces.tracebacks = hashtable_new(sizeof(traceback_t *), sizeof(PyObject *),
                                           _Py_hashtable_hash_ptr,
                                           _Py_hashtable_compare_direct);
     if (get_traces.tracebacks == NULL) {
@@ -1152,7 +1183,7 @@
 finally:
     if (get_traces.tracebacks != NULL) {
         _Py_hashtable_foreach(get_traces.tracebacks,
-                         tracemalloc_pyobject_decref_cb, NULL);
+                              tracemalloc_pyobject_decref_cb, NULL);
         _Py_hashtable_destroy(get_traces.tracebacks);
     }
     if (get_traces.traces != NULL)
diff --git a/Modules/hashtable.c b/Modules/hashtable.c
--- a/Modules/hashtable.c
+++ b/Modules/hashtable.c
@@ -1,5 +1,5 @@
-/* The implementation of the hash table (_Py_hashtable_t) is based on the cfuhash
-   project:
+/* The implementation of the hash table (_Py_hashtable_t) is based on the
+   cfuhash project:
    http://sourceforge.net/projects/libcfu/
 
    Copyright of cfuhash:
@@ -59,7 +59,7 @@
 #define ENTRY_NEXT(ENTRY) \
         ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
 #define HASHTABLE_ITEM_SIZE(HT) \
-        (sizeof(_Py_hashtable_entry_t) + (HT)->data_size)
+        (sizeof(_Py_hashtable_entry_t) + (HT)->key_size + (HT)->data_size)
 
 /* Forward declaration */
 static void hashtable_rehash(_Py_hashtable_t *ht);
@@ -70,6 +70,7 @@
     list->head = NULL;
 }
 
+
 static void
 _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
 {
@@ -77,6 +78,7 @@
     list->head = item;
 }
 
+
 static void
 _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
                  _Py_slist_item_t *item)
@@ -87,24 +89,26 @@
         list->head = item->next;
 }
 
-Py_uhash_t
-_Py_hashtable_hash_int(const void *key)
-{
-    return (Py_uhash_t)key;
-}
 
 Py_uhash_t
-_Py_hashtable_hash_ptr(const void *key)
+_Py_hashtable_hash_ptr(size_t key_size, const void *pkey)
 {
+    void *key;
+
+    _Py_HASHTABLE_READ_KEY(key_size, pkey, key);
     return (Py_uhash_t)_Py_HashPointer((void *)key);
 }
 
+
 int
-_Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry)
+_Py_hashtable_compare_direct(size_t key_size, const void *pkey,
+                             const _Py_hashtable_entry_t *entry)
 {
-    return entry->key == key;
+    const void *pkey2 = _Py_HASHTABLE_ENTRY_KEY(entry);
+    return (memcmp(pkey, pkey2, key_size) == 0);
 }
 
+
 /* makes sure the real size of the buckets array is a power of 2 */
 static size_t
 round_size(size_t s)
@@ -118,8 +122,10 @@
     return i;
 }
 
+
 _Py_hashtable_t *
-_Py_hashtable_new_full(size_t data_size, size_t init_size,
+_Py_hashtable_new_full(size_t key_size, size_t data_size,
+                       size_t init_size,
                        _Py_hashtable_hash_func hash_func,
                        _Py_hashtable_compare_func compare_func,
                        _Py_hashtable_copy_data_func copy_data_func,
@@ -144,6 +150,7 @@
 
     ht->num_buckets = round_size(init_size);
     ht->entries = 0;
+    ht->key_size = key_size;
     ht->data_size = data_size;
 
     buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
@@ -163,16 +170,19 @@
     return ht;
 }
 
+
 _Py_hashtable_t *
-_Py_hashtable_new(size_t data_size,
+_Py_hashtable_new(size_t key_size, size_t data_size,
                   _Py_hashtable_hash_func hash_func,
                   _Py_hashtable_compare_func compare_func)
 {
-    return _Py_hashtable_new_full(data_size, HASHTABLE_MIN_SIZE,
+    return _Py_hashtable_new_full(key_size, data_size,
+                                  HASHTABLE_MIN_SIZE,
                                   hash_func, compare_func,
                                   NULL, NULL, NULL, NULL);
 }
 
+
 size_t
 _Py_hashtable_size(_Py_hashtable_t *ht)
 {
@@ -195,7 +205,7 @@
             for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
                 void *data;
 
-                data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
+                data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ht, entry);
                 size += ht->get_data_size_func(data);
             }
         }
@@ -203,6 +213,7 @@
     return size;
 }
 
+
 #ifdef Py_DEBUG
 void
 _Py_hashtable_print_stats(_Py_hashtable_t *ht)
@@ -243,38 +254,47 @@
 }
 #endif
 
-/* Get an entry. Return NULL if the key does not exist. */
+
 _Py_hashtable_entry_t *
-_Py_hashtable_get_entry(_Py_hashtable_t *ht, const void *key)
+_Py_hashtable_get_entry(_Py_hashtable_t *ht,
+                        size_t key_size, const void *pkey)
 {
     Py_uhash_t key_hash;
     size_t index;
     _Py_hashtable_entry_t *entry;
 
-    key_hash = ht->hash_func(key);
+    assert(key_size == ht->key_size);
+
+    key_hash = ht->hash_func(key_size, pkey);
     index = key_hash & (ht->num_buckets - 1);
 
     for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
-        if (entry->key_hash == key_hash && ht->compare_func(key, entry))
+        if (entry->key_hash == key_hash
+           && ht->compare_func(key_size, pkey, entry))
             break;
     }
 
     return entry;
 }
 
+
 static int
-_hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
+_Py_hashtable_pop_entry(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
+                        void *data, size_t data_size)
 {
     Py_uhash_t key_hash;
     size_t index;
     _Py_hashtable_entry_t *entry, *previous;
 
-    key_hash = ht->hash_func(key);
+    assert(key_size == ht->key_size);
+
+    key_hash = ht->hash_func(key_size, pkey);
     index = key_hash & (ht->num_buckets - 1);
 
     previous = NULL;
     for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
-        if (entry->key_hash == key_hash && ht->compare_func(key, entry))
+        if (entry->key_hash == key_hash
+           && ht->compare_func(key_size, pkey, entry))
             break;
         previous = entry;
     }
@@ -287,7 +307,7 @@
     ht->entries--;
 
     if (data != NULL)
-        _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
+        _Py_HASHTABLE_ENTRY_READ_DATA(ht, entry, data_size, data);
     ht->alloc.free(entry);
 
     if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW)
@@ -295,26 +315,27 @@
     return 1;
 }
 
-/* Add a new entry to the hash. The key must not be present in the hash table.
-   Return 0 on success, -1 on memory error. */
+
 int
-_Py_hashtable_set(_Py_hashtable_t *ht, const void *key,
-                  void *data, size_t data_size)
+_Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
+                  size_t data_size, void *data)
 {
     Py_uhash_t key_hash;
     size_t index;
     _Py_hashtable_entry_t *entry;
 
+    assert(key_size == ht->key_size);
+
     assert(data != NULL || data_size == 0);
 #ifndef NDEBUG
     /* Don't write the assertion on a single line because it is interesting
        to know the duplicated entry if the assertion failed. The entry can
        be read using a debugger. */
-    entry = _Py_hashtable_get_entry(ht, key);
+    entry = _Py_hashtable_get_entry(ht, key_size, pkey);
     assert(entry == NULL);
 #endif
 
-    key_hash = ht->hash_func(key);
+    key_hash = ht->hash_func(key_size, pkey);
     index = key_hash & (ht->num_buckets - 1);
 
     entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht));
@@ -323,11 +344,11 @@
         return -1;
     }
 
-    entry->key = (void *)key;
     entry->key_hash = key_hash;
+    memcpy((void *)_Py_HASHTABLE_ENTRY_KEY(entry), pkey, key_size);
 
     assert(data_size == ht->data_size);
-    memcpy(_Py_HASHTABLE_ENTRY_DATA(entry), data, data_size);
+    memcpy(_Py_HASHTABLE_ENTRY_DATA(ht, entry), data, data_size);
 
     _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
     ht->entries++;
@@ -337,48 +358,48 @@
     return 0;
 }
 
-/* Get data from an entry. Copy entry data into data and return 1 if the entry
-   exists, return 0 if the entry does not exist. */
+
 int
-_Py_hashtable_get(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
+_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size,const void *pkey,
+                  size_t data_size, void *data)
 {
     _Py_hashtable_entry_t *entry;
 
     assert(data != NULL);
 
-    entry = _Py_hashtable_get_entry(ht, key);
+    entry = _Py_hashtable_get_entry(ht, key_size, pkey);
     if (entry == NULL)
         return 0;
-    _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
+    _Py_HASHTABLE_ENTRY_READ_DATA(ht, entry, data_size, data);
     return 1;
 }
 
+
 int
-_Py_hashtable_pop(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
+_Py_hashtable_pop(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
+                  size_t data_size, void *data)
 {
     assert(data != NULL);
     assert(ht->free_data_func == NULL);
-    return _hashtable_pop_entry(ht, key, data, data_size);
+    return _Py_hashtable_pop_entry(ht, key_size, pkey, data, data_size);
 }
 
-/* Delete an entry. The entry must exist. */
+
 void
-_Py_hashtable_delete(_Py_hashtable_t *ht, const void *key)
+_Py_hashtable_delete(_Py_hashtable_t *ht, size_t key_size, const void *pkey)
 {
 #ifndef NDEBUG
-    int found = _hashtable_pop_entry(ht, key, NULL, 0);
+    int found = _Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0);
     assert(found);
 #else
-    (void)_hashtable_pop_entry(ht, key, NULL, 0);
+    (void)_Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0);
 #endif
 }
 
-/* Prototype for a pointer to a function to be called foreach
-   key/value pair in the hash by hashtable_foreach().  Iteration
-   stops if a non-zero value is returned. */
+
 int
 _Py_hashtable_foreach(_Py_hashtable_t *ht,
-                      int (*func) (_Py_hashtable_entry_t *entry, void *arg),
+                      _Py_hashtable_foreach_func func,
                       void *arg)
 {
     _Py_hashtable_entry_t *entry;
@@ -386,7 +407,7 @@
 
     for (hv = 0; hv < ht->num_buckets; hv++) {
         for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
-            int res = func(entry, arg);
+            int res = func(ht, entry, arg);
             if (res)
                 return res;
         }
@@ -394,9 +415,11 @@
     return 0;
 }
 
+
 static void
 hashtable_rehash(_Py_hashtable_t *ht)
 {
+    const size_t key_size = ht->key_size;
     size_t buckets_size, new_size, bucket;
     _Py_slist_t *old_buckets = NULL;
     size_t old_num_buckets;
@@ -425,7 +448,8 @@
         for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) {
             size_t entry_index;
 
-            assert(ht->hash_func(entry->key) == entry->key_hash);
+
+            assert(ht->hash_func(key_size, _Py_HASHTABLE_ENTRY_KEY(entry)) == entry->key_hash);
             next = ENTRY_NEXT(entry);
             entry_index = entry->key_hash & (new_size - 1);
 
@@ -436,6 +460,7 @@
     ht->alloc.free(old_buckets);
 }
 
+
 void
 _Py_hashtable_clear(_Py_hashtable_t *ht)
 {
@@ -446,7 +471,7 @@
         for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) {
             next = ENTRY_NEXT(entry);
             if (ht->free_data_func)
-                ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
+                ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ht, entry));
             ht->alloc.free(entry);
         }
         _Py_slist_init(&ht->buckets[i]);
@@ -455,6 +480,7 @@
     hashtable_rehash(ht);
 }
 
+
 void
 _Py_hashtable_destroy(_Py_hashtable_t *ht)
 {
@@ -465,7 +491,7 @@
         while (entry) {
             _Py_slist_item_t *entry_next = entry->next;
             if (ht->free_data_func)
-                ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
+                ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ht, entry));
             ht->alloc.free(entry);
             entry = entry_next;
         }
@@ -475,17 +501,20 @@
     ht->alloc.free(ht);
 }
 
-/* Return a copy of the hash table */
+
 _Py_hashtable_t *
 _Py_hashtable_copy(_Py_hashtable_t *src)
 {
+    const size_t key_size = src->key_size;
+    const size_t data_size = src->data_size;
     _Py_hashtable_t *dst;
     _Py_hashtable_entry_t *entry;
     size_t bucket;
     int err;
     void *data, *new_data;
 
-    dst = _Py_hashtable_new_full(src->data_size, src->num_buckets,
+    dst = _Py_hashtable_new_full(key_size, data_size,
+                                 src->num_buckets,
                                  src->hash_func, src->compare_func,
                                  src->copy_data_func, src->free_data_func,
                                  src->get_data_size_func, &src->alloc);
@@ -496,17 +525,20 @@
         entry = TABLE_HEAD(src, bucket);
         for (; entry; entry = ENTRY_NEXT(entry)) {
             if (src->copy_data_func) {
-                data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
+                data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(src, entry);
                 new_data = src->copy_data_func(data);
                 if (new_data != NULL)
-                    err = _Py_hashtable_set(dst, entry->key,
-                                        &new_data, src->data_size);
+                    err = _Py_hashtable_set(dst, key_size,
+                                            _Py_HASHTABLE_ENTRY_KEY(entry),
+                                            data_size, &new_data);
                 else
                     err = 1;
             }
             else {
-                data = _Py_HASHTABLE_ENTRY_DATA(entry);
-                err = _Py_hashtable_set(dst, entry->key, data, src->data_size);
+                data = _Py_HASHTABLE_ENTRY_DATA(src, entry);
+                err = _Py_hashtable_set(dst, key_size,
+                                        _Py_HASHTABLE_ENTRY_KEY(entry),
+                                        data_size, data);
             }
             if (err) {
                 _Py_hashtable_destroy(dst);
@@ -516,4 +548,3 @@
     }
     return dst;
 }
-
diff --git a/Modules/hashtable.h b/Modules/hashtable.h
--- a/Modules/hashtable.h
+++ b/Modules/hashtable.h
@@ -1,9 +1,10 @@
 #ifndef Py_HASHTABLE_H
 #define Py_HASHTABLE_H
-
 /* The whole API is private */
 #ifndef Py_LIMITED_API
 
+/* Single linked list */
+
 typedef struct _Py_slist_item_s {
     struct _Py_slist_item_s *next;
 } _Py_slist_item_t;
@@ -16,30 +17,55 @@
 
 #define _Py_SLIST_HEAD(SLIST) (((_Py_slist_t *)SLIST)->head)
 
+
+/* _Py_hashtable: table entry */
+
 typedef struct {
     /* used by _Py_hashtable_t.buckets to link entries */
     _Py_slist_item_t _Py_slist_item;
 
-    const void *key;
     Py_uhash_t key_hash;
 
-    /* data follows */
+    /* key (key_size bytes) and then data (data_size bytes) follows */
 } _Py_hashtable_entry_t;
 
-#define _Py_HASHTABLE_ENTRY_DATA(ENTRY) \
-        ((char *)(ENTRY) + sizeof(_Py_hashtable_entry_t))
+#define _Py_HASHTABLE_ENTRY_KEY(ENTRY) \
+        ((const void *)((char *)(ENTRY) + sizeof(_Py_hashtable_entry_t)))
 
-#define _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ENTRY) \
-        (*(void **)_Py_HASHTABLE_ENTRY_DATA(ENTRY))
+#define _Py_HASHTABLE_ENTRY_DATA(TABLE, ENTRY) \
+        ((char *)(ENTRY) + sizeof(_Py_hashtable_entry_t) + (TABLE)->key_size)
 
-#define _Py_HASHTABLE_ENTRY_READ_DATA(TABLE, DATA, DATA_SIZE, ENTRY) \
+#define _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(TABLE, ENTRY) \
+        (*(void **)_Py_HASHTABLE_ENTRY_DATA(TABLE, ENTRY))
+
+/* Get a key value from pkey: use memcpy() rather than a pointer dereference
+   to avoid memory alignment issues. */
+#define _Py_HASHTABLE_READ_KEY(KEY_SIZE, PKEY, DST_KEY) \
+    do { \
+        assert(sizeof(DST_KEY) == (KEY_SIZE)); \
+        memcpy(&(DST_KEY), (PKEY), sizeof(DST_KEY)); \
+    } while (0)
+
+#define _Py_HASHTABLE_ENTRY_READ_KEY(KEY_SIZE, ENTRY, KEY) \
+    do { \
+        assert(sizeof(KEY) == (KEY_SIZE)); \
+        memcpy(&(KEY), _Py_HASHTABLE_ENTRY_KEY(ENTRY), sizeof(KEY)); \
+    } while (0)
+
+#define _Py_HASHTABLE_ENTRY_READ_DATA(TABLE, ENTRY, DATA_SIZE, DATA) \
     do { \
         assert((DATA_SIZE) == (TABLE)->data_size); \
-        memcpy(DATA, _Py_HASHTABLE_ENTRY_DATA(ENTRY), DATA_SIZE); \
+        memcpy(DATA, _Py_HASHTABLE_ENTRY_DATA(TABLE, ENTRY), DATA_SIZE); \
     } while (0)
 
-typedef Py_uhash_t (*_Py_hashtable_hash_func) (const void *key);
-typedef int (*_Py_hashtable_compare_func) (const void *key, const _Py_hashtable_entry_t *he);
+
+/* _Py_hashtable: prototypes */
+
+typedef Py_uhash_t (*_Py_hashtable_hash_func) (size_t key_size,
+                                               const void *pkey);
+typedef int (*_Py_hashtable_compare_func) (size_t key_size,
+                                           const void *pkey,
+                                           const _Py_hashtable_entry_t *he);
 typedef void* (*_Py_hashtable_copy_data_func)(void *data);
 typedef void (*_Py_hashtable_free_data_func)(void *data);
 typedef size_t (*_Py_hashtable_get_data_size_func)(void *data);
@@ -52,10 +78,14 @@
     void (*free) (void *ptr);
 } _Py_hashtable_allocator_t;
 
+
+/* _Py_hashtable: table */
+
 typedef struct {
     size_t num_buckets;
     size_t entries; /* Total number of entries in the table. */
     _Py_slist_t *buckets;
+    size_t key_size;
     size_t data_size;
 
     _Py_hashtable_hash_func hash_func;
@@ -66,16 +96,25 @@
     _Py_hashtable_allocator_t alloc;
 } _Py_hashtable_t;
 
-/* hash and compare functions for integers and pointers */
-PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(const void *key);
-PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_int(const void *key);
-PyAPI_FUNC(int) _Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry);
+/* hash a pointer (void*) */
+PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(
+    size_t key_size,
+    const void *pkey);
+
+/* comparison using memcmp() */
+PyAPI_FUNC(int) _Py_hashtable_compare_direct(
+    size_t key_size,
+    const void *pkey,
+    const _Py_hashtable_entry_t *entry);
 
 PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new(
+    size_t key_size,
     size_t data_size,
     _Py_hashtable_hash_func hash_func,
     _Py_hashtable_compare_func compare_func);
+
 PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new_full(
+    size_t key_size,
     size_t data_size,
     size_t init_size,
     _Py_hashtable_hash_func hash_func,
@@ -84,45 +123,95 @@
     _Py_hashtable_free_data_func free_data_func,
     _Py_hashtable_get_data_size_func get_data_size_func,
     _Py_hashtable_allocator_t *allocator);
-PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_copy(_Py_hashtable_t *src);
-PyAPI_FUNC(void) _Py_hashtable_clear(_Py_hashtable_t *ht);
+
 PyAPI_FUNC(void) _Py_hashtable_destroy(_Py_hashtable_t *ht);
 
-typedef int (*_Py_hashtable_foreach_func) (_Py_hashtable_entry_t *entry, void *arg);
+/* Return a copy of the hash table */
+PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_copy(_Py_hashtable_t *src);
 
+PyAPI_FUNC(void) _Py_hashtable_clear(_Py_hashtable_t *ht);
+
+typedef int (*_Py_hashtable_foreach_func) (_Py_hashtable_t *ht,
+                                           _Py_hashtable_entry_t *entry,
+                                           void *arg);
+
+/* Call func() on each entry of the hashtable.
+   Iteration stops if func() result is non-zero, in this case it's the result
+   of the call. Otherwise, the function returns 0. */
 PyAPI_FUNC(int) _Py_hashtable_foreach(
     _Py_hashtable_t *ht,
-    _Py_hashtable_foreach_func func, void *arg);
+    _Py_hashtable_foreach_func func,
+    void *arg);
+
 PyAPI_FUNC(size_t) _Py_hashtable_size(_Py_hashtable_t *ht);
 
+/* Add a new entry to the hash. The key must not be present in the hash table.
+   Return 0 on success, -1 on memory error.
+
+   Don't call directly this function,
+   but use _Py_HASHTABLE_SET() and _Py_HASHTABLE_SET_NODATA() macros */
+PyAPI_FUNC(int) _Py_hashtable_set(
+    _Py_hashtable_t *ht,
+    size_t key_size,
+    const void *pkey,
+    size_t data_size,
+    void *data);
+
+#define _Py_HASHTABLE_SET(TABLE, KEY, DATA) \
+    _Py_hashtable_set(TABLE, sizeof(KEY), &KEY, sizeof(DATA), &(DATA))
+
+#define _Py_HASHTABLE_SET_NODATA(TABLE, KEY) \
+    _Py_hashtable_set(TABLE, sizeof(KEY), &KEY, 0, NULL)
+
+
+/* Get an entry.
+   Return NULL if the key does not exist.
+
+   Don't call directly this function, but use _Py_HASHTABLE_GET_ENTRY()
+   macro */
 PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry(
     _Py_hashtable_t *ht,
-    const void *key);
-PyAPI_FUNC(int) _Py_hashtable_set(
-    _Py_hashtable_t *ht,
-    const void *key,
-    void *data,
-    size_t data_size);
+    size_t key_size,
+    const void *pkey);
+
+#define _Py_HASHTABLE_GET_ENTRY(TABLE, KEY) \
+    _Py_hashtable_get_entry(TABLE, sizeof(KEY), &(KEY))
+
+
+/* Get data from an entry. Copy entry data into data and return 1 if the entry
+   exists, return 0 if the entry does not exist.
+
+   Don't call directly this function, but use _Py_HASHTABLE_GET() macro */
 PyAPI_FUNC(int) _Py_hashtable_get(
     _Py_hashtable_t *ht,
-    const void *key,
-    void *data,
-    size_t data_size);
+    size_t key_size,
+    const void *pkey,
+    size_t data_size,
+    void *data);
+
+#define _Py_HASHTABLE_GET(TABLE, KEY, DATA) \
+    _Py_hashtable_get(TABLE, sizeof(KEY), &(KEY), sizeof(DATA), &(DATA))
+
+
+/* Don't call directly this function, but use _Py_HASHTABLE_POP() macro */
 PyAPI_FUNC(int) _Py_hashtable_pop(
     _Py_hashtable_t *ht,
-    const void *key,
-    void *data,
-    size_t data_size);
+    size_t key_size,
+    const void *pkey,
+    size_t data_size,
+    void *data);
+
+#define _Py_HASHTABLE_POP(TABLE, KEY, DATA) \
+    _Py_hashtable_pop(TABLE, sizeof(KEY), &(KEY), sizeof(DATA), &(DATA))
+
+
+/* Delete an entry.
+
+   WARNING: The entry must exist. */
 PyAPI_FUNC(void) _Py_hashtable_delete(
     _Py_hashtable_t *ht,
-    const void *key);
-
-#define _Py_HASHTABLE_SET(TABLE, KEY, DATA) \
-    _Py_hashtable_set(TABLE, KEY, &(DATA), sizeof(DATA))
-
-#define _Py_HASHTABLE_GET(TABLE, KEY, DATA) \
-    _Py_hashtable_get(TABLE, KEY, &(DATA), sizeof(DATA))
+    size_t key_size,
+    const void *pkey);
 
 #endif   /* Py_LIMITED_API */
-
 #endif
diff --git a/Python/marshal.c b/Python/marshal.c
--- a/Python/marshal.c
+++ b/Python/marshal.c
@@ -263,10 +263,10 @@
     if (Py_REFCNT(v) == 1)
         return 0;
 
-    entry = _Py_hashtable_get_entry(p->hashtable, v);
+    entry = _Py_HASHTABLE_GET_ENTRY(p->hashtable, v);
     if (entry != NULL) {
         /* write the reference index to the stream */
-        _Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, &w, sizeof(w), entry);
+        _Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, entry, sizeof(w), &w);
         /* we don't store "long" indices in the dict */
         assert(0 <= w && w <= 0x7fffffff);
         w_byte(TYPE_REF, p);
@@ -571,7 +571,8 @@
 w_init_refs(WFILE *wf, int version)
 {
     if (version >= 3) {
-        wf->hashtable = _Py_hashtable_new(sizeof(int), _Py_hashtable_hash_ptr,
+        wf->hashtable = _Py_hashtable_new(sizeof(PyObject *), sizeof(int),
+                                          _Py_hashtable_hash_ptr,
                                           _Py_hashtable_compare_direct);
         if (wf->hashtable == NULL) {
             PyErr_NoMemory();
@@ -582,9 +583,13 @@
 }
 
 static int
-w_decref_entry(_Py_hashtable_entry_t *entry, void *Py_UNUSED(data))
+w_decref_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry,
+               void *Py_UNUSED(data))
 {
-    Py_XDECREF(entry->key);
+    PyObject *entry_key;
+
+    _Py_HASHTABLE_ENTRY_READ_KEY(ht->key_size, entry, entry_key);
+    Py_XDECREF(entry_key);
     return 0;
 }
 

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list