cpython: a887596b841f

--- a/Include/setobject.h
+++ b/Include/setobject.h
@@ -51,9 +51,9 @@ struct _setobject {
      */
     setentry *table;
     setentry *(*lookup)(PySetObject *so, PyObject *key, Py_hash_t hash);

 #endif /* Py_LIMITED_API */

--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -68,6 +68,11 @@ chaining would be substantial (100% with
 The initial probe index is computed as hash mod the table size.
 Subsequent probe indices are computed as explained in Objects/dictobject.c.
 
+To improve cache locality, each probe is done in pairs.
+After the probe is examined, an adjacent entry is then examined as well.
+The likelihood is that an adjacent entry is in the same cache line and
+can be examined more cheaply than another probe elsewhere in memory.
+
 All arithmetic on hash should ignore overflow.
 
 Unlike the dictionary implementation, the lookkey functions can return
@@ -77,7 +82,7 @@ NULL if the rich comparison returns an e
 static setentry *
 set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
 {
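
A minimal sketch of the paired probing described in the new comment block, under
stated assumptions: keys are compared by pointer identity only (the real
set_lookkey does a rich comparison and handles dummy entries and mutation
restarts), the table size is a power of two with mask == size - 1, PERTURB_SHIFT
is 5 as in CPython, and the internal setentry definition from setobject.h is in
scope. The name sketch_lookkey and its parameter list are illustrative, not part
of the patch.

    /* Sketch only: probe in pairs so the second check usually hits the
       same cache line as the first.  Not the committed code. */
    static setentry *
    sketch_lookkey(setentry *table, size_t mask, PyObject *key, Py_hash_t hash)
    {
        size_t perturb = (size_t)hash;
        size_t i = (size_t)hash & mask;     /* initial probe: hash mod table size */

        while (1) {
            setentry *entry = &table[i];
            if (entry->key == NULL ||
                (entry->hash == hash && entry->key == key))
                return entry;

            /* Adjacent entry: likely in the same cache line, so this
               second look is far cheaper than a fresh random probe. */
            entry = &table[(i + 1) & mask];
            if (entry->key == NULL ||
                (entry->hash == hash && entry->key == key))
                return entry;

            /* Otherwise fall back to the usual perturb-based sequence. */
            perturb >>= 5;                  /* PERTURB_SHIFT */
            i = (i * 5 + perturb + 1) & mask;
        }
    }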

-    if (entry->hash == hash) {
         startkey = entry->key;
         Py_INCREF(startkey);

@@ -107,14 +111,15 @@ set_lookkey(PySetObject *so, PyObject *k
             return set_lookkey(so, key, hash);
         }
     }
-    freeslot = (entry->key == dummy) ? entry : NULL;
+    /* In the loop, key == dummy is by far (factor of 100s) the least likely
+       outcome, so test for that last. */
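
The ordering that comment argues for could look like the following inside the
probe loop; a sketch only, reusing the entry, freeslot, and dummy names from
setobject.c and again substituting pointer identity for the rich comparison:

    if (entry->hash == hash && entry->key == key)
        return entry;                       /* likely: a hit */
    if (entry->key == NULL)                 /* empty slot ends the search */
        return (freeslot != NULL) ? freeslot : entry;
    if (entry->key == dummy && freeslot == NULL)
        freeslot = entry;                   /* rarest case, so tested last */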

@@ -134,14 +139,42 @@ set_lookkey(PySetObject *so, PyObject *k
             break;
         }
         else {

+

+

+    }
     return entry;
 }

@@ -154,7 +187,7 @@ set_lookkey(PySetObject *so, PyObject *k
 static setentry *
 set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash)
 {

+

+        i = i * 5 + perturb + 1;

+
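
The i = i * 5 + perturb + 1 line added above is the same open-addressing
recurrence described in Objects/dictobject.c. Below is a hedged, self-contained
sketch of how it drives the probe sequence; once perturb decays to zero,
i = i*5 + 1 modulo a power of two is a full-period generator, so every slot is
eventually visited. The mask and hash values are made up for illustration.

    #include <stdio.h>
    #include <stddef.h>

    /* Sketch: print the first few probe indices for one hash value. */
    int main(void)
    {
        size_t mask = 31;                       /* table of 32 slots */
        size_t hash = 123456789u;               /* stand-in for a key's hash */
        size_t perturb = hash;
        size_t i = hash & mask;
        for (int n = 0; n < 8; n++) {
            printf("probe %d: slot %zu (and neighbor %zu)\n",
                   n, i, (i + 1) & mask);
            perturb >>= 5;                      /* PERTURB_SHIFT */
            i = (i * 5 + perturb + 1) & mask;   /* full-period once perturb == 0 */
        }
        return 0;
    }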

@@ -244,17 +304,23 @@ is responsible for incref'ing key.
 static void
 set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash)
 {
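
set_insert_clean is called only when the key is known to be absent and the
table holds no dummy entries (for example while rebuilding the table during a
resize), so it can claim the first empty slot it finds. Here is a sketch of a
pair-probing version under the same assumptions as the lookup sketch above; the
committed function also maintains the set's fill and used counters, and the
caller has already incref'ed key.

    /* Sketch: clean insert, no dummies and no duplicate keys possible. */
    static void
    sketch_insert_clean(setentry *table, size_t mask, PyObject *key, Py_hash_t hash)
    {
        size_t perturb = (size_t)hash;
        size_t i = (size_t)hash & mask;
        setentry *entry;

        while (1) {
            entry = &table[i];
            if (entry->key == NULL)
                break;                        /* first probe found a slot */
            entry = &table[(i + 1) & mask];   /* adjacent slot, same cache line */
            if (entry->key == NULL)
                break;
            perturb >>= 5;                    /* PERTURB_SHIFT */
            i = (i * 5 + perturb + 1) & mask;
        }
        entry->key = key;                     /* reference already owned by caller */
        entry->hash = hash;
    }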