[Python-Dev] One more dict trick (original) (raw)
Tim Peters tim.one@home.com
Wed, 30 May 2001 23:53:53 -0400
- Previous message: [Python-Dev] SF hacked
- Next message: [Python-Dev] One more dict trick
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
This is a multi-part message in MIME format.
------=_NextPart_000_0006_01C0E963.C83DC7A0 Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: 7bit
If anyone has an app known or suspected to be sensitive to dict timing, please try the patch here. Best I've been able to tell, it's a win. But it's a radical change in approach, so I don't want to rush it.
This gets rid of the polynomial machinery entirely, along with the branches associated with updating the things, and the dictobject struct member holding the table's poly. Instead it relies on that
i = (5*i + 1) % n
is a full-period RNG whenever n is a power of 2 (that's what guarantees it will visit every slot), but perturbs that by adding in a few bits from the full hash code shifted right each time (that's what guarantees every bit of the hash code eventually influences the probe sequence, avoiding simple quadratic-time degenerate cases).
------=_NextPart_000_0006_01C0E963.C83DC7A0 Content-Type: text/plain; name="dict.txt" Content-Transfer-Encoding: quoted-printable Content-Disposition: attachment; filename="dict.txt"
Index: Objects/dictobject.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D RCS file: /cvsroot/python/python/dist/src/Objects/dictobject.c,v retrieving revision 2.96 diff -c -r2.96 dictobject.c *** Objects/dictobject.c 2001/05/27 07:39:22 2.96 --- Objects/dictobject.c 2001/05/31 03:29:23
*** 85,123 **** iteration. */ =20
- static long polys[] =3D {
- /* 4 + 3, / / first active entry if MINSIZE =3D=3D 4 */
8 + 3, /* first active entry if MINSIZE =3D=3D 8 */
16 + 3,
32 + 5,
64 + 3,
128 + 3,
256 + 29,
512 + 17,
1024 + 9,
2048 + 5,
4096 + 83,
8192 + 27,
16384 + 43,
32768 + 3,
65536 + 45,
131072 + 9,
262144 + 39,
524288 + 39,
1048576 + 9,
2097152 + 5,
4194304 + 3,
8388608 + 33,
16777216 + 27,
33554432 + 9,
67108864 + 71,
134217728 + 39,
268435456 + 9,
536870912 + 5,
1073741824 + 83
/* 2147483648 + 9 -- if we ever boost this to unsigned long */
- }; -=20 /* Object used as dummy key to fill deleted entries */ static PyObject dummy; / Initialized by first call to = newdictobject() */ =20 --- 85,90 ----
*** 168,174 **** int ma_fill; /* # Active + # Dummy / int ma_used; / # Active / int ma_size; / total # slots in ma_table */
int ma_poly; /* appopriate entry from polys vector */ /* ma_table points to ma_smalltable for small tables, else to * additional malloc'ed memory. ma_table is never NULL! This rule * saves repeated runtime null-tests in the workhorse getitem and
--- 135,140 ----
*** 202,209 ****
(mp)->ma_table =3D (mp)->ma_smalltable;
(mp)->ma_size =3D MINSIZE;
(mp)->ma_used =3D (mp)->ma_fill =3D 0; \
(mp)->ma_poly =3D polys[0]; \
assert(MINSIZE < (mp)->ma_poly && (mp)->ma_poly < MINSIZE*2); \ } while(0)
=20 PyObject * --- 168,173 ----
*** 252,257 **** --- 216,240 ---- a dictentry* for which the me_value field is NULL. Exceptions are = never reported by this function, and outstanding exceptions are maintained. */ +=20
- /* #define DUMP_HASH_STUFF */
- #ifdef DUMP_HASH_STUFF
- static int nEntry =3D 0, nCollide =3D 0, nTrip =3D 0;
- #define BUMP_ENTRY ++nEntry
- #define BUMP_COLLIDE ++nCollide
- #define BUMP_TRIP ++nTrip
- #define PRINT_HASH_STUFF \
if ((nEntry & 0x1ff) =3D=3D 0) \
+=20fprintf(stderr, "%d %d %d\n", nEntry, nCollide, nTrip)
- #else
- #define BUMP_ENTRY
- #define BUMP_COLLIDE
- #define BUMP_TRIP
- #define PRINT_HASH_STUFF
- #endif
+=20 +=20 static dictentry * lookdict(dictobject *mp, PyObject *key, register long hash) {
*** 265,270 **** --- 248,254 ---- register int checked_error =3D 0; register int cmp; PyObject *err_type, *err_value, *err_tb;
BUMP_ENTRY; /* We must come up with (i, incr) such that 0 <=3D i < ma_size and 0 < incr < ma_size and both are a function of hash. i is the initial table index and incr the initial probe offset. */
*** 294,309 **** } freeslot =3D NULL; } ! /* Derive incr from hash, just to make it more arbitrary. Note that ! incr must not be 0, or we will get into an infinite loop./ ! incr =3D hash ^ ((unsigned long)hash >> 3); !=20 / In the loop, me_key =3D=3D dummy is by far (factor of 100s) the least likely outcome, so test for that last. / for (;;) { ! if (!incr) ! incr =3D 1; / and incr will never be 0 again / ! ep =3D &ep0[(i + incr) & mask]; if (ep->me_key =3D=3D NULL) { if (restore_error) PyErr_Restore(err_type, err_value, err_tb); --- 278,292 ---- } freeslot =3D NULL; } ! incr =3D hash; ! BUMP_COLLIDE; / In the loop, me_key =3D=3D dummy is by far (factor of 100s) the least likely outcome, so test for that last. */ for (;;) { ! BUMP_TRIP; ! i =3D (i << 2) + i + (incr & 0xf) + 1; ! incr >>=3D 4; ! ep =3D &ep0[i & mask]; if (ep->me_key =3D=3D NULL) { if (restore_error) PyErr_Restore(err_type, err_value, err_tb);
*** 335,344 **** } else if (ep->me_key =3D=3D dummy && freeslot =3D=3D NULL) freeslot =3D ep;
/* Cycle through GF(2**n). */
if (incr & 1)
incr ^=3D mp->ma_poly; /* clears the lowest bit */
} =20 --- 318,323 ----incr >>=3D 1; }
*** 370,375 **** --- 349,356 ---- mp->ma_lookup =3D lookdict; return lookdict(mp, key, hash); }
BUMP_ENTRY;
PRINT_HASH_STUFF; /* We must come up with (i, incr) such that 0 <=3D i < ma_size and 0 < incr < ma_size and both are a function of hash */ i =3D hash & mask;
*** 387,400 **** } /* Derive incr from hash, just to make it more arbitrary. Note that incr must not be 0, or we will get into an infinite loop./ ! incr =3D hash ^ ((unsigned long)hash >> 3); !=20 / In the loop, me_key =3D=3D dummy is by far (factor of 100s) the least likely outcome, so test for that last. / for (;;) { ! if (!incr) ! incr =3D 1; / and incr will never be 0 again / ! ep =3D &ep0[(i + incr) & mask]; if (ep->me_key =3D=3D NULL) return freeslot =3D=3D NULL ? ep : freeslot; if (ep->me_key =3D=3D key --- 368,382 ---- } / Derive incr from hash, just to make it more arbitrary. Note that incr must not be 0, or we will get into an infinite loop./ ! incr =3D hash; ! BUMP_COLLIDE; / In the loop, me_key =3D=3D dummy is by far (factor of 100s) the least likely outcome, so test for that last. */ for (;;) { ! BUMP_TRIP; ! i =3D (i << 2) + i + (incr & 0xf) + 1; ! incr >>=3D 4; ! ep =3D &ep0[i & mask]; if (ep->me_key =3D=3D NULL) return freeslot =3D=3D NULL ? ep : freeslot; if (ep->me_key =3D=3D key
*** 404,413 **** return ep; if (ep->me_key =3D=3D dummy && freeslot =3D=3D NULL) freeslot =3D ep;
/* Cycle through GF(2**n). */
if (incr & 1)
incr ^=3D mp->ma_poly; /* clears the lowest bit */
} =20 --- 386,391 ----incr >>=3D 1; }
*** 448,454 **** static int dictresize(dictobject *mp, int minused) { ! int newsize, newpoly; dictentry *oldtable, *newtable, *ep; int i; int is_oldtable_malloced; --- 426,432 ---- static int dictresize(dictobject *mp, int minused) { ! int newsize; dictentry *oldtable, *newtable, *ep; int i; int is_oldtable_malloced;
*** 456,475 **** =20 assert(minused >=3D 0); =20 ! /* Find the smallest table size > minused, and its poly[] entry. / ! newpoly =3D 0; ! newsize =3D MINSIZE; ! for (i =3D 0; i < sizeof(polys)/sizeof(polys[0]); ++i) { ! if (newsize > minused) { ! newpoly =3D polys[i]; ! break; ! } ! newsize <<=3D 1; ! if (newsize < 0) /* overflow */ ! break; ! } ! if (newpoly =3D=3D 0) { ! /* Ran out of polynomials or newsize overflowed. */ PyErr_NoMemory(); return -1; } --- 434,445 ---- =20 assert(minused >=3D 0); =20 ! / Find the smallest table size > minused. */ ! for (newsize =3D MINSIZE; ! newsize <=3D minused && newsize >=3D 0; ! newsize <<=3D 1) ! ; ! if (newsize < 0) { PyErr_NoMemory(); return -1; }
*** 511,517 **** mp->ma_table =3D newtable; mp->ma_size =3D newsize; memset(newtable, 0, sizeof(dictentry) * newsize);
mp->ma_poly =3D newpoly; mp->ma_used =3D 0; i =3D mp->ma_fill; mp->ma_fill =3D 0;
--- 481,486 ----
------=_NextPart_000_0006_01C0E963.C83DC7A0--
- Previous message: [Python-Dev] SF hacked
- Next message: [Python-Dev] One more dict trick
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]