bpo-43475: Fix worst case collision behavior for NaN instances (GH-2… · python/cpython@a07da09 (original) (raw)
10 files changed
lines changed
Original file line number |
Diff line number |
Diff line change |
@@ -692,10 +692,9 @@ Here are the rules in detail: |
|
|
692 |
692 |
as ``-hash(-x)``. If the resulting hash is ``-1``, replace it with |
693 |
693 |
``-2``. |
694 |
694 |
|
695 |
|
-- The particular values ``sys.hash_info.inf``, ``-sys.hash_info.inf`` |
696 |
|
- and ``sys.hash_info.nan`` are used as hash values for positive |
697 |
|
- infinity, negative infinity, or nans (respectively). (All hashable |
698 |
|
- nans have the same hash value.) |
|
695 |
+- The particular values ``sys.hash_info.inf`` and ``-sys.hash_info.inf`` |
|
696 |
+ are used as hash values for positive |
|
697 |
+ infinity or negative infinity (respectively). |
699 |
698 |
|
700 |
699 |
- For a :class:`complex` number ``z``, the hash values of the real |
701 |
700 |
and imaginary parts are combined by computing ``hash(z.real) + |
@@ -740,7 +739,7 @@ number, :class:`float`, or :class:`complex`:: |
|
|
740 |
739 |
"""Compute the hash of a float x.""" |
741 |
740 |
|
742 |
741 |
if math.isnan(x): |
743 |
|
- return sys.hash_info.nan |
|
742 |
+ return object.__hash__(x) |
744 |
743 |
elif math.isinf(x): |
745 |
744 |
return sys.hash_info.inf if x > 0 else -sys.hash_info.inf |
746 |
745 |
else: |
| Original file line number | Diff line number | Diff line change | | |
| ------------------------------------- | ------------------- | -------------------------------------------------------------------------- | ------------------------------------------- | |
| @@ -855,7 +855,7 @@ always available. | | | | |
| 855 | 855 | +---------------------+--------------------------------------------------+ | | |
| 856 | 856 | \| :const:`inf` \| hash value returned for a positive infinity \| | |
| 857 | 857 | +---------------------+--------------------------------------------------+ | | |
| 858 | | - \| :const:`nan` \| hash value returned for a nan \| | |
| | 858 | + \| :const:`nan` \| (this attribute is no longer used) \| | |
| 859 | 859 | +---------------------+--------------------------------------------------+ | | |
| 860 | 860 | \| :const:`imag` \| multiplier used for the imaginary part of a \| | |
| 861 | 861 | \| \| complex number \| | |
Original file line number |
Diff line number |
Diff line change |
@@ -7,7 +7,7 @@ extern "C" { |
|
|
7 |
7 |
|
8 |
8 |
/* Helpers for hash functions */ |
9 |
9 |
#ifndef Py_LIMITED_API |
10 |
|
-PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double); |
|
10 |
+PyAPI_FUNC(Py_hash_t) _Py_HashDouble(PyObject *, double); |
11 |
11 |
PyAPI_FUNC(Py_hash_t) _Py_HashPointer(const void*); |
12 |
12 |
// Similar to _Py_HashPointer(), but don't replace -1 with -2 |
13 |
13 |
PyAPI_FUNC(Py_hash_t) _Py_HashPointerRaw(const void*); |
@@ -29,7 +29,6 @@ PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t); |
|
|
29 |
29 |
|
30 |
30 |
#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1) |
31 |
31 |
#define _PyHASH_INF 314159 |
32 |
|
-#define _PyHASH_NAN 0 |
33 |
32 |
#define _PyHASH_IMAG _PyHASH_MULTIPLIER |
34 |
33 |
|
35 |
34 |
|
Original file line number |
Diff line number |
Diff line change |
@@ -951,7 +951,7 @@ def __hash__(self): |
|
|
951 |
951 |
if self.is_snan(): |
952 |
952 |
raise TypeError('Cannot hash a signaling NaN value.') |
953 |
953 |
elif self.is_nan(): |
954 |
|
-return _PyHASH_NAN |
|
954 |
+return super().__hash__() |
955 |
955 |
else: |
956 |
956 |
if self._sign: |
957 |
957 |
return -_PyHASH_INF |
Original file line number |
Diff line number |
Diff line change |
@@ -0,0 +1,3 @@ |
|
|
|
1 |
+Hashes of NaN values now depend on object identity. Formerly, they always |
|
2 |
+hashed to 0 even though NaN values are not equal to one another. Having the |
|
3 |
+same hash for unequal values caused pile-ups in hash tables. |
Original file line number |
Diff line number |
Diff line change |
@@ -4536,7 +4536,6 @@ _dec_hash(PyDecObject *v) |
|
|
4536 |
4536 |
#error "No valid combination of CONFIG_64, CONFIG_32 and _PyHASH_BITS" |
4537 |
4537 |
#endif |
4538 |
4538 |
const Py_hash_t py_hash_inf = 314159; |
4539 |
|
-const Py_hash_t py_hash_nan = 0; |
4540 |
4539 |
mpd_uint_t ten_data[1] = {10}; |
4541 |
4540 |
mpd_t ten = {MPD_POS|MPD_STATIC |
4542 |
4541 |
0, 2, 1, 1, ten_data}; |
@@ -4555,7 +4554,7 @@ _dec_hash(PyDecObject *v) |
|
|
4555 |
4554 |
return -1; |
4556 |
4555 |
} |
4557 |
4556 |
else if (mpd_isnan(MPD(v))) { |
4558 |
|
-return py_hash_nan; |
|
4557 |
+return _Py_HashPointer(v); |
4559 |
4558 |
} |
4560 |
4559 |
else { |
4561 |
4560 |
return py_hash_inf * mpd_arith_sign(MPD(v)); |
@@ -5939,5 +5938,3 @@ PyInit__decimal(void) |
|
|
5939 |
5938 |
|
5940 |
5939 |
return NULL; /* GCOV_NOT_REACHED */ |
5941 |
5940 |
} |
5942 |
|
- |
5943 |
|
- |
Original file line number |
Diff line number |
Diff line change |
@@ -412,10 +412,10 @@ static Py_hash_t |
|
|
412 |
412 |
complex_hash(PyComplexObject *v) |
413 |
413 |
{ |
414 |
414 |
Py_uhash_t hashreal, hashimag, combined; |
415 |
|
-hashreal = (Py_uhash_t)_Py_HashDouble(v->cval.real); |
|
415 |
+hashreal = (Py_uhash_t)_Py_HashDouble((PyObject *) v, v->cval.real); |
416 |
416 |
if (hashreal == (Py_uhash_t)-1) |
417 |
417 |
return -1; |
418 |
|
-hashimag = (Py_uhash_t)_Py_HashDouble(v->cval.imag); |
|
418 |
+hashimag = (Py_uhash_t)_Py_HashDouble((PyObject *)v, v->cval.imag); |
419 |
419 |
if (hashimag == (Py_uhash_t)-1) |
420 |
420 |
return -1; |
421 |
421 |
/* Note: if the imaginary part is 0, hashimag is 0 now, |
Original file line number |
Diff line number |
Diff line change |
@@ -556,7 +556,7 @@ float_richcompare(PyObject *v, PyObject *w, int op) |
|
|
556 |
556 |
static Py_hash_t |
557 |
557 |
float_hash(PyFloatObject *v) |
558 |
558 |
{ |
559 |
|
-return _Py_HashDouble(v->ob_fval); |
|
559 |
+return _Py_HashDouble((PyObject *)v, v->ob_fval); |
560 |
560 |
} |
561 |
561 |
|
562 |
562 |
static PyObject * |
Original file line number |
Diff line number |
Diff line change |
@@ -56,8 +56,12 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0}; |
|
|
56 |
56 |
If the result of the reduction is infinity (this is impossible for |
57 |
57 |
integers, floats and Decimals) then use the predefined hash value |
58 |
58 |
_PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead. |
59 |
|
- _PyHASH_INF, -_PyHASH_INF and _PyHASH_NAN are also used for the |
60 |
|
- hashes of float and Decimal infinities and nans. |
|
59 |
+ _PyHASH_INF and -_PyHASH_INF are also used for the |
|
60 |
+ hashes of float and Decimal infinities. |
|
61 |
+ |
|
62 |
+ NaNs hash with a pointer hash. Having distinct hash values prevents |
|
63 |
+ catastrophic pileups from distinct NaN instances which used to always |
|
64 |
+ have the same hash value but would compare unequal. |
61 |
65 |
|
62 |
66 |
A selling point for the above strategy is that it makes it possible |
63 |
67 |
to compute hashes of decimal and binary floating-point numbers |
@@ -82,8 +86,10 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0}; |
|
|
82 |
86 |
|
83 |
87 |
*/ |
84 |
88 |
|
|
89 |
+Py_hash_t _Py_HashPointer(const void *); |
|
90 |
+ |
85 |
91 |
Py_hash_t |
86 |
|
-_Py_HashDouble(double v) |
|
92 |
+_Py_HashDouble(PyObject *inst, double v) |
87 |
93 |
{ |
88 |
94 |
int e, sign; |
89 |
95 |
double m; |
@@ -93,7 +99,7 @@ _Py_HashDouble(double v) |
|
|
93 |
99 |
if (Py_IS_INFINITY(v)) |
94 |
100 |
return v > 0 ? _PyHASH_INF : -_PyHASH_INF; |
95 |
101 |
else |
96 |
|
-return _PyHASH_NAN; |
|
102 |
+return _Py_HashPointer(inst); |
97 |
103 |
} |
98 |
104 |
|
99 |
105 |
m = frexp(v, &e); |
Original file line number |
Diff line number |
Diff line change |
@@ -1405,7 +1405,7 @@ get_hash_info(PyThreadState *tstate) |
|
|
1405 |
1405 |
PyStructSequence_SET_ITEM(hash_info, field++, |
1406 |
1406 |
PyLong_FromLong(_PyHASH_INF)); |
1407 |
1407 |
PyStructSequence_SET_ITEM(hash_info, field++, |
1408 |
|
-PyLong_FromLong(_PyHASH_NAN)); |
|
1408 |
+PyLong_FromLong(0)); // This is no longer used |
1409 |
1409 |
PyStructSequence_SET_ITEM(hash_info, field++, |
1410 |
1410 |
PyLong_FromLong(_PyHASH_IMAG)); |
1411 |
1411 |
PyStructSequence_SET_ITEM(hash_info, field++, |