Compatible numeric hashes - Code Review (original) (raw)
OLD
NEW
1
1
2 /* Generic object operations; and implementation of None (NoObject) */
2 /* Generic object operations; and implementation of None (NoObject) */
3
3
4 #include "Python.h"
4 #include "Python.h"
5 #include "sliceobject.h" /* For PyEllipsis_Type */
5 #include "sliceobject.h" /* For PyEllipsis_Type */
6 #include "frameobject.h"
6 #include "frameobject.h"
7
7
8 #ifdef __cplusplus
8 #ifdef __cplusplus
9 extern "C" {
9 extern "C" {
10 #endif
10 #endif
(...skipping 626 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
637 Py_DECREF(res);
637 Py_DECREF(res);
638 return ok;
638 return ok;
639 }
639 }
640
640
641 /* Set of hash utility functions to help maintaining the invariant that
641 /* Set of hash utility functions to help maintaining the invariant that
642 if a==b then hash(a)==hash(b)
642 if a==b then hash(a)==hash(b)
643
643
644 All the utility functions (_Py_Hash*()) return "-1" to signify an error.
644 All the utility functions (_Py_Hash*()) return "-1" to signify an error.
645 */
645 */
646
646
647 /* For numeric types, the hash of a number x is based on the reduction
648 of x modulo the prime P = 2**_PyHASH_BITS - 1. It's designed so that
649 hash(x) == hash(y) whenever x and y are numerically equal, even if
650 x and y have different types.
651
652 A quick summary of the hashing strategy:
653
654 (1) First define the 'reduction of x modulo P' for any rational
655 number x; this is a standard extension of the usual notion of
656 reduction modulo P for integers. If x == p/q (written in lowest
657 terms), the reduction is interpreted as the reduction of p times
658 the inverse of the reduction of q, all modulo P; if q is exactly
659 divisible by P then define the reduction to be infinity. So we've
660 got a well-defined map
661
662 reduce : { rational numbers } -> { 0, 1, 2, ..., P-1, infinity }.
663
664 (2) Now for a rational number x, define hash(x) by:
665
666 0 if x == 0
667 1 + reduce(x-1) if x > 0
668 -hash(-x) if x < 0
669
670 If reduce(x-1) is infinity then use the predefined hash value
671 _PyHASH_INF instead. _PyHASH_INF, _PyHASH_NINF and _PyHASH_NAN are
672 also used for the hashes of float and Decimal infinities and nans.
673
674 A selling point for the above strategy is that it makes it possible
675 to compute hashes of decimal and binary floating-point numbers
676 efficiently, even if the exponent of the binary or decimal number
677 is large. The key point is that
678
679 reduce(x * y) == reduce(reduce(x) * reduce(y))
680
681 provided that {reduce(x), reduce(y)} != {0, infinity}. For binary
682 and decimal floats the reduction is never infinity, since the
683 denominator is a power of 2 (for binary) or a divisor of a power of
684 10 (for decimal). So we have:
685
686 reduce(x * 2**e) == reduce(reduce(x) * reduce(2**e))
687
688 reduce(x * 10**e) == reduce(reduce(x) * reduce(10**e))
689
690 and reduce(10**e) can be computed efficiently by the usual modular
691 exponentiation algorithm. For reduce(2**e) it's even better: since
692 P is of the form 2**n-1, reduce(2**e) is 2**(e mod n), and multiplication
693 by 2**(e mod n) modulo 2**n-1 just amounts to a rotation of bits.
694
695 */
696
647 long
697 long
648 _Py_HashDouble(double v)
698 _Py_HashDouble(double v)
649 {
699 {
650 » double intpart, fractpart;
700 » int e, sign;
651 » int expo;
701 » double m;
652 » long hipart;
702 » unsigned long x, y;
653 » long x;»» /* the final hash value */
654 » /* This is designed so that Python numbers of different types
655 » * that compare equal hash to the same value; otherwise comparisons
656 » * of mapping keys will turn out weird.
657 » */
658
703
659 » fractpart = modf(v, &intpart);
704 » if (!Py_IS_FINITE(v)) {
660 » if (fractpart == 0.0) {
705 » » if (Py_IS_INFINITY(v))
661 » » /* This must return the same hash as an equal int or long. */
706 » » » return v > 0 ? _PyHASH_INF : _PyHASH_NINF;
662 » » if (intpart > LONG_MAX/2 || -intpart > LONG_MAX/2) {
707 » » else
663 » » » /* Convert to long and use its hash. */
708 » » » return _PyHASH_NAN;
664 » » » PyObject *plong;» /* converted to Python long */
665 » » » if (Py_IS_INFINITY(intpart))
666 » » » » /* can't convert to long int -- arbitrary */
667 » » » » v = v < 0 ? -271828.0 : 314159.0;
668 » » » plong = PyLong_FromDouble(v);
669 » » » if (plong == NULL)
670 » » » » return -1;
671 » » » x = PyObject_Hash(plong);
672 » » » Py_DECREF(plong);
673 » » » return x;
674 » » }
675 » » /* Fits in a C long == a Python int, so is its own hash. */
676 » » x = (long)intpart;
677 » » if (x == -1)
678 » » » x = -2;
679 » » return x;
680 }
709 }
681 » /* The fractional part is non-zero, so we don't have to worry about
710
682 » * making this match the hash of some other type.
711 » m = frexp(v, &e);
683 » * Use frexp to get at the bits in the double.
712
684 » * Since the VAX D double format has 56 mantissa bits, which is the
713 » sign = 1;
685 » * most of any double format in use, each of these parts may have as
714 » if (m < 0) {
686 » * many as (but no more than) 56 significant bits.
715 » » sign = -1;
687 » * So, assuming sizeof(long) >= 4, each part can be broken into two
716 » » m = -m;
688 » * longs; frexp and multiplication are used to do that.
717 » }
689 » * Also, since the Cray double format has 15 exponent bits, which is
718
690 » * the most of any double format in use, shifting the exponent field
719 » /* process 28 bits at a time; this should work well both for binary
691 » * left by 15 won't overflow a long (again assuming sizeof(long) >= 4).
720 » and hexadecimal floating point. */
692 » */
721 » x = 0;
693 » v = frexp(v, &expo);
722 » while (m) {
694 » v *= 2147483648.0;» /* 2**31 */
723 » » x = ((x << 28) & _PyHASH_MASK) | x >> (_PyHASH_BITS - 28);
695 » hipart = (long)v;» /* take the top 32 bits */
724 » » m *= 268435456.0; /* 2**28 */
696 » v = (v - (double)hipart) * 2147483648.0; /* get the next 32 bits */
725 » » e -= 28;
697 » x = hipart + (long)v + (expo << 15);
726 » » y = (unsigned long)m; /* pull out integer part */
698 » if (x == -1)
727 » » m -= y;
699 » » x = -2;
728 » » x += y;
700 » return x;
729 » » if (x > _PyHASH_MASK)
730 » » » x -= _PyHASH_MASK;
731 » }
732
733 » /* adjust for the exponent; first reduce it modulo _PyHASH_BITS */
734 » e = e >= 0 ? e % _PyHASH_BITS : _PyHASH_BITS-1-((-1-e) % _PyHASH_BITS);
735 » x = ((x << e) & _PyHASH_MASK) | x >> (_PyHASH_BITS - e);
736
737 » x = x * sign;
738 » if (x == (unsigned long)-1)
739 » » x = (unsigned long)-2;
740 » return (long)x;
701 }
741 }
702
742
703 long
743 long
704 _Py_HashPointer(void *p)
744 _Py_HashPointer(void *p)
705 {
745 {
706 long x;
746 long x;
707 size_t y = (size_t)p;
747 size_t y = (size_t)p;
708 /* bottom 3 or 4 bits are likely to be 0; rotate y by 4 to avoid
748 /* bottom 3 or 4 bits are likely to be 0; rotate y by 4 to avoid
709 excessive hash collisions for dicts and sets */
749 excessive hash collisions for dicts and sets */
710 y = (y >> 4) | (y << (8 * SIZEOF_VOID_P - 4));
750 y = (y >> 4) | (y << (8 * SIZEOF_VOID_P - 4));
(...skipping 1160 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1871 assert(op->ob_refcnt == 0);
1911 assert(op->ob_refcnt == 0);
1872 ++_PyTrash_delete_nesting;
1912 ++_PyTrash_delete_nesting;
1873 (*dealloc)(op);
1913 (*dealloc)(op);
1874 --_PyTrash_delete_nesting;
1914 --_PyTrash_delete_nesting;
1875 }
1915 }
1876 }
1916 }
1877
1917
1878 #ifdef __cplusplus
1918 #ifdef __cplusplus
1879 }
1919 }
1880 #endif
1920 #endif
OLD
NEW