Compatible numeric hashes - Code Review (original) (raw)

OLD

NEW

1

1

2 /* Generic object operations; and implementation of None (NoObject) */

2 /* Generic object operations; and implementation of None (NoObject) */

3

3

4 #include "Python.h"

4 #include "Python.h"

5 #include "sliceobject.h" /* For PyEllipsis_Type */

5 #include "sliceobject.h" /* For PyEllipsis_Type */

6 #include "frameobject.h"

6 #include "frameobject.h"

7

7

8 #ifdef __cplusplus

8 #ifdef __cplusplus

9 extern "C" {

9 extern "C" {

10 #endif

10 #endif

(...skipping 626 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...

637 Py_DECREF(res);

637 Py_DECREF(res);

638 return ok;

638 return ok;

639 }

639 }

640

640

641 /* Set of hash utility functions to help maintaining the invariant that

641 /* Set of hash utility functions to help maintaining the invariant that

642 if a==b then hash(a)==hash(b)

642 if a==b then hash(a)==hash(b)

643

643

644 All the utility functions (_Py_Hash*()) return "-1" to signify an error.

644 All the utility functions (_Py_Hash*()) return "-1" to signify an error.

645 */

645 */

646

646

647 /* For numeric types, the hash of a number x is based on the reduction

648 of x modulo the prime P = 2**_PyHASH_BITS - 1. It's designed so that

649 hash(x) == hash(y) whenever x and y are numerically equal, even if

650 x and y have different types.

651

652 A quick summary of the hashing strategy:

653

654 (1) First define the 'reduction of x modulo P' for any rational

655 number x; this is a standard extension of the usual notion of

656 reduction modulo P for integers. If x == p/q (written in lowest

657 terms), the reduction is interpreted as the reduction of p times

658 the inverse of the reduction of q, all modulo P; if q is exactly

659 divisible by P then define the reduction to be infinity. So we've

660 got a well-defined map

661

662 reduce : { rational numbers } -> { 0, 1, 2, ..., P-1, infinity }.

663

664 (2) Now for a rational number x, define hash(x) by:

665

666 0 if x == 0

667 1 + reduce(x-1) if x > 0

668 -hash(-x) if x < 0

669

670 If reduce(x-1) is infinity then use the predefined hash value

671 _PyHASH_INF instead. _PyHASH_INF, _PyHASH_NINF and _PyHASH_NAN are

672 also used for the hashes of float and Decimal infinities and nans.

673

674 A selling point for the above strategy is that it makes it possible

675 to compute hashes of decimal and binary floating-point numbers

676 efficiently, even if the exponent of the binary or decimal number

677 is large. The key point is that

678

679 reduce(x * y) == reduce(reduce(x) * reduce(y))

680

681 provided that {reduce(x), reduce(y)} != {0, infinity}. For binary

682 and decimal floats the reduction is never infinity, since the

683 denominator is a power of 2 (for binary) or a divisor of a power of

684 10 (for decimal). So we have:

685

686 reduce(x * 2**e) == reduce(reduce(x) * reduce(2**e))

687

688 reduce(x * 10**e) == reduce(reduce(x) * reduce(10**e))

689

690 and reduce(10**e) can be computed efficiently by the usual modular

691 exponentiation algorithm. For reduce(2**e) it's even better: since

692 P is of the form 2**n-1, reduce(2**e) is 2**(e mod n), and multiplication

693 by 2**(e mod n) modulo 2**n-1 just amounts to a rotation of bits.

694

695 */

696

647 long

697 long

648 _Py_HashDouble(double v)

698 _Py_HashDouble(double v)

649 {

699 {

650 » double intpart, fractpart;

700 » int e, sign;

651 » int expo;

701 » double m;

652 » long hipart;

702 » unsigned long x, y;

653 » long x;»» /* the final hash value */

654 » /* This is designed so that Python numbers of different types

655 » * that compare equal hash to the same value; otherwise comparisons

656 » * of mapping keys will turn out weird.

657 » */

658

703

659 » fractpart = modf(v, &intpart);

704 » if (!Py_IS_FINITE(v)) {

660 » if (fractpart == 0.0) {

705 » » if (Py_IS_INFINITY(v))

661 » » /* This must return the same hash as an equal int or long. */

706 » » » return v > 0 ? _PyHASH_INF : _PyHASH_NINF;

662 » » if (intpart > LONG_MAX/2 || -intpart > LONG_MAX/2) {

707 » » else

663 » » » /* Convert to long and use its hash. */

708 » » » return _PyHASH_NAN;

664 » » » PyObject *plong;» /* converted to Python long */

665 » » » if (Py_IS_INFINITY(intpart))

666 » » » » /* can't convert to long int -- arbitrary */

667 » » » » v = v < 0 ? -271828.0 : 314159.0;

668 » » » plong = PyLong_FromDouble(v);

669 » » » if (plong == NULL)

670 » » » » return -1;

671 » » » x = PyObject_Hash(plong);

672 » » » Py_DECREF(plong);

673 » » » return x;

674 » » }

675 » » /* Fits in a C long == a Python int, so is its own hash. */

676 » » x = (long)intpart;

677 » » if (x == -1)

678 » » » x = -2;

679 » » return x;

680 }

709 }

681 » /* The fractional part is non-zero, so we don't have to worry about

710

682 » * making this match the hash of some other type.

711 » m = frexp(v, &e);

683 » * Use frexp to get at the bits in the double.

712

684 » * Since the VAX D double format has 56 mantissa bits, which is the

713 » sign = 1;

685 » * most of any double format in use, each of these parts may have as

714 » if (m < 0) {

686 » * many as (but no more than) 56 significant bits.

715 » » sign = -1;

687 » * So, assuming sizeof(long) >= 4, each part can be broken into two

716 » » m = -m;

688 » * longs; frexp and multiplication are used to do that.

717 » }

689 » * Also, since the Cray double format has 15 exponent bits, which is

718

690 » * the most of any double format in use, shifting the exponent field

719 » /* process 28 bits at a time; this should work well both for binary

691 » * left by 15 won't overflow a long (again assuming sizeof(long) >= 4).

720 » and hexadecimal floating point. */

692 » */

721 » x = 0;

693 » v = frexp(v, &expo);

722 » while (m) {

694 » v *= 2147483648.0;» /* 2**31 */

723 » » x = ((x << 28) & _PyHASH_MASK) | x >> (_PyHASH_BITS - 28);

695 » hipart = (long)v;» /* take the top 32 bits */

724 » » m *= 268435456.0; /* 2**28 */

696 » v = (v - (double)hipart) * 2147483648.0; /* get the next 32 bits */

725 » » e -= 28;

697 » x = hipart + (long)v + (expo << 15);

726 » » y = (unsigned long)m; /* pull out integer part */

698 » if (x == -1)

727 » » m -= y;

699 » » x = -2;

728 » » x += y;

700 » return x;

729 » » if (x > _PyHASH_MASK)

730 » » » x -= _PyHASH_MASK;

731 » }

732

733 » /* adjust for the exponent; first reduce it modulo _PyHASH_BITS */

734 » e = e >= 0 ? e % _PyHASH_BITS : _PyHASH_BITS-1-((-1-e) % _PyHASH_BITS);

735 » x = ((x << e) & _PyHASH_MASK) | x >> (_PyHASH_BITS - e);

736

737 » x = x * sign;

738 » if (x == (unsigned long)-1)

739 » » x = (unsigned long)-2;

740 » return (long)x;

701 }

741 }

702

742

703 long

743 long

704 _Py_HashPointer(void *p)

744 _Py_HashPointer(void *p)

705 {

745 {

706 long x;

746 long x;

707 size_t y = (size_t)p;

747 size_t y = (size_t)p;

708 /* bottom 3 or 4 bits are likely to be 0; rotate y by 4 to avoid

748 /* bottom 3 or 4 bits are likely to be 0; rotate y by 4 to avoid

709 excessive hash collisions for dicts and sets */

749 excessive hash collisions for dicts and sets */

710 y = (y >> 4) | (y << (8 * SIZEOF_VOID_P - 4));

750 y = (y >> 4) | (y << (8 * SIZEOF_VOID_P - 4));

(...skipping 1160 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...

1871 assert(op->ob_refcnt == 0);

1911 assert(op->ob_refcnt == 0);

1872 ++_PyTrash_delete_nesting;

1912 ++_PyTrash_delete_nesting;

1873 (*dealloc)(op);

1913 (*dealloc)(op);

1874 --_PyTrash_delete_nesting;

1914 --_PyTrash_delete_nesting;

1875 }

1915 }

1876 }

1916 }

1877

1917

1878 #ifdef __cplusplus

1918 #ifdef __cplusplus

1879 }

1919 }

1880 #endif

1920 #endif

OLD

NEW