bpo-32436: Don't use native popcount() (also fixes bpo-32641) (#5292) · python/cpython@b7a80d5 (original) (raw)

Original file line number Diff line number Diff line change
@@ -4,11 +4,6 @@
4 4 #include "internal/pystate.h"
5 5 #include "internal/hamt.h"
6 6
7 -/* popcnt support in Visual Studio */
8 -#ifdef _MSC_VER
9 -#include <intrin.h>
10 -#endif
11 -
12 7 /*
13 8 This file provides an implementation of an immutable mapping using the
14 9 Hash Array Mapped Trie (or HAMT) datastructure.
@@ -440,18 +435,21 @@ hamt_bitpos(int32_t hash, uint32_t shift)
440 435 static inline uint32_t
441 436 hamt_bitcount(uint32_t i)
442 437 {
443 -#if defined(__GNUC__) && (__GNUC__ > 4)
444 -return (uint32_t)__builtin_popcountl(i);
445 -#elif defined(__clang__) && (__clang_major__ > 3)
446 -return (uint32_t)__builtin_popcountl(i);
447 -#elif defined(_MSC_VER)
448 -return (uint32_t)__popcnt(i);
449 -#else
450 -/* https://graphics.stanford.edu/~seander/bithacks.html */
438 +/* We could use a native popcount instruction, but that would
439 + require either adding configure flags to enable SSE4.2
440 + support or detecting it dynamically. Otherwise, we would
441 + risk CPython not working properly on older hardware.
442 +
443 + In practice, there's no observable difference in
444 + performance between using a popcount instruction or the
445 + following fallback code.
446 +
447 + The algorithm is copied from:
448 + https://graphics.stanford.edu/~seander/bithacks.html
449 + */
451 450 i = i - ((i >> 1) & 0x55555555);
452 451 i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
453 452 return ((i + (i >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
454 -#endif
455 453 }
456 454
457 455 static inline uint32_t