diff --git a/ewah/ewok.h b/ewah/ewok.h
index f6ad190..e54d822 100644
--- a/ewah/ewok.h
+++ b/ewah/ewok.h
@@ -34,17 +34,25 @@ typedef uint64_t eword_t;
 #define BITS_IN_WORD (sizeof(eword_t) * 8)
 
 /**
- * Do not use __builtin_popcountll. The GCC implementation
- * is notoriously slow on all platforms.
+ * Count the set bits in a 64-bit word.
+ *
+ * Compilers that define __GNUC__ use __builtin_popcountll; build with
+ * -march=native and latest GCC to get fast popcount. All other
+ * compilers keep the portable bit-twiddling implementation.
  *
  * See: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=36041
+ *      https://gcc.gnu.org/viewcvs/gcc/trunk/libgcc/libgcc2.c?r1=200506&r2=200505&pathrev=200506
  */
 static inline uint32_t ewah_bit_popcount64(uint64_t x)
 {
+#ifdef __GNUC__
+	return __builtin_popcountll(x);
+#else
 	x = (x & 0x5555555555555555ULL) + ((x >> 1) & 0x5555555555555555ULL);
 	x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL);
 	x = (x & 0x0F0F0F0F0F0F0F0FULL) + ((x >> 4) & 0x0F0F0F0F0F0F0F0FULL);
 	return (x * 0x0101010101010101ULL) >> 56;
+#endif
 }
 
 #ifdef __GNUC__