#ifndef RECIPROCAL_DIV64_H #define RECIPROCAL_DIV64_H #include #include "helper.h" /* * This algorithm is based on the paper "Division by Invariant * Integers Using Multiplication" by Torbjörn Granlund and Peter * L. Montgomery. * * The assembler implementation from Agner Fog, which this code is * based on, can be found here: * http://www.agner.org/optimize/asmlib.zip * * This optimization for A/B is helpful if the divisor B is mostly * runtime invariant. The reciprocal of B is calculated in the * slow-path with reciprocal_value(). The fast-path can then just use * a much faster multiplication operation with a variable dividend A * to calculate the division A/B. */ struct reciprocal_value64 { uint64_t m; uint8_t sh1, sh2; }; static inline struct reciprocal_value64 reciprocal_value64(uint64_t d) { struct reciprocal_value64 R; unsigned __int128 m; int l; if (unlikely(d == 1)) { l = 0; } else { l = ((sizeof(unsigned long long)*8)) - __builtin_clzll(d - 1); } m = (((unsigned __int128)1 << 64) * ((1ULL << l) - d)); m /= d; ++m; R.m = (uint64_t)m; R.sh1 = min(l, 1); R.sh2 = max(l - 1, 0); return R; } static inline uint64_t reciprocal_divide64(uint64_t a, struct reciprocal_value64 R) { uint64_t t = (uint64_t)(((unsigned __int128)a * R.m) >> 64); return (t + ((a - t) >> R.sh1)) >> R.sh2; } static __always_inline uint64_t reciprocal_remainder64(uint64_t A, uint64_t B, struct reciprocal_value64 R) { uint64_t div, mod; div = reciprocal_divide64(A, R); mod = A - (uint64_t) (div * B); if (mod >= B) mod -= B; return mod; } #endif /* RECIPROCAL_DIV64_H */