#ifndef RECIPROCAL_DIV64_H
#define RECIPROCAL_DIV64_H
#include
#include "helper.h"
/*
* This algorithm is based on the paper "Division by Invariant
* Integers Using Multiplication" by Torbjörn Granlund and Peter
* L. Montgomery.
*
* The assembler implementation from Agner Fog, which this code is
* based on, can be found here:
* http://www.agner.org/optimize/asmlib.zip
*
* This optimization for A/B is helpful if the divisor B is mostly
* runtime invariant. The reciprocal of B is calculated in the
* slow-path with reciprocal_value(). The fast-path can then just use
* a much faster multiplication operation with a variable dividend A
* to calculate the division A/B.
*/
struct reciprocal_value64 {
uint64_t m;
uint8_t sh1, sh2;
};
static inline struct reciprocal_value64 reciprocal_value64(uint64_t d)
{
struct reciprocal_value64 R;
unsigned __int128 m;
int l;
if (unlikely(d == 1)) {
l = 0;
} else {
l = ((sizeof(unsigned long long)*8)) - __builtin_clzll(d - 1);
}
m = (((unsigned __int128)1 << 64) * ((1ULL << l) - d));
m /= d;
++m;
R.m = (uint64_t)m;
R.sh1 = min(l, 1);
R.sh2 = max(l - 1, 0);
return R;
}
static inline uint64_t reciprocal_divide64(uint64_t a, struct reciprocal_value64 R)
{
uint64_t t = (uint64_t)(((unsigned __int128)a * R.m) >> 64);
return (t + ((a - t) >> R.sh1)) >> R.sh2;
}
static __always_inline uint64_t reciprocal_remainder64(uint64_t A, uint64_t B, struct reciprocal_value64 R)
{
uint64_t div, mod;
div = reciprocal_divide64(A, R);
mod = A - (uint64_t) (div * B);
if (mod >= B) mod -= B;
return mod;
}
#endif /* RECIPROCAL_DIV64_H */