#ifndef _BITOPS_H #define _BITOPS_H #include #include "helper.h" #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) /* Technically wrong, but this avoids compilation errors on some gcc * versions. */ #define BITOP_ADDR(x) "=m" (*(volatile long *) (x)) #else #define BITOP_ADDR(x) "+m" (*(volatile long *) (x)) #endif #define ADDR BITOP_ADDR(addr) /* * We do the locked ops that don't return the old value as * a mask operation on a byte. */ #define IS_IMMEDIATE(nr) (__builtin_constant_p(nr)) #define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((void *)(addr) + ((nr)>>3)) #define CONST_MASK(nr) (1 << ((nr) & 7)) #if defined(__x86_64__) || defined(__i386__) /** * set_bit - Atomically set a bit in memory * @nr: the bit to set * @addr: the address to start counting from * * This function is atomic and may not be reordered. See __set_bit() * if you do not require the atomic guarantees. * * Note: there are no guarantees that this function will not be reordered * on non x86 architectures, so if you are writing portable code, * make sure not to rely on its reordering guarantees. * * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ static __always_inline void set_bit(unsigned int nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "orb %1,%0" : CONST_MASK_ADDR(nr, addr) : "iq" ((u8)CONST_MASK(nr)) : "memory"); } else { asm volatile(LOCK_PREFIX "bts %1,%0" : BITOP_ADDR(addr) : "Ir" (nr) : "memory"); } } /** * __set_bit - Set a bit in memory * @nr: the bit to set * @addr: the address to start counting from * * Unlike set_bit(), this function is non-atomic and may be reordered. * If it's called on the same region of memory simultaneously, the effect * may be that only one operation succeeds. */ static inline void __set_bit(int nr, volatile unsigned long *addr) { asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory"); } /** * clear_bit - Clears a bit in memory * @nr: Bit to clear * @addr: Address to start counting from * * clear_bit() is atomic and may not be reordered. However, it does * not contain a memory barrier, so if it is used for locking purposes, * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() * in order to ensure changes are visible on other processors. */ static __always_inline void clear_bit(int nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "andb %1,%0" : CONST_MASK_ADDR(nr, addr) : "iq" ((u8)~CONST_MASK(nr))); } else { asm volatile(LOCK_PREFIX "btr %1,%0" : BITOP_ADDR(addr) : "Ir" (nr)); } } /* * clear_bit_unlock - Clears a bit in memory * @nr: Bit to clear * @addr: Address to start counting from * * clear_bit() is atomic and implies release semantics before the memory * operation. It can be used for an unlock. */ static inline void clear_bit_unlock(unsigned nr, volatile unsigned long *addr) { barrier(); clear_bit(nr, addr); } static inline void __clear_bit(int nr, volatile unsigned long *addr) { asm volatile("btr %1,%0" : ADDR : "Ir" (nr)); } /* * __clear_bit_unlock - Clears a bit in memory * @nr: Bit to clear * @addr: Address to start counting from * * __clear_bit() is non-atomic and implies release semantics before the memory * operation. It can be used for an unlock if no other CPUs can concurrently * modify other bits in the word. * * No memory barrier is required here, because x86 cannot reorder stores past * older loads. Same principle as spin_unlock. */ static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr) { barrier(); __clear_bit(nr, addr); } /** * __change_bit - Toggle a bit in memory * @nr: the bit to change * @addr: the address to start counting from * * Unlike change_bit(), this function is non-atomic and may be reordered. * If it's called on the same region of memory simultaneously, the effect * may be that only one operation succeeds. */ static inline void __change_bit(int nr, volatile unsigned long *addr) { asm volatile("btc %1,%0" : ADDR : "Ir" (nr)); } /** * change_bit - Toggle a bit in memory * @nr: Bit to change * @addr: Address to start counting from * * change_bit() is atomic and may not be reordered. * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ static inline void change_bit(int nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "xorb %1,%0" : CONST_MASK_ADDR(nr, addr) : "iq" ((u8)CONST_MASK(nr))); } else { asm volatile(LOCK_PREFIX "btc %1,%0" : BITOP_ADDR(addr) : "Ir" (nr)); } } /** * test_and_set_bit - Set a bit and return its old value * @nr: Bit to set * @addr: Address to count from * * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ static inline int test_and_set_bit(int nr, volatile unsigned long *addr) { int oldbit; asm volatile(LOCK_PREFIX "bts %2,%1\n\t" "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr) : "memory"); return oldbit; } /** * test_and_set_bit_lock - Set a bit and return its old value for lock * @nr: Bit to set * @addr: Address to count from * * This is the same as test_and_set_bit on x86. */ static __always_inline int test_and_set_bit_lock(int nr, volatile unsigned long *addr) { return test_and_set_bit(nr, addr); } /** * __test_and_set_bit - Set a bit and return its old value * @nr: Bit to set * @addr: Address to count from * * This operation is non-atomic and can be reordered. * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. */ static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) { int oldbit; asm("bts %2,%1\n\t" "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr)); return oldbit; } /** * test_and_clear_bit - Clear a bit and return its old value * @nr: Bit to clear * @addr: Address to count from * * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ static inline int test_and_clear_bit(int nr, volatile unsigned long *addr) { int oldbit; asm volatile(LOCK_PREFIX "btr %2,%1\n\t" "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr) : "memory"); return oldbit; } /** * __test_and_clear_bit - Clear a bit and return its old value * @nr: Bit to clear * @addr: Address to count from * * This operation is non-atomic and can be reordered. * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. */ static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) { int oldbit; asm volatile("btr %2,%1\n\t" "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr)); return oldbit; } /* WARNING: non atomic and it can be reordered! */ static inline int __test_and_change_bit(int nr, volatile unsigned long *addr) { int oldbit; asm volatile("btc %2,%1\n\t" "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr) : "memory"); return oldbit; } /** * test_and_change_bit - Change a bit and return its old value * @nr: Bit to change * @addr: Address to count from * * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ static inline int test_and_change_bit(int nr, volatile unsigned long *addr) { int oldbit; asm volatile(LOCK_PREFIX "btc %2,%1\n\t" "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr) : "memory"); return oldbit; } static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr) { return ((1UL << (nr % BITS_PER_LONG)) & (addr[nr / BITS_PER_LONG])) != 0; } static inline int variable_test_bit(int nr, volatile const unsigned long *addr) { int oldbit; asm volatile("bt %2,%1\n\t" "sbb %0,%0" : "=r" (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr)); return oldbit; } #define test_bit(nr,addr) \ (__builtin_constant_p(nr) ? \ constant_test_bit((nr),(addr)) : \ variable_test_bit((nr),(addr))) #define for_each_set_bit(bit, addr, size) \ for ((bit) = find_first_bit((addr), (size)); \ (bit) < (size); \ (bit) = find_next_bit((addr), (size), (bit) + 1)) #define BIT(nr) (1UL << (nr)) #define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) #define BITS_PER_BYTE 8 #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) static inline unsigned long __ffz(unsigned long word) { asm volatile("bsf %1,%0" :"=r" (word) :"r" (~word)); return word; } static inline unsigned long __ffs(unsigned long word) { asm volatile("bsf %1,%0" :"=r" (word) :"rm" (word)); return word; } static inline unsigned long __fls(unsigned long word) { asm volatile("bsr %1,%0" :"=r" (word) :"rm" (word)); return word; } static __inline__ int get_bitmask_order(unsigned int count) { int order; order = __fls(count); return order; /* We could be slightly more clever with -1 here... */ } static __inline__ int get_count_order(unsigned int count) { int order; order = __fls(count) - 1; if (count & (count - 1)) order++; return order; } /** * ffs - find first set bit in word * @x: the word to search * * This is defined the same way as the libc and compiler builtin ffs * routines, therefore differs in spirit from the other bitops. * * ffs(value) returns 0 if value is 0 or the position of the first * set bit if value is nonzero. The first (least significant) bit * is at position 1. */ static inline int safari_ffs(int x) { int r; /* * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the * dest reg is undefined if x==0, but their CPU architect says its * value is written to set it to the same as before, except that the * top 32 bits will be cleared. * * We cannot do this on 32 bits because at the very least some * 486 CPUs did not behave this way. */ asm("bsfl %1,%0" : "=r" (r) : "rm" (x), "0" (-1)); return r + 1; } /** * fls - find last set bit in word * @x: the word to search * * This is defined in a similar way as the libc and compiler builtin * ffs, but returns the position of the most significant set bit. * * fls(value) returns 0 if value is 0 or the position of the last * set bit if value is nonzero. The last (most significant) bit is * at position 32. */ static inline int fls(int x) { int r; /* * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the * dest reg is undefined if x==0, but their CPU architect says its * value is written to set it to the same as before, except that the * top 32 bits will be cleared. * * We cannot do this on 32 bits because at the very least some * 486 CPUs did not behave this way. */ asm("bsrl %1,%0" : "=r" (r) : "rm" (x), "0" (-1)); return r + 1; } /** * fls64 - find last set bit in a 64-bit word * @x: the word to search * * This is defined in a similar way as the libc and compiler builtin * ffsll, but returns the position of the most significant set bit. * * fls64(value) returns 0 if value is 0 or the position of the last * set bit if value is nonzero. The last (most significant) bit is * at position 64. */ #if BITS_PER_LONG == 32 static __always_inline int fls64(uint64_t x) { uint32_t h = x >> 32; if (h) return fls(h) + 32; return fls(x); } #elif BITS_PER_LONG == 64 static __always_inline int fls64(uint64_t x) { long bitpos = -1; /* * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the * dest reg is undefined if x==0, but their CPU architect says its * value is written to set it to the same as before. */ asm("bsrq %1,%0" : "+r" (bitpos) : "rm" (x)); return bitpos + 1; } #else #error BITS_PER_LONG not 32 or 64 #endif static inline unsigned fls_long(unsigned long l) { if (sizeof(l) == 4) return fls(l); return fls64(l); } /** * __ffs64 - find first set bit in a 64 bit word * @word: The 64 bit word * * On 64 bit arches this is a synomyn for __ffs * The result is not defined if no bits are set, so check that @word * is non-zero before calling this. */ static inline unsigned long __ffs64(uint64_t word) { #if BITS_PER_LONG == 32 if (((u32)word) == 0UL) return __ffs((u32)(word >> 32)) + 32; #elif BITS_PER_LONG != 64 #error BITS_PER_LONG not 32 or 64 #endif return __ffs((unsigned long)word); } /* * Compile time versions of __arch_hweightN() */ #define __const_hweight8(w) \ ( (!!((w) & (1ULL << 0))) + \ (!!((w) & (1ULL << 1))) + \ (!!((w) & (1ULL << 2))) + \ (!!((w) & (1ULL << 3))) + \ (!!((w) & (1ULL << 4))) + \ (!!((w) & (1ULL << 5))) + \ (!!((w) & (1ULL << 6))) + \ (!!((w) & (1ULL << 7))) ) #define __const_hweight16(w) (__const_hweight8(w) + __const_hweight8((w) >> 8 )) #define __const_hweight32(w) (__const_hweight16(w) + __const_hweight16((w) >> 16)) #define __const_hweight64(w) (__const_hweight32(w) + __const_hweight32((w) >> 32)) /* * Generic interface. */ #define hweight8(w) (__builtin_constant_p(w) ? __const_hweight8(w) : __arch_hweight8(w)) #define hweight16(w) (__builtin_constant_p(w) ? __const_hweight16(w) : __arch_hweight16(w)) #define hweight32(w) (__builtin_constant_p(w) ? __const_hweight32(w) : __arch_hweight32(w)) #define hweight64(w) (__builtin_constant_p(w) ? __const_hweight64(w) : __arch_hweight64(w)) /* * Interface for known constant arguments */ #define HWEIGHT8(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight8(w)) #define HWEIGHT16(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight16(w)) #define HWEIGHT32(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight32(w)) #define HWEIGHT64(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight64(w)) /* * Type invariant interface to the compile time constant hweight functions. */ #define HWEIGHT(w) HWEIGHT64((u64)w) static inline unsigned int __arch_hweight8(uint8_t w) { return __builtin_popcount(w); } static inline unsigned int __arch_hweight16(uint16_t w) { return __builtin_popcount(w); } static inline unsigned int __arch_hweight32(uint32_t w) { return __builtin_popcount(w); } static inline unsigned int __arch_hweight64(uint64_t w) { if (sizeof(long) == 4) return __builtin_popcountll(w); else return __builtin_popcountl(w); } /** * find_last_bit - find the last set bit in a memory region * @addr: The address to start the search at * @size: The maximum size to search * * Returns the bit number of the first set bit, or size. */ extern unsigned long find_last_bit(const unsigned long *addr, unsigned long size); /** * find_next_bit - find the next set bit in a memory region * @addr: The address to base the search on * @offset: The bitnumber to start searching at * @size: The bitmap size in bits */ extern unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset); /** * find_next_zero_bit - find the next cleared bit in a memory region * @addr: The address to base the search on * @offset: The bitnumber to start searching at * @size: The bitmap size in bits */ extern unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset); /** * find_first_bit - find the first set bit in a memory region * @addr: The address to start the search at * @size: The maximum size to search * * Returns the bit number of the first set bit. */ extern unsigned long find_first_bit(const unsigned long *addr, unsigned long size); /** * find_first_zero_bit - find the first cleared bit in a memory region * @addr: The address to start the search at * @size: The maximum size to search * * Returns the bit number of the first cleared bit. */ extern unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size); /* ************************** */ #define ___constant_swab16(x) \ ((uint16_t)( \ (((uint16_t)(x) & (uint16_t)0x00ffU) << 8) | \ (((uint16_t)(x) & (uint16_t)0xff00U) >> 8) )) #define ___constant_swab32(x) \ ((uint32_t)( \ (((uint32_t)(x) & (uint32_t)0x000000ffUL) << 24) | \ (((uint32_t)(x) & (uint32_t)0x0000ff00UL) << 8) | \ (((uint32_t)(x) & (uint32_t)0x00ff0000UL) >> 8) | \ (((uint32_t)(x) & (uint32_t)0xff000000UL) >> 24) )) #define ___constant_swab64(x) \ ((uint64_t)( \ (uint64_t)(((uint64_t)(x) & (uint64_t)0x00000000000000ffULL) << 56) | \ (uint64_t)(((uint64_t)(x) & (uint64_t)0x000000000000ff00ULL) << 40) | \ (uint64_t)(((uint64_t)(x) & (uint64_t)0x0000000000ff0000ULL) << 24) | \ (uint64_t)(((uint64_t)(x) & (uint64_t)0x00000000ff000000ULL) << 8) | \ (uint64_t)(((uint64_t)(x) & (uint64_t)0x000000ff00000000ULL) >> 8) | \ (uint64_t)(((uint64_t)(x) & (uint64_t)0x0000ff0000000000ULL) >> 24) | \ (uint64_t)(((uint64_t)(x) & (uint64_t)0x00ff000000000000ULL) >> 40) | \ (uint64_t)(((uint64_t)(x) & (uint64_t)0xff00000000000000ULL) >> 56) )) #define swab16(x) \ (__builtin_constant_p((uint16_t)(x)) ? \ ___constant_swab16((x)) : \ __fswab16((x))) #define swab32(x) \ (__builtin_constant_p((uint32_t)(x)) ? \ ___constant_swab32((x)) : \ __builtin_bswap32((x))) #define swab64(x) \ (__builtin_constant_p((uint64_t)(x)) ? \ ___constant_swab64((x)) : \ __builtin_bswap64((x))) #define swab64p(x) swab64(*(x)) #define swab32p(x) swab32(*(x)) #define swab16p(x) swab16(*(x)) static inline __attribute_const__ uint16_t __fswab16(uint16_t x) { return x<<8 | x>>8; } #else # error only i386 and x86_64 supported #endif #endif