diff --git a/libidu/idu-hash.h b/libidu/idu-hash.h index 529439c..8fedb9b 100644 --- a/libidu/idu-hash.h +++ b/libidu/idu-hash.h @@ -20,6 +20,8 @@ #define _hash_h_ #include +#include +#include typedef unsigned long (*hash_func_t) (void const *key); typedef int (*hash_cmp_func_t) (void const *x, void const *y); @@ -68,38 +70,122 @@ extern void *hash_deleted_item; /* hash and comparison macros for string keys. */ -#if LONG_MAX == 9223372036854775807L -#define __LONG_ROT 64 -#else -#define __LONG_ROT 32 -#endif -#define __RES_ROT 2 - -#define STRING_HASH_1(_key_, _result_) { \ - unsigned char const *kk = (unsigned char const *) (_key_) - 1; \ - unsigned char __rot = 42; \ - while (*++kk) { \ - __rot ^= kk[1]; \ - (_result_) += (unsigned long)*kk << (__rot & 7); \ - (_result_) = ((_result_) << __RES_ROT) | ((_result_) >> (__LONG_ROT - __RES_ROT)); \ - } \ -} while (0) -#define return_STRING_HASH_1(_key_) do { \ - unsigned long result = 0; \ - STRING_HASH_1 ((_key_), result); \ - return result; \ -} while (0) +struct __una_u16 { uint16_t x __attribute__((packed)); }; +struct __una_u32 { uint32_t x __attribute__((packed)); }; +struct __una_u64 { uint64_t x __attribute__((packed)); }; -#define STRING_HASH_2(_key_, _result_) do { \ - unsigned char const *kk = (unsigned char const *) (_key_) - 1; \ - while (*++kk) \ - (_result_) += (*kk << (kk[1] & 0x7)); \ -} while (0) -#define return_STRING_HASH_2(_key_) do { \ - unsigned long result = 0; \ - STRING_HASH_2 ((_key_), result); \ - return result; \ -} while (0) +static inline uint16_t __get_unaligned_cpu16(const void *p) +{ + const struct __una_u16 *ptr = (const struct __una_u16 *)p; + return ptr->x; +} + +static inline uint32_t __get_unaligned_cpu32(const void *p) +{ + const struct __una_u32 *ptr = (const struct __una_u32 *)p; + return ptr->x; +} + +static inline uint64_t __get_unaligned_cpu64(const void *p) +{ + const struct __una_u64 *ptr = (const struct __una_u64 *)p; + return ptr->x; +} + +static inline void __put_unaligned_cpu16(uint16_t val, void *p) +{ + struct __una_u16 *ptr = (struct __una_u16 *)p; + ptr->x = val; +} + +static inline void __put_unaligned_cpu32(uint32_t val, void *p) +{ + struct __una_u32 *ptr = (struct __una_u32 *)p; + ptr->x = val; +} + +static inline void __put_unaligned_cpu64(uint64_t val, void *p) +{ + struct __una_u64 *ptr = (struct __una_u64 *)p; + ptr->x = val; +} + +/* jhash by Bob Jenkins, 2006 */ +static inline uint32_t rol32(uint32_t word, unsigned int shift) +{ + return (word << shift) | (word >> (32 - shift)); +} + +/* __jhash_mix - mix 3 32-bit values reversibly. */ +#define __jhash_mix(a,b,c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c,16); c += b; \ + b -= a; b ^= rol32(a,19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */ +#define __jhash_final(a,b,c) \ +{ \ + c ^= b; c -= rol32(b,14); \ + a ^= c; a -= rol32(c,11); \ + b ^= a; b -= rol32(a,25); \ + c ^= b; c -= rol32(b,16); \ + a ^= c; a -= rol32(c,4); \ + b ^= a; b -= rol32(a,14); \ + c ^= b; c -= rol32(b,24); \ +} + +/* An arbitrary initial parameter */ +#define JHASH_INIT_PARAM 0xDEADBEEF + +/* The most generic version, hashes an arbitrary sequence + * of bytes. No alignment or length assumptions are made about + * the input key. The result depends on endianness. + */ +static inline uint32_t jhash(const void *key, uint32_t length, uint32_t initval) +{ + uint32_t a, b, c; + const uint8_t *k = key; + + /* Set up the internal state */ + a = b = c = JHASH_INIT_PARAM + length + initval; + + /* all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) { + a += __get_unaligned_cpu32(k); + b += __get_unaligned_cpu32(k + 4); + c += __get_unaligned_cpu32(k + 8); + __jhash_mix(a, b, c); + length -= 12; + k += 12; + } + + /* last block: affect all 32 bits of (c) */ + /* all the case statements fall through */ + switch (length) { + case 12: c += (uint32_t)k[11]<<24; + case 11: c += (uint32_t)k[10]<<16; + case 10: c += (uint32_t)k[9]<<8; + case 9 : c += k[8]; + case 8 : b += (uint32_t)k[7]<<24; + case 7 : b += (uint32_t)k[6]<<16; + case 6 : b += (uint32_t)k[5]<<8; + case 5 : b += k[4]; + case 4 : a += (uint32_t)k[3]<<24; + case 3 : a += (uint32_t)k[2]<<16; + case 2 : a += (uint32_t)k[1]<<8; + case 1 : a += k[0]; + __jhash_final(a, b, c); + case 0 : + break; + } + + return c; +} #define STRING_COMPARE(_x_, _y_, _result_) do { \ unsigned char const *xx = (unsigned char const *) (_x_) - 1; \ @@ -120,23 +206,48 @@ extern void *hash_deleted_item; /* hash and comparison macros for integer keys. */ -#define INTEGER_HASH_1(_key_, _result_) do { \ - (_result_) += ((unsigned long)(_key_)); \ -} while (0) -#define return_INTEGER_HASH_1(_key_) do { \ - unsigned long result = 0; \ - INTEGER_HASH_1 ((_key_), result); \ - return result; \ -} while (0) +#if LONG_MAX == 9223372036854775807L +#define BITS_PER_LONG 64 +#elif LONG_MAX == 2147483647L +#define BITS_PER_LONG 32 +#else +#error invalid LONG_MAX +#endif -#define INTEGER_HASH_2(_key_, _result_) do { \ - (_result_) += ~((unsigned long)(_key_)); \ -} while (0) -#define return_INTEGER_HASH_2(_key_) do { \ - unsigned long result = 0; \ - INTEGER_HASH_2 ((_key_), result); \ - return result; \ -} while (0) +/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */ +#define GOLDEN_RATIO_PRIME_32 UINT32_C(0x9e370001) +/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */ +#define GOLDEN_RATIO_PRIME_64 UINT64_C(0x9e37fffffffc0001) + +#if BITS_PER_LONG == 32 +#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_PRIME_32 +#define hash_long(val, bits) hash_32(val, bits) +#elif BITS_PER_LONG == 64 +#define hash_long(val, bits) hash_64(val, bits) +#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_PRIME_64 +#endif + +static inline uint64_t hash_64(uint64_t val, unsigned int bits) +{ + uint64_t hash = val * GOLDEN_RATIO_PRIME_64; + + /* High bits are more random, so use them. */ + return hash >> (64 - bits); +} + +static inline uint32_t hash_32(uint32_t val, unsigned int bits) +{ + /* On some cpus multiply is faster, on others gcc will do shifts */ + uint32_t hash = val * GOLDEN_RATIO_PRIME_32; + + /* High bits are more random, so use them. */ + return hash >> (32 - bits); +} + +static inline unsigned long hash_ptr(void *ptr, unsigned int bits) +{ + return hash_long((unsigned long)ptr, bits); +} #define INTEGER_COMPARE(_x_, _y_, _result_) do { \ (_result_) = _x_ - _y_; \ @@ -149,11 +260,7 @@ extern void *hash_deleted_item; /* hash and comparison macros for address keys. */ -#define ADDRESS_HASH_1(_key_, _result_) INTEGER_HASH_1 (((unsigned long)(_key_)) >> 3, (_result_)) -#define ADDRESS_HASH_2(_key_, _result_) INTEGER_HASH_2 (((unsigned long)(_key_)) >> 3, (_result_)) #define ADDRESS_COMPARE(_x_, _y_, _result_) INTEGER_COMPARE ((_x_), (_y_), (_result_)) -#define return_ADDRESS_HASH_1(_key_) return_INTEGER_HASH_1 (((unsigned long)(_key_)) >> 3) -#define return_ADDRESS_HASH_2(_key_) return_INTEGER_HASH_2 (((unsigned long)(_key_)) >> 3) #define return_ADDRESS_COMPARE(_x_, _y_) return_INTEGER_COMPARE ((_x_), (_y_)) #endif /* not _hash_h_ */ diff --git a/libidu/walker.c b/libidu/walker.c index 41785f4..0cf2aa3 100644 --- a/libidu/walker.c +++ b/libidu/walker.c @@ -1002,15 +1002,20 @@ absolute_file_name_1 (char *buf, struct file_link const *flink) static unsigned long member_file_hash_1 (void const *key) { - return_ADDRESS_HASH_1 (((struct member_file const *) key)->mf_link); + unsigned long ptr = (unsigned long)(((struct member_file const *) key)->mf_link); + ptr ^= 0xd0ec6b1d; + return hash_ptr((void*)ptr, BITS_PER_LONG); } static unsigned long member_file_hash_2 (void const *key) { - return_ADDRESS_HASH_2 (((struct member_file const *) key)->mf_link); + unsigned long ptr = (unsigned long)(((struct member_file const *) key)->mf_link); + ptr ^= 0x48371132; + return hash_ptr((void*)ptr, BITS_PER_LONG); } + static int member_file_hash_compare (void const *x, void const *y) { @@ -1054,25 +1059,30 @@ member_file_qsort_compare (void const *x, void const *y) /* Hash stuff for `struct file_link'. */ static unsigned long -file_link_hash_1 (void const *key) +file_link_hash (void const *key, unsigned int tweak) { unsigned long result = 0; - struct file_link const *parent = (IS_ROOT_FILE_LINK (((struct file_link const *) key)) + const char *k; + unsigned long hash_parent; + + struct file_link *parent = (IS_ROOT_FILE_LINK (((struct file_link const *) key)) ? 0 : ((struct file_link const *) key)->fl_parent); - STRING_HASH_1 (((struct file_link const *) key)->fl_name, result); - ADDRESS_HASH_1 (parent, result); + k = ((struct file_link const *) key)->fl_name; + hash_parent = hash_ptr(parent, 32); + result = jhash(k, strlen(k), hash_parent ^ tweak); return result; } static unsigned long +file_link_hash_1 (void const *key) +{ + return file_link_hash(key, 0x9e1e0d73); +} + +static unsigned long file_link_hash_2 (void const *key) { - unsigned long result = 0; - struct file_link const *parent = (IS_ROOT_FILE_LINK (((struct file_link const *) key)) - ? 0 : ((struct file_link const *) key)->fl_parent); - STRING_HASH_2 (((struct file_link const *) key)->fl_name, result); - ADDRESS_HASH_2 (parent, result); - return result; + return file_link_hash(key, 0x6c552261); } static int @@ -1111,21 +1121,25 @@ links_depth (struct file_link const *flink) /* Hash stuff for `struct dev_ino'. */ static unsigned long +dev_ino_hash (void const *key, uint64_t tweak) +{ + uint64_t res_dev, res_ino; + + res_dev = (((struct dev_ino const *) key)->di_dev); + res_ino = (((struct dev_ino const *) key)->di_ino); + return hash_64(res_dev, 64) ^ hash_64(res_ino, 64) ^ tweak; +} + +static unsigned long dev_ino_hash_1 (void const *key) { - unsigned long result = 0; - INTEGER_HASH_1 (((struct dev_ino const *) key)->di_dev, result); - INTEGER_HASH_1 (((struct dev_ino const *) key)->di_ino, result); - return result; + return dev_ino_hash(key, UINT64_C(0xbc42e6f576c740e9)); } static unsigned long dev_ino_hash_2 (void const *key) { - unsigned long result = 0; - INTEGER_HASH_2 (((struct dev_ino const *) key)->di_dev, result); - INTEGER_HASH_2 (((struct dev_ino const *) key)->di_ino, result); - return result; + return dev_ino_hash(key, UINT64_C(0xe2b6735e7a7b438b)); } static int diff --git a/src/mkid.c b/src/mkid.c index b1a0fa9..50830b7 100644 --- a/src/mkid.c +++ b/src/mkid.c @@ -786,13 +786,15 @@ write_id_file (struct idhead *idhp) static unsigned long token_hash_1 (void const *key) { - return_STRING_HASH_1 (TOKEN_NAME ((struct token const *) key)); + char *k = TOKEN_NAME ((struct token const *) key); + return jhash(k, strlen(k), 0xBABE); } static unsigned long token_hash_2 (void const *key) { - return_STRING_HASH_2 (TOKEN_NAME ((struct token const *) key)); + char *k = TOKEN_NAME ((struct token const *) key); + return jhash(k, strlen(k), 0xBEEFF); } static int