哈希函数(hash function)是一个将某个集合内的元素映射到一个大小固定的元素集合的函数,其在数据查找、数据去重等方面有着许多的应用。这里,就来看看常用的一些哈希函数是如何来实现的。
0. RsHash
最简单的一种Hash函数实现方法,来自于Rober Sedgwicks的C语言算法书:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
| unsigned int RsHash(const char* str, unsigned int len){ unsigned int b = 378551; unsigned int a = 63689; unsigned int hash = 0;
for(int i = 0; i < len; ++i){ hash = hash * a + (*str++); a *= b; }
return hash; }
|
1. JsHash
由Justin Sobel提出的Hash函数:
1 2 3 4 5 6 7 8 9 10 11
| unsigned int JsHash(const char* str, unsigned int len){ unsigned int hash = 1315423911;
for(int i = 0; i < len; ++i){ hash ^= ((hash << 5) + (*str++) + (hash >> 2)); }
return hash; }
|
2. PjwHash
PwjHash是由AT&T的Peter J.Wenberger提出的Hash算法:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
| unsigned int PjwHash(const char* str, unsigned int len){ const unsigned int bitsOfUnsignedInt = (unsigned int)(sizeof(unsigned int) * 8); const unsigned int threeQuarters = (unsigned int)((bitsOfUnsignedInt * 3)/4); const unsigned int halfQuarter = (unsigned int)(bitsOfUnsignedInt/8); const unsigned int highBits = (unsigned int)(0xFFFFFFFF <<(bitsOfUnsignedInt - halfQuarter));
unsigned int hash = 0; unsigned int test = 0;
for(int i = 0; i < len; ++i){ hash = (hash << halfQuarter) + (*str++);
if((test = hash & highBits) != 0){ hash = ((hash^(test >> threeQuarters)) & (~highBits)); } }
return hash; }
|
3. ElfHash
该Hash函数在UNIX系统中有着广泛的应用:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
| unsigned int ElfHash(const char* str, unsigned int len){ unsigned int hash = 0; unsigned int x = 0;
for(int i = 0; i < len; ++i){ hash = (hash << 4) + (*str++);
if((x = hash & 0xF0000000L) != 0){ hash ^= (x >> 24); }
hash &= ~x; }
return hash; }
|
4. BkdrHash
由Brian Kernighan和Dennis Ritchie在《C Programming Language》中提出的Hash算法:
Java中的hashCode()
函数通常使用该方法实现
1 2 3 4 5 6 7 8 9 10 11 12
| unsigned int BkdrHash(const char* str, unsigned int len){ unsigned int seed = 131; unsigned int hash = 0;
for(int i = 0; i < len; ++i){ hash = (hash * seed) + (*str++); }
return hash; }
|
5. SdbmHash
在开源项目SDBM中使用的一个Hash算法:
1 2 3 4 5 6 7 8 9 10 11
| unsigned int SdbmHash(const char* str, unsigned int len){ unsigned int hash = 0;
for(int i = 0; i < len; ++i){ hash = (*str++) + (hash << 6) + (hash << 16) - hash; }
return hash; }
|
6. DjbHash
由Daniel J.Bernstein教授提出的一个Hash算法:
1 2 3 4 5 6 7 8 9 10 11
| unsigned int DjbHash(const char* str, unsigned int len){ unsigned int hash = 5381;
for(int i = 0; i < len; ++i){ hash = (hash << 5 + hash) + (*str++); }
return hash; }
|
7. DekHash
Donald E.Knuth提出的一个hash函数:
1 2 3 4 5 6 7 8 9 10 11
| unsigned int DekHash(const char* str, unsigned int len){ unsigned int hash = len;
for(int i = 0; i < len; ++i){ hash = ((hash << 5)^(hash >> 27))^(*str++); }
return hash; }
|
8. ApHash
由Arash Partow提出的一种Hash算法:
1 2 3 4 5 6 7 8 9 10 11 12 13 14
| unsigned int ApHash(const char* str, unsigned int len){ unsigned int hash = 0xAAAAAAAA;
for(unsigned int i = 0; i < len; ++i){ hash ^= ((i & 1 == 0) ? (((hash << 7)^(*str)) * (hash >> 3)) : (~((hash << 11) + (*str)^(hash >> 5)))); ++str; }
return hash; }
|
参考文献