常用哈希函数C实现

哈希函数(hash function)是一个将某个集合内的元素映射到一个大小固定的元素集合的函数,其在数据查找、数据去重等方面有着许多的应用。这里,就来看看常用的一些哈希函数是如何来实现的。

0. RsHash

最简单的一种Hash函数实现方法,来自于Rober Sedgwicks的C语言算法书:

1
2
3
4
5
6
7
8
9
10
11
12
13

unsigned int RsHash(const char* str, unsigned int len){
unsigned int b = 378551;
unsigned int a = 63689;
unsigned int hash = 0;

for(int i = 0; i < len; ++i){
hash = hash * a + (*str++);
a *= b;
}

return hash;
}

1. JsHash

由Justin Sobel提出的Hash函数:

1
2
3
4
5
6
7
8
9
10

unsigned int JsHash(const char* str, unsigned int len){
unsigned int hash = 1315423911;

for(int i = 0; i < len; ++i){
hash ^= ((hash << 5) + (*str++) + (hash >> 2));
}

return hash;
}

2. PjwHash

PwjHash是由AT&T的Peter J.Wenberger提出的Hash算法:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20

unsigned int PjwHash(const char* str, unsigned int len){
const unsigned int bitsOfUnsignedInt = (unsigned int)(sizeof(unsigned int) * 8);
const unsigned int threeQuarters = (unsigned int)((bitsOfUnsignedInt * 3)/4);
const unsigned int halfQuarter = (unsigned int)(bitsOfUnsignedInt/8);
const unsigned int highBits = (unsigned int)(0xFFFFFFFF <<(bitsOfUnsignedInt - halfQuarter));

unsigned int hash = 0;
unsigned int test = 0;

for(int i = 0; i < len; ++i){
hash = (hash << halfQuarter) + (*str++);

if((test = hash & highBits) != 0){
hash = ((hash^(test >> threeQuarters)) & (~highBits));
}
}

return hash;
}

3. ElfHash

该Hash函数在UNIX系统中有着广泛的应用:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17

unsigned int ElfHash(const char* str, unsigned int len){
unsigned int hash = 0;
unsigned int x = 0;

for(int i = 0; i < len; ++i){
hash = (hash << 4) + (*str++);

if((x = hash & 0xF0000000L) != 0){
hash ^= (x >> 24);
}

hash &= ~x;
}

return hash;
}

4. BkdrHash

由Brian Kernighan和Dennis Ritchie在《C Programming Language》中提出的Hash算法:

Java中的hashCode()函数通常使用该方法实现

1
2
3
4
5
6
7
8
9
10
11

unsigned int BkdrHash(const char* str, unsigned int len){
unsigned int seed = 131; /*31 131 1313 13131 131313 etc*/
unsigned int hash = 0;

for(int i = 0; i < len; ++i){
hash = (hash * seed) + (*str++);
}

return hash;
}

5. SdbmHash

在开源项目SDBM中使用的一个Hash算法:

1
2
3
4
5
6
7
8
9
10

unsigned int SdbmHash(const char* str, unsigned int len){
unsigned int hash = 0;

for(int i = 0; i < len; ++i){
hash = (*str++) + (hash << 6) + (hash << 16) - hash;
}

return hash;
}

6. DjbHash

由Daniel J.Bernstein教授提出的一个Hash算法:

1
2
3
4
5
6
7
8
9
10

unsigned int DjbHash(const char* str, unsigned int len){
unsigned int hash = 5381;

for(int i = 0; i < len; ++i){
hash = (hash << 5 + hash) + (*str++);
}

return hash;
}

7. DekHash

Donald E.Knuth提出的一个hash函数:

1
2
3
4
5
6
7
8
9
10

unsigned int DekHash(const char* str, unsigned int len){
unsigned int hash = len;

for(int i = 0; i < len; ++i){
hash = ((hash << 5)^(hash >> 27))^(*str++);
}

return hash;
}

8. ApHash

由Arash Partow提出的一种Hash算法:

1
2
3
4
5
6
7
8
9
10
11
12

unsigned int ApHash(const char* str, unsigned int len){
unsigned int hash = 0xAAAAAAAA;

for(unsigned int i = 0; i < len; ++i){
hash ^= ((i & 1 == 0) ? (((hash << 7)^(*str)) * (hash >> 3)) :
(~((hash << 11) + (*str)^(hash >> 5))));
++str;
}

return hash;
}

参考文献