JasonWang's Blog

常用哈希函数C实现

字数统计: 660阅读时长: 3 min
2017/08/06

哈希函数(hash function)是一个将某个集合内的元素映射到一个大小固定的元素集合的函数,其在数据查找、数据去重等方面有着许多的应用。这里,就来看看常用的一些哈希函数是如何来实现的。

0. RsHash

最简单的一种Hash函数实现方法,来自于Rober Sedgwicks的C语言算法书:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15

unsigned int RsHash(const char* str, unsigned int len){
unsigned int b = 378551;
unsigned int a = 63689;
unsigned int hash = 0;

for(int i = 0; i < len; ++i){
hash = hash * a + (*str++);
a *= b;
}

return hash;
}


1. JsHash

由Justin Sobel提出的Hash函数:

1
2
3
4
5
6
7
8
9
10
11

unsigned int JsHash(const char* str, unsigned int len){
unsigned int hash = 1315423911;

for(int i = 0; i < len; ++i){
hash ^= ((hash << 5) + (*str++) + (hash >> 2));
}

return hash;
}

2. PjwHash

PwjHash是由AT&T的Peter J.Wenberger提出的Hash算法:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21

unsigned int PjwHash(const char* str, unsigned int len){
const unsigned int bitsOfUnsignedInt = (unsigned int)(sizeof(unsigned int) * 8);
const unsigned int threeQuarters = (unsigned int)((bitsOfUnsignedInt * 3)/4);
const unsigned int halfQuarter = (unsigned int)(bitsOfUnsignedInt/8);
const unsigned int highBits = (unsigned int)(0xFFFFFFFF <<(bitsOfUnsignedInt - halfQuarter));

unsigned int hash = 0;
unsigned int test = 0;

for(int i = 0; i < len; ++i){
hash = (hash << halfQuarter) + (*str++);

if((test = hash & highBits) != 0){
hash = ((hash^(test >> threeQuarters)) & (~highBits));
}
}

return hash;
}

3. ElfHash

该Hash函数在UNIX系统中有着广泛的应用:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18

unsigned int ElfHash(const char* str, unsigned int len){
unsigned int hash = 0;
unsigned int x = 0;

for(int i = 0; i < len; ++i){
hash = (hash << 4) + (*str++);

if((x = hash & 0xF0000000L) != 0){
hash ^= (x >> 24);
}

hash &= ~x;
}

return hash;
}

4. BkdrHash

由Brian Kernighan和Dennis Ritchie在《C Programming Language》中提出的Hash算法:

Java中的hashCode()函数通常使用该方法实现

1
2
3
4
5
6
7
8
9
10
11
12

unsigned int BkdrHash(const char* str, unsigned int len){
unsigned int seed = 131; /*31 131 1313 13131 131313 etc*/
unsigned int hash = 0;

for(int i = 0; i < len; ++i){
hash = (hash * seed) + (*str++);
}

return hash;
}

5. SdbmHash

在开源项目SDBM中使用的一个Hash算法:

1
2
3
4
5
6
7
8
9
10
11

unsigned int SdbmHash(const char* str, unsigned int len){
unsigned int hash = 0;

for(int i = 0; i < len; ++i){
hash = (*str++) + (hash << 6) + (hash << 16) - hash;
}

return hash;
}

6. DjbHash

由Daniel J.Bernstein教授提出的一个Hash算法:

1
2
3
4
5
6
7
8
9
10
11

unsigned int DjbHash(const char* str, unsigned int len){
unsigned int hash = 5381;

for(int i = 0; i < len; ++i){
hash = (hash << 5 + hash) + (*str++);
}

return hash;
}

7. DekHash

Donald E.Knuth提出的一个hash函数:

1
2
3
4
5
6
7
8
9
10
11

unsigned int DekHash(const char* str, unsigned int len){
unsigned int hash = len;

for(int i = 0; i < len; ++i){
hash = ((hash << 5)^(hash >> 27))^(*str++);
}

return hash;
}

8. ApHash

由Arash Partow提出的一种Hash算法:

1
2
3
4
5
6
7
8
9
10
11
12
13
14

unsigned int ApHash(const char* str, unsigned int len){
unsigned int hash = 0xAAAAAAAA;

for(unsigned int i = 0; i < len; ++i){
hash ^= ((i & 1 == 0) ? (((hash << 7)^(*str)) * (hash >> 3)) :
(~((hash << 11) + (*str)^(hash >> 5))));
++str;
}

return hash;
}


参考文献

CATALOG
  1. 1. 0. RsHash
  2. 2. 1. JsHash
  3. 3. 2. PjwHash
  4. 4. 3. ElfHash
  5. 5. 4. BkdrHash
  6. 6. 5. SdbmHash
  7. 7. 6. DjbHash
  8. 8. 7. DekHash
  9. 9. 8. ApHash
  10. 10. 参考文献