字符串Hash函數

Hash函數:Hash,一般翻譯做“散列”,也有直接音譯爲“哈希”的,就是把任意長度的輸入(又叫做預映射,pre-image),通過散列算法,變換成固定長度的輸出,該輸出就是散列值。這種轉換是一種壓縮映射,也就是說,散列值的空間通常遠小於輸入的空間,不同的輸入可能會散列成相同的輸出,因此不可能從散列值唯一地確定輸入值。簡單地說,Hash函數就是一種將任意長度的消息壓縮成某一固定長度的消息摘要的函數。完美的hash函數滿足:若key1!=key2,則hash(key1)!=hash(key2)。

字符串Hash函數:在處理大規模字符串數據時,常常需要把每個字符串映射爲一個ID值,以下列出一些常用的經典字符串Hash函數。

#include <cstdlib>
#include <iostream>

using namespace std;

// BKDR hash: attributed to Brian Kernighan and Dennis Ritchie's
// "The C Programming Language"; per the original note, Java adopts the same
// multiply-and-add scheme.
//
// T    element type of the string (e.g. char).
// str  non-null pointer to a sequence of T terminated by a zero element.
//
// Each element is converted to unsigned int and folded in as
// hash = hash * 131 + element, relying on unsigned wraparound.
// Returns the full unsigned value; the top bit is not masked off.
template<class T>
unsigned int BKDRHash(const T* str)
{
	const unsigned int seed = 131; // other usual seeds: 31, 131, 1313, 13131, 131313...
	unsigned int hash = 0;
	while(*str)
	{
		// Equivalent shift form: (hash << 7) + (hash << 1) + hash + element.
		hash = hash*seed + static_cast<unsigned int>(*str++);
	}
	return hash;
}

// SDBM hash: used in the open-source SDBM database engine. Structurally the
// same multiply-and-add loop as BKDRHash; only the multiplier (65599) differs.
//
// T    element type of the string (e.g. char).
// str  non-null pointer to a sequence of T terminated by a zero element.
//
// Returns hash = hash * 65599 + element accumulated over all elements with
// unsigned wraparound; the top bit is not masked off.
template<class T>
unsigned int SDBMHash(const T* str)
{
	const unsigned int seed = 65599;
	unsigned int hash = 0;
	while(*str)
	{
		// Equivalent shift form: (hash << 6) + (hash << 16) - hash + element.
		hash = hash*seed + static_cast<unsigned int>(*str++);
	}
	return hash;
}

// RS hash: attributed to Robert Sedgewick's "Algorithms in C".
//
// T    element type of the string (e.g. char).
// str  non-null pointer to a sequence of T terminated by a zero element.
//
// Multiply-and-add hash whose multiplier changes on every step: it starts at
// 63689 and is itself multiplied by 378551 after each element. All arithmetic
// is unsigned and wraps. Returns the full unsigned value (top bit not masked).
template<class T>
unsigned int RSHash(const T* str)
{
	const unsigned int factor = 378551;
	unsigned int multiplier = 63689;
	unsigned int hash = 0;
	for(; *str; ++str)
	{
		hash = hash*multiplier + static_cast<unsigned int>(*str);
		multiplier *= factor; // evolve the multiplier for the next element
	}
	return hash;
}

// AP hash: proposed by Arash Partow.
//
// T    element type of the string (e.g. char).
// str  non-null pointer to a sequence of T; hashing stops at the first element
//      whose value converts to 0 as unsigned int.
//
// Elements at even positions (0, 2, ...) and odd positions are mixed with two
// different shift/xor formulas. Returns the full unsigned value (top bit not
// masked).
template<class T>
unsigned int APHash(const T* str)
{
	unsigned int hash = 0;
	unsigned int ch;
	bool oddPosition = false; // false for elements 0, 2, 4, ...; true for 1, 3, 5, ...
	while((ch = static_cast<unsigned int>(*str++)) != 0)
	{
		if(!oddPosition)
		{
			hash ^= ((hash << 7)^ch^(hash >> 3));
		}
		else
		{
			hash ^= (~((hash << 11)^ch^(hash >> 5)));
		}
		oddPosition = !oddPosition;
	}
	return hash;
}

// JS hash: proposed by Justin Sobel.
//
// T    element type of the string (e.g. char).
// str  non-null pointer to a sequence of T terminated by a zero element.
//
// Starts from 1315423911 and, for each element, xors the running hash with
// (hash << 5) + (hash >> 2) + element (unsigned, wrapping). An empty string
// therefore hashes to 1315423911. Returns the full unsigned value.
template<class T>
unsigned int JSHash(const T* str)
{
	unsigned int hash = 1315423911;
	for(; *str; ++str)
	{
		const unsigned int mixed = (hash << 5) + (hash >> 2) + static_cast<unsigned int>(*str);
		hash ^= mixed;
	}
	return hash;
}

// DEK hash: attributed to Donald E. Knuth, "The Art of Computer Programming",
// Volume 3.
//
// T    element type of the string (e.g. char).
// str  non-null pointer to a sequence of T terminated by a zero element.
//
// Starts from 1315423911 and, for each element, xors the running hash with
// (hash << 5) ^ (hash >> 2) ^ element. An empty string therefore hashes to
// 1315423911. Returns the full unsigned value (top bit not masked).
//
// NOTE(review): the DEK hash as usually published seeds with the string length
// and uses (hash << 5) ^ (hash >> 27); this function keeps the formula that was
// already here so existing hash values do not change -- confirm which is intended.
template<class T>
unsigned int DEKHash(const T* str)
{
	unsigned int hash = 1315423911;
	for(; *str; ++str)
	{
		const unsigned int mixed = (hash << 5)^(hash >> 2)^static_cast<unsigned int>(*str);
		hash ^= mixed;
	}
	return hash;
}

// FNV hash: per the original note, a hash used in Unix systems and later
// implemented in Microsoft's hash_map.
//
// T    element type of the string (e.g. char).
// str  non-null pointer to a sequence of T terminated by a zero element.
//
// Starts from 2166136261; for each element the hash is first multiplied by
// 16777619 (unsigned, wrapping) and then xored with the element. An empty
// string hashes to 2166136261. Returns the full unsigned value.
template<class T>
unsigned int FNVHash(const T* str)
{
	const unsigned int prime = 16777619u;
	unsigned int hash = 2166136261u;
	for(; *str; ++str)
	{
		hash *= prime;
		hash ^= static_cast<unsigned int>(*str);
	}
	return hash;
}

// DJB hash: a hash devised by Professor Daniel J. Bernstein.
//
// T    element type of the string (e.g. char).
// str  non-null pointer to a sequence of T terminated by a zero element.
//
// Starts from 5381 and adds (hash << 5) + element to the hash for each
// element, i.e. hash = hash * 33 + element with unsigned wraparound. An empty
// string hashes to 5381. Returns the full unsigned value (top bit not masked).
template<class T>
unsigned int DJBHash(const T* str)
{
	unsigned int hash = 5381;
	for(; *str; ++str)
	{
		hash += (hash << 5) + static_cast<unsigned int>(*str);
	}
	return hash;
}

// DJB2 hash: another hash devised by Professor Daniel J. Bernstein.
//
// T    element type of the string (e.g. char).
// str  non-null pointer to a sequence of T terminated by a zero element.
//
// Starts from 5381; for each element computes hash = (hash * 33) ^ element
// with unsigned wraparound. An empty string hashes to 5381. Returns the full
// unsigned value (top bit not masked).
template<class T>
unsigned int DJB2Hash(const T* str)
{
	const unsigned int multiplier = 33;
	unsigned int hash = 5381;
	for(; *str; ++str)
	{
		hash = (hash*multiplier)^static_cast<unsigned int>(*str);
	}
	return hash;
}


// PJW hash: based on a paper by Peter J. Weinberger of AT&T Bell Labs.
//
// T    element type of the string (e.g. char).
// str  non-null pointer to a sequence of T terminated by a zero element.
//
// The hash is shifted left by one eighth of the width of unsigned int and the
// element is added. Whenever any of the top one-eighth bits become set, they
// are folded back into the low part (shifted right by three quarters of the
// width) and then all top bits are cleared. Returns the resulting value.
template<class T>
unsigned int PJWHash(const T* str)
{
	// Bit-layout constants derived from sizeof(unsigned int), counting 8 bits per byte.
	const unsigned int totalBits     = static_cast<unsigned int>(sizeof(unsigned int)*8);
	const unsigned int threeQuarters = (totalBits*3)/4;
	const unsigned int oneEighth     = totalBits/8;
	const unsigned int highBits      = (~0u) << (totalBits-oneEighth);

	unsigned int hash = 0;
	for(; *str; ++str)
	{
		hash = (hash << oneEighth) + static_cast<unsigned int>(*str);
		const unsigned int overflow = hash & highBits;
		if(overflow != 0)
		{
			// Fold the overflowing top bits back in, then clear the whole top group.
			hash = (hash^(overflow >> threeQuarters)) & (~highBits);
		}
	}
	return hash;
}

// ELF hash: a hash shipped with the Unix Extended Library Functions; per the
// original note it is a variant of the PJW hash.
//
// T    element type of the string (e.g. char).
// str  non-null pointer to a sequence of T terminated by a zero element.
//
// The hash is shifted left by one eighth of the width of unsigned int and the
// element is added. Whenever any of the top one-eighth bits become set, they
// are xored back into the low part (shifted right by three quarters of the
// width) and exactly those set bits are cleared. Returns the resulting value.
template<class T>
unsigned int ELFHash(const T* str)
{
	// Bit-layout constants derived from sizeof(unsigned int), counting 8 bits per byte.
	const unsigned int totalBits     = static_cast<unsigned int>(sizeof(unsigned int)*8);
	const unsigned int threeQuarters = (totalBits*3)/4;
	const unsigned int oneEighth     = totalBits/8;
	const unsigned int highBits      = (~0u) << (totalBits-oneEighth);

	unsigned int hash = 0;
	for(; *str; ++str)
	{
		hash = (hash << oneEighth) + static_cast<unsigned int>(*str);
		const unsigned int overflow = hash & highBits;
		if(overflow != 0)
		{
			hash ^= (overflow >> threeQuarters); // fold the top bits into the low part
			hash &= ~overflow;                   // clear the bits that were just folded
		}
	}
	return hash;
}

// Hash mentioned in the book "Programming Pearls".
//
// T    element type of the string (e.g. char).
// str  non-null pointer to a sequence of T terminated by a zero element.
//
// Accumulates hash = hash * 31 + element with unsigned wraparound, then reduces
// the result modulo 29989, so the return value is always in [0, 29988].
template<class T>
unsigned int PearlsHash(const T* str)
{
	const unsigned int bucketCount = 29989;
	const unsigned int multiplier = 31;
	unsigned int hash = 0;
	for(; *str; ++str)
	{
		hash = (hash*multiplier) + static_cast<unsigned int>(*str);
	}
	return hash % bucketCount;
}

int main()
{
	char* str = "192.168.10.0";

	unsigned int hashCode;
	hashCode = BKDRHash<char>(str);
	cout << "BKDRHash hashCode = " << hashCode << endl;

	hashCode = SDBMHash<char>(str);
	cout << "SDBMHash hashCode = " << hashCode << endl;

	hashCode = RSHash<char>(str);
	cout << "RSHash hashCode = " << hashCode << endl;

	hashCode = APHash<char>(str);
	cout << "APHash hashCode = " << hashCode << endl;

	hashCode = JSHash<char>(str);
	cout << "JSHash hashCode = " << hashCode << endl;

	hashCode = DEKHash<char>(str);
	cout << "DEKHash hashCode = " << hashCode << endl;

	hashCode = FNVHash<char>(str);
	cout << "FNVHash hashCode = " << hashCode << endl;

	hashCode = DJBHash<char>(str);
	cout << "DJBHash hashCode = " << hashCode << endl;

	hashCode = DJB2Hash<char>(str);
	cout << "DJB2Hash hashCode = " << hashCode << endl;

	hashCode = PJWHash<char>(str);
	cout << "PJWHash hashCode = " << hashCode << endl;

	hashCode = ELFHash<char>(str);
	cout << "ELFHash hashCode = " << hashCode << endl;

	hashCode = PearlsHash<char>(str);
	cout << "PearlsHash hashCode = " << hashCode << endl;

	system("pause");
	return 0;
}

注:以上代碼是在Win+VS2012中運行

点赞