哈希函数--开放定址法

最新推荐文章于 2025-06-01 19:50:57 发布

ReginaPhilange

最新推荐文章于 2025-06-01 19:50:57 发布

阅读量1.2k

点赞数

CC 4.0 BY-SA版权

分类专栏：数据结构及其算法

本文链接：https://blog.csdn.net/qq_41268108/article/details/82669012

数据结构及其算法专栏收录该内容

14 篇文章

订阅专栏

哈希概念

在顺序搜索以及二叉搜索树中，元素的存储位置和元素的关键码之间没有对应关系，因此在查找一个元素时，必须经过关键码的多次比较。搜索的效率取决于搜索过程中元素的比较次数。

理想搜索方法：可以不经过任何比较，一次直接从表中得到要搜索的元素。

如果构造一种存储结构，通过某种函数（hashFunc）使元素的存储位置与它的关键码之间能够建立一一映射的关系，那么在查找时通过该函数可以很快找到该元素。

当向该结构中：

插入元素时：根据待插入元素的关键码，以此函数计算出该元素的存储位置并按此位置进行存放。

搜索元素时：对元素的关键码进行同样的计算，把求得的函数值当做元素的存储位置，在结构中按此位置取元素比较，若关键码相等，则搜索成功。

常用的哈希函数构造方法包括：

直接寻址法（直接定址法）：
- 公式：f(key)=a*key+b （a,b都是常数）
- 适合查找表较小且连续的情况
- 优点：简单、均匀，不会产生冲突
- 缺点：需要知道关键字的分布，现实中不常用
数字分析法
- 方法：抽取关键字中的一部分来计算存储位置
- 适用于关键词较长的情况
平方取中法
- 方法：将关键字先平方，然后截取中间x位作为存储位置
- 适合用于不知道关键词分布，且位数不长的情况
折叠法
- 方法：将关键字拆分成若干部分后累加起来，根据散列表表长取总和的后若干位作为存储位置
- 适用于不知道关键字分布，且位数较长的情况
除留余数法
- 方法：f(key)=key mod p (p<=m)，m是散列表表长
- p取小于等于m的最大质数（即最接近m的质数），或者不包含小于20的质因子的合数，以减少冲突的情况
随机数法
- 方法：f(key)=random(key)
- 注意random的随机种子需要是固定的，以便查询的时候能够根据key重新找到存储位置
- 适用于关键字长度不等的情况

常用冲突处理方法：

开放定址法：
- 方法：$f_i(key) = (f(key) + d_i) \bmod m$，其中 $d_i = 1, 2, 3, \dots, m-1$
- 线性探测：只要一旦发现冲突，就寻找下一个空的散列地址
- 二次探测：$d_i = 1^2, -1^2, 2^2, -2^2, \dots, q^2, -q^2$，目的是不让关键词集中在某块区域，产生堆积
- 随机探测：$d_i$ 是一个随机数，但查询时需要设置和插入时相同的随机种子
再散列函数法：
- 方法：$f_i(key) = RH_i(key)$，其中 $i = 1, 2, \dots, k$
- 遇到冲突就重新采用一个散列函数计算新的存储位置，可以使关键字不产生聚集
链地址法（拉链）
- 方法：将所有关键字的同义词记录在一个单链表中，在散列表中只存储所有同义词表的头指针
公共溢出区法
- 方法：为所有冲突的关键字开辟一个公共的溢出区来存放
- 适用于相对于基本表来说冲突数据很少的情况

hashtable.h

#pragma once
#include<stdio.h>
#include<malloc.h>
#include<assert.h>
#include<stdlib.h>
#include<string.h>


/* Key type of the table: a pointer to a NUL-terminated C string. */
typedef char* HTKeyType;
/* Value type stored next to each key. */
typedef int HTValueType;

/*
 * Occupancy marker of a single slot.
 * HTInit sets every slot to EMPTY, HTInsert marks the slot it fills as EXIST,
 * and HTRemove marks a removed slot as DELETE instead of EMPTY, so HTFind's
 * probe loop (which stops only at EMPTY) keeps walking past it.
 */
enum State
{
	EMPTY = 0,
	EXIST = 1,
		DELETE = 2,
};

/* One slot of the hash table: an occupancy marker plus a key/value pair. */
typedef struct HashData
{
	enum State		_state;	/* EMPTY, EXIST or DELETE */
	HTKeyType	_key;		/* key pointer exactly as passed to HTInsert (stored, not copied) */
	HTValueType _value;		/* value associated with _key */
}HashData;

/* Open-addressing hash table backed by a heap-allocated array of slots. */
typedef struct HashTable
{
	HashData* _tables;	/* slot array; allocated by HTInit, freed by HTDestroy */
	int _len;   // length of the slot array
	int _size;  // number of valid (EXIST) entries
}HashTable;

/* Initialize `ht` as an empty table with `len` slots (`len` must be > 0). */
void HTInit(HashTable* ht,int len);
/* Free the slot array of `ht` and reset it to an empty, zero-length table. */
void HTDestroy(HashTable* ht);

/* Insert key/value; returns 1 if the pair was stored, 0 otherwise. */
int HTInsert(HashTable* ht, HTKeyType key, HTValueType value);
/* Grow and rehash the table when it is too full; called by HTInsert. */
void CheckCapacity(HashTable * ht);
/* Remove `key`; returns 1 if it was found and removed, 0 otherwise. */
int HTRemove(HashTable* ht, HTKeyType key);
/* Look up `key`; returns a pointer to its slot, or NULL when not found. */
HashData* HTFind(HashTable* ht, HTKeyType key);

/* Number of valid entries currently stored. */
int HTSize(HashTable* ht);
/* Emptiness query on the table. */
int HTEmpty(HashTable* ht);

/* Dump every slot of the table to stdout (defined in hashtable.c). */
void HTPrint(HashTable *ht);

hashtable.c

#include"HashTable.h"
/*
 * Initialize `ht` as an empty hash table with `len` slots.
 *
 * ht  - table to initialize; must not be NULL. Any previous contents are
 *       overwritten without being freed.
 * len - number of slots to allocate; must be > 0.
 *
 * On allocation failure the table is left in a zero-capacity state
 * (_tables == NULL, _len == 0, _size == 0) that the caller can test for.
 */
void HTInit(HashTable* ht, int len)
{
	int i;

	assert(ht != NULL);
	assert(len > 0);

	ht->_size = 0;
	ht->_len = 0;

	/* calloc checks the count * size multiplication for overflow. */
	ht->_tables = calloc((size_t)len, sizeof(HashData));
	if (ht->_tables == NULL)
	{
		return;
	}

	/* Set each field explicitly rather than relying on the zero fill. */
	for (i = 0; i < len; ++i)
	{
		ht->_tables[i]._state = EMPTY;
		ht->_tables[i]._key = NULL;
		ht->_tables[i]._value = 0;
	}

	/* Record the real capacity: the hash function and the load check both use _len. */
	ht->_len = len;
}
/*
 * Release the slot array owned by `ht` and reset the table to a
 * zero-length, zero-size state. `ht` itself is not freed.
 */
void HTDestroy(HashTable* ht)
{
	assert(ht != NULL);

	free(ht->_tables);

	/* Clear every field so the struct no longer refers to freed memory. */
	ht->_tables = NULL;
	ht->_len = 0;
	ht->_size = 0;
}
/*
 * Polynomial string hash: hash = hash * 31 + byte, over every byte of `str`.
 *
 * str - NUL-terminated string; must not be NULL.
 *
 * The sum is accumulated in unsigned arithmetic so that overflow wraps
 * instead of being undefined, and each byte is read as unsigned char so the
 * result does not depend on whether plain char is signed. The final
 * conversion to int may produce a negative value for long strings.
 */
int StrHash(const char* str)
{
	unsigned int hash = 0;
	const unsigned int seed = 31;

	assert(str != NULL);

	while (*str)
	{
		/* Add the character value, not the pointer. */
		hash = hash * seed + (unsigned char)*str;
		++str;
	}
	return (int)hash;
}
/*
 * Map `key` to a slot index in [0, len).
 *
 * key - string key, hashed with StrHash.
 * len - number of slots; must be non-zero because it is used as a modulus.
 */
size_t HTHashFunc(HTKeyType key, size_t len)
{
	/* The int hash is converted to size_t before the modulo is taken. */
	size_t hash = StrHash(key);

	return hash % len;
}
/*
 * Insert a key/value pair using linear probing.
 *
 * ht    - table to insert into; must not be NULL.
 * key   - NUL-terminated string; must not be NULL. Only the pointer is
 *         stored, so the string must outlive its entry in the table.
 * value - value to associate with `key`.
 *
 * Returns 1 when the pair was stored. Returns 0 when an entry with an equal
 * key (compared by content with strcmp) already exists, or when the table
 * has no usable slot (zero capacity, or every slot occupied).
 */
int HTInsert(HashTable* ht, HTKeyType key, HTValueType value)
{
	size_t len;
	size_t index;
	size_t probes;
	HashData* target = NULL;	/* first reusable slot seen on the probe path */

	assert(ht != NULL);
	assert(key != NULL);

	/* Grow the table first if it is too full. */
	CheckCapacity(ht);
	if (ht->_tables == NULL || ht->_len <= 0)
	{
		return 0;
	}

	len = (size_t)ht->_len;
	index = HTHashFunc(key, len);

	/*
	 * Walk the same linear probe sequence HTFind uses, so that every stored
	 * key stays reachable. At most `len` probes guarantees termination even
	 * when no slot is EMPTY.
	 */
	for (probes = 0; probes < len; ++probes)
	{
		HashData* cur = &ht->_tables[index];

		if (cur->_state == EMPTY)
		{
			/* End of the probe chain: the key is not in the table. */
			if (target == NULL)
			{
				target = cur;
			}
			break;
		}

		if (cur->_state == EXIST)
		{
			if (strcmp(cur->_key, key) == 0)
			{
				return 0;
			}
		}
		else if (target == NULL)
		{
			/*
			 * Remember the first deleted slot for reuse, but keep probing:
			 * an equal key may still live further along the chain.
			 */
			target = cur;
		}

		if (++index == len)
		{
			index = 0;
		}
	}

	if (target == NULL)
	{
		return 0;
	}

	target->_state = EXIST;
	target->_key = key;
	target->_value = value;
	ht->_size++;

	return 1;
}
/*
 * Remove the entry for `key`, if present.
 *
 * The slot is marked DELETE rather than EMPTY, and the valid-entry count is
 * decremented. Returns 1 when an entry was removed, 0 when HTFind did not
 * locate the key.
 */
int HTRemove(HashTable* ht, HTKeyType key)
{
	HashData* found;

	assert(ht != NULL);

	found = HTFind(ht, key);
	if (found == NULL)
	{
		return 0;
	}

	found->_state = DELETE;
	ht->_size--;
	return 1;
}
/*
 * Look up `key` using linear probing.
 *
 * ht  - table to search; must not be NULL.
 * key - NUL-terminated string; must not be NULL.
 *
 * Starting at the key's hash slot, slots are examined one after another
 * (wrapping at the end of the array) until an EMPTY slot is reached or every
 * slot has been visited. Keys are compared by content with strcmp.
 *
 * Returns a pointer to the matching slot, or NULL when the key is not
 * present or the table has zero capacity.
 */
HashData* HTFind(HashTable* ht, HTKeyType key)
{
	size_t len;
	size_t index;
	size_t probes;

	assert(ht != NULL);
	assert(key != NULL);

	if (ht->_tables == NULL || ht->_len <= 0)
	{
		return NULL;
	}

	len = (size_t)ht->_len;
	index = HTHashFunc(key, len);

	/* At most `len` probes guarantees termination when no slot is EMPTY. */
	for (probes = 0; probes < len; ++probes)
	{
		HashData* cur = &ht->_tables[index];

		if (cur->_state == EMPTY)
		{
			break;
		}

		/*
		 * Only live entries are compared. A DELETE slot is stepped over
		 * without touching its key pointer, which may no longer be valid.
		 */
		if (cur->_state == EXIST && strcmp(cur->_key, key) == 0)
		{
			return cur;
		}

		if (++index == len)
		{
			index = 0;
		}
	}

	return NULL;
}
/*
 * Grow the table when it is too full.
 *
 * ht - table to check; must not be NULL.
 *
 * A table with at least 80% of its slots holding valid entries is replaced
 * by one with twice as many slots, and every valid entry is re-inserted so
 * it lands at the index matching the new length. A zero-length table is
 * grown to a small default length instead of being used as a divisor.
 * If the new slot array cannot be allocated the table is left unchanged.
 */
void CheckCapacity(HashTable * ht)
{
	enum { DEFAULT_LEN = 10 };
	HashTable newht;
	int new_len;
	int i;

	assert(ht != NULL);

	/*
	 * Same threshold as the integer test `_size * 10 / _len > 7`, written
	 * without a division (no divide-by-zero) and in a wider type (no int
	 * overflow in the multiplication).
	 */
	if (ht->_len > 0 &&
		(long long)ht->_size * 10 < (long long)ht->_len * 8)
	{
		return;
	}

	new_len = (ht->_len > 0) ? ht->_len * 2 : DEFAULT_LEN;

	HTInit(&newht, new_len);
	if (newht._tables == NULL)
	{
		return;
	}

	/* Only EXIST slots are carried over; deleted slots are dropped. */
	for (i = 0; i < ht->_len; ++i)
	{
		if (ht->_tables[i]._state == EXIST)
		{
			HTInsert(&newht, ht->_tables[i]._key, ht->_tables[i]._value);
		}
	}

	/* Free the old slot array, then take over the new table's fields. */
	HTDestroy(ht);
	ht->_tables = newht._tables;
	ht->_size = newht._size;
	ht->_len = newht._len;
}
/*
 * Print every slot of the table to stdout, one line per slot, in the form
 *   STATE->table[index]key:value
 * followed by a blank line.
 *
 * The key is a string and is printed with %s. It is printed only for EXIST
 * slots; other slots show an empty key because their key pointer may be
 * NULL or no longer valid.
 */
void HTPrint(HashTable *ht)
{
	static const char* const strState[3] = { "EMPTY", "EXIST", "DELETE" };
	int i;

	assert(ht != NULL);

	for (i = 0; i < ht->_len; ++i)
	{
		const HashData* slot = &ht->_tables[i];
		const char* key = "";

		if (slot->_state == EXIST && slot->_key != NULL)
		{
			key = slot->_key;
		}
		printf("%s->table[%d]%s:%d\n", strState[slot->_state], i, key, slot->_value);
	}
	printf("\n");
}
/* Return the number of valid entries recorded in `ht` (which must not be NULL). */
int HTSize(HashTable* ht)
{
	int count;

	assert(ht != NULL);

	count = ht->_size;
	return count;
}
/*
 * Report whether the table holds no valid entries.
 *
 * Returns 1 when the valid-entry count is zero, 0 otherwise.
 * The table is not modified.
 */
int HTEmpty(HashTable* ht)
{
	assert(ht != NULL);
	/* Compare with ==; an assignment here would wipe the entry count. */
	return ht->_size == 0;
}

test.c

#include"HashTable.h"

//void TestHashTable()
//{
//	HashData* ret;
//	HashTable ht;
//	size_t i;
//	HTInit(&ht, 10);
//	//HTInsert(&ht, 7, 0);
//	HTInsert(&ht, 17, 0);
//	//HTInsert(&ht, 27, 0);
//	//HTInsert(&ht, 8, 0);
//	HTPrint(&ht);
//	
//	HTRemove(&ht, 17);
//	ret = HTFind(&ht, 27);
//	ret->_value = 10;
//	HTPrint(&ht);
//
//	for (i = 0; i < 10; ++i)
//	{
//		HTInsert(&ht, rand(), i);
//	}
//	HTPrint(&ht);
//}
/*
 * Prints a string literal and then a hand-assembled byte string, one per line.
 * NOTE(review): this looks like an experiment with multi-byte character
 * encodings — confirm the intent with the author.
 */
void TestHashTable()
{
#if 0
	/* Disabled word-frequency test: counts how often each string occurs. */
	int i = 0;
	HashData* ret;
	char* strs[] = {"insert","insert","insert","insert","insert","sort", "insert", "sort", "find"};
	//char* strs[] = {"abcd", "bcad", "cbad","cbad", "aad", "abc", "acb"};
	//char* strs[] = {"工具", "工具", "工具", "调试"};
	HashTable ht;
	HTInit(&ht, 10);
	for (; i < sizeof(strs)/sizeof(char*); ++i)
	{
		ret = HTFind(&ht, strs[i]);
		if (ret)
		{
			ret->_value++;
		}
		else
		{
			HTInsert(&ht, strs[i], 1);
		}
	}

	HTPrint(&ht);
#endif

	char str[] = "工具";
	/* Two byte pairs separated by a space, then the terminating NUL. */
	char str_[6] = { -55, -74, ' ', -79, -56, '\0' };

	printf("%s\n", str);
	printf("%s\n", str_);
}
/* Program entry point: runs the test routine, then pauses before exiting. */
int main()
{
	TestHashTable();

	/* Presumably keeps a Windows console window open — the "pause" command is not portable. */
	(void)system("pause");

	return 0;
}