实验描述:
实验分析:
所谓哈希查找,就是根据输入的关键字直接计算出存储位置,从而找到我们想要查找的信息,并且期望做到平均O(1)的时间复杂度,即查找次数不会随着数据量的增大而明显增加。基础哈希表的创建过程如下:
- 根据输入计算Hash值,如果不冲突直接放入对应的槽中;
- 如果冲突,可以采用开放地址法或链式存储法等冲突解决方法,现在本助教给出开放地址法中的再Hash(双重哈希)方法来解决冲突;
- 通过步骤1和2创建好Hash表后,我们在创建的过程中记录最大冲突次数,查找时最多需要探测"最大冲突次数+1"次;然后进行查找,并计算平均查找次数。
实验代码
#include<stdio.h>
#include<iostream>
#include<iomanip>
#include<fstream>
#include<string>
#include<vector>
using namespace std;
// Number of student records: the input loop stops after this many, and the
// hash-table build and lookup loops always iterate exactly this many times.
#define MAXNUM 1000
// Multiplier applied to MAXNUM to size the hash table (see Sum_Space).
#define HASHSPACE 2
// Empty-string sentinel: a table slot whose name equals this is treated as unused.
string com = "";
// One student record. Every field is stored as text exactly as read from the
// input files. An empty `name` is what the hash table uses to recognise an
// unused slot.
struct student {
    std::string name;
    std::string sex;
    std::string age;
    std::string mail;
    std::string phone;

    // All fields start empty: std::string default-constructs to "", so no
    // explicit assignments are needed. (The former `struct student()` spelling
    // is not a valid constructor declaration in standard C++.)
    student() {}
};
// Sum_Space = HASHSPACE * MAXNUM. Note that the hash table itself has
// Sum_Space - 1 slots, and Hash() reduces indices modulo Sum_Space - 1.
const int Sum_Space = int(HASHSPACE*MAXNUM);
// stus: records in the order they were read; stus_hash: the open-addressing
// hash table, indexed by the value returned from Hash().
student stus[MAXNUM], stus_hash[Sum_Space - 1];
int Hash(string name, int confict_num) {
unsigned int index = 0,sum = 0;
for (int i = 0; i < name.size(); i++) {
sum += int(name[i])*(i*i+1);
}
index = sum % (Sum_Space - 1);
if (confict_num == 0) return index;
int Gk = (sum + 1) % (Sum_Space - 3);
return ((index + (confict_num*Gk) % (Sum_Space - 1)))%(Sum_Space-1);
}
// Inserts the MAXNUM records of `stus` into the open-addressing table
// `stus_hash`, probing with Hash(name, 0), Hash(name, 1), ... until a slot with
// an empty name is found.
//
//   stus       source records (MAXNUM entries).
//   stus_hash  destination table (Sum_Space - 1 entries); a slot is considered
//              free while its name is empty.
//   returns    the largest number of collisions any single record hit before
//              being placed.
int CreatHashTable(student *stus, student *stus_hash) {
    const int table_size = Sum_Space - 1;
    int max_confict_num = 0;

    for (int i = 0; i < MAXNUM; i++) {
        const std::string &name = stus[i].name;
        bool placed = false;

        // Bound the probe sequence by the table size so a full table (or a
        // degenerate probe sequence) cannot hang the program.
        for (int confict_num = 0; confict_num < table_size; ++confict_num) {
            const int index = Hash(name, confict_num);
            if (stus_hash[index].name.empty()) {
                // student holds std::string members, so it must be copied with
                // assignment; a raw memcpy would alias the string buffers.
                stus_hash[index] = stus[i];
                placed = true;
                break;
            }
            // This probe collided: record the collision count reached so far.
            if (max_confict_num < confict_num + 1)
                max_confict_num = confict_num + 1;
        }

        if (!placed)
            std::cerr << "Hash table full: could not insert " << name << std::endl;
    }
    return max_confict_num;
}
int find_hash(string name, int max_confict_num,student *stus_hash,int &find_error_time) {
int find_time = 0;
while (find_time <= max_confict_num) {
int index = Hash(name, find_time);
if (stus_hash[index].name == name)
{
//cout << "Find Success!" << " Index:" << index << endl;
return find_time+1;
}
else
{
//cout << "Find Confict With " << index << endl;
find_time++;
}
}
cout << "Error Can't Find The " << name << endl;
find_error_time = find_time;
return -1;
}
// Driver: loads up to MAXNUM student records from five parallel text files
// (one field per file, one record per line), builds the hash table, prints
// table statistics, measures the average number of probes needed to find every
// loaded record, and finally demonstrates a failed lookup.
int main() {
    /**********************************Read Student Info**********************************************/
    // The files are only read, so open them as input streams.
    std::ifstream name("name.txt"), sex("sex.txt"), age("age.txt"), mail("mail.txt"), phone("phone.txt");
    // Alternative data set:
    // std::ifstream name("name_100000.txt"), sex("sex_100000.txt"), age("age_100000.txt"), mail("mail_100000.txt"), phone("phone_100000.txt");
    if (!name || !sex || !age || !mail || !phone) {
        std::cerr << "Error: cannot open one of name.txt, sex.txt, age.txt, mail.txt, phone.txt" << std::endl;
        return 1;
    }

    std::string tmp_name, tmp_sex, tmp_age, tmp_mail, tmp_phone;
    int num = 0;
    while (num < MAXNUM &&
           std::getline(name, tmp_name) && std::getline(sex, tmp_sex) &&
           std::getline(age, tmp_age) && std::getline(mail, tmp_mail) &&
           std::getline(phone, tmp_phone)) {
        stus[num].name = tmp_name;
        stus[num].age = tmp_age;
        stus[num].mail = tmp_mail;
        stus[num].phone = tmp_phone;
        stus[num].sex = tmp_sex;
        num++;
    }
    if (num < MAXNUM) {
        // The build and lookup phases below always process MAXNUM records, so
        // a short read leaves empty records in the measurement.
        std::cerr << "Warning: only " << num << " of " << MAXNUM
                  << " records were read; statistics will include empty records." << std::endl;
    }

    /**********************************Creat Hash Table************************************************/
    std::cout << "/**********************************Hash Search************************************************/" << std::endl;
    const int max_confict_num = CreatHashTable(stus, stus_hash);
    std::cout << "Used_Num:Data_Num=" << Sum_Space << ":" << MAXNUM << "="
              << static_cast<float>(Sum_Space) / static_cast<float>(MAXNUM) << std::endl;
    std::cout << "max_confict_num:" << max_confict_num << std::endl;

    // Count the slots that are still unused (empty name).
    int val = 0;
    for (int i = 0; i < Sum_Space - 1; i++) {
        if (stus_hash[i].name.empty())
            val++;
    }
    std::cout << "None Val Num:" << val << std::endl;

    /*********************************Compute Average Find Time*****************************************/
    int find_sum_time = 0, find_error_time = 0;
    for (int i = 0; i < MAXNUM; i++) {
        const int find_time = find_hash(stus[i].name, max_confict_num, stus_hash, find_error_time);
        if (find_time == -1) {
            std::cout << "Error Find!" << std::endl;
            getchar();  // pause so the failure can be inspected
        }
        else
            find_sum_time += find_time;
    }
    std::cout << "Average Find Time :" << find_sum_time / static_cast<float>(MAXNUM) << std::endl;

    /***********************************Test The Case of Can't Find*************************************/
    const int miss_time = find_hash("Jerry", max_confict_num, stus_hash, find_error_time);
    if (miss_time == -1) std::cout << "Find Error Time :" << find_error_time << std::endl;
    getchar();  // keep the console window open
    return 0;
}
实验结果:
从以上结果我们能看出,随着槽数的增加,最大冲突次数变小,平均查找次数也变小,但随之带来的没用到的空间也变多了,时间和空间需要做一个权衡。当然,同学们日后还会学习更加高级的查找结构,例如红黑树,它能保证最坏情况下O(log n)的查找性能。