Demo entry 6657380

2333

   

Submitted by anonymous on Nov 03, 2017 at 07:51
Language: C++. Code size: 2.2 kB.

#include <iostream>
#include <string>
#include <fstream>
#include <cmath>
#include <time.h>
#include <iomanip>
using namespace std;

#define	  PATH	"D:\\input.txt"	//文件地址
#define	  NUM	100000				//定义hash表的长度

string map[NUM] = {};				//存放单词的hash表
int word_occur[NUM] = {};			//记录单词出现次数

int hash_processor(string str)	//hash函数
{
	unsigned int hash = 0;
	for (int i = 0; i < str.size(); i += 1)
	{
		hash = hash * 23 + int(str[i]);
	}
	hash %= 99787;
	return hash;
}
string str_processor(string str)	//字符串预处理
{
	string str2;
	//多余字符忽略,大写转小写
	for (int i = 0; i < str.size(); i++)
	{
		if (str[i] >= 'A' && str[i] <= 'Z')
		{
			str2 += str[i] + 32;
		}
		if (str[i] >= 'a' && str[i] <= 'z')
		{
			str2 += str[i];
		}
		if (str[i] == '\'' || str[i] == '-')
		{
			str2 += str[i];
		}
	}
	str = str2;
	return str;
}
int main()
{
	clock_t start = clock();
	string word;
	int hash = 0;
	int word_count = 0;			//单词总数
	int bomb_count = 0;			//碰撞总数
	int hash_count = 0;			//hash总数

	//读入文件
	ifstream file;
	file.open(PATH);

	if (file.is_open())	//读入文件成功
	{
		while (file >> word) //逐词读取
		{
			word = str_processor(word);		//单词预处理
			word_count++;					//单词计数
			hash = hash_processor(word);	//获得单词hash
			if (map[hash] == "")			//map无该单词,增加并计数
			{
				map[hash] = word;
				word_occur[hash]++;
				hash_count++;
			}
			if (map[hash] != word && map[hash] != "")//发生hash碰撞,输出碰撞记录
			{
				//碰撞记录输出

				cout << left << setw(15) << word << " 和已有单词   "
					<< left << setw(15) << map[hash] << " 发生hash碰撞" << endl;

				bomb_count++;
			}
			if (map[hash] == word)			//map有该单词,计数
			{
				word_occur[hash]++;
			}
		}
	}
	//读入文件失败报错
	else
	{
		cout << "无法读入文件,请检查路径及文件名,按任意键退出" << endl;
		cin.get();
		exit(1);
	}
	clock_t ends = clock();
	//输出单词表
	for (int i = 0; i < NUM; i++)
	{
		if (map[i] != "")
			cout << map[i] << " " << word_occur[i] << endl;
	}

	cout << "单词总数:" << word_count << endl;
	cout << "碰撞次数:" << bomb_count << endl;
	cout << "堆积比例:" << (double)bomb_count / NUM << endl;
	cout << "运行时间:" << (double)(ends - start) / CLOCKS_PER_SEC << "秒" << endl << endl;
	cout << "按任意键退出...";
	cin.get();
	return 0;
}

This snippet took 0.01 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).