Trie树简介:
Trie树又叫字典树,是一种多叉单词查找树,其每个节点都是一个字母,建立好树后,对一单词的查找可迅速完成,查找深度为该单词的长度。
Trie树的建立:
Trie树首先从已有的词库中读取单词来建立树,树的深度为最长单词的长度加一,多出的那一层为根节点,根节点不包含字符数据,只有指向各子树的指针。
优点:
可利用字符串的公共前缀减少无谓的字符比较,从而缩短查询时间。例如,可以用作快速查找一篇文章中是否有某个单词出现。
//使用trie来建立后缀树进行字符串的索引
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
using namespace std;
// One node of the trie. A node does not store its own character: the
// character is implied by the slot of the parent's toNext array that points
// at the node (slot k corresponds to the character 'a' + k).
struct stTrieNode
{
    // True when the node was created as the last character of an inserted
    // string, i.e. it is a leaf.
    bool bLeaf;
    // Marks an implicit leaf that sits in the middle of a longer path.
    bool bMidLeaf;
    // Position value recorded when the node was created; lookups report it
    // as the place where the matched string first appears.
    int firstAppear;
    // Child links, one slot per character; an empty slot is a null pointer.
    struct stTrieNode* toNext[32];
};

// Short names for the node type and for a pointer to a node.
typedef struct stTrieNode stTrieNode;
typedef struct stTrieNode *LPstTrieNode;
// Creates a new, zero-initialised trie node and hooks it into
// root->toNext[loc].
//
//   root        parent node that receives the new child
//   loc         child slot index; must lie inside root->toNext
//   szNodeInfo  character the slot stands for (not stored in the node;
//               kept so existing callers keep compiling)
//   len         number of characters still to insert, counting this one;
//               a value of 1 marks the new node as a leaf
//   firstAppear position value recorded in the new node
//
// Returns the new child, or NULL when root is NULL, loc falls outside the
// child array, or the allocation fails. In those cases the parent is left
// untouched. A node already stored in the slot is overwritten without being
// freed, so callers should only call this for an empty slot.
LPstTrieNode builtNode(LPstTrieNode root, int loc, char szNodeInfo, int len, int firstAppear)
{
    (void)szNodeInfo;

    if (root == NULL)
    {
        return NULL;
    }

    // Derive the bound from the array itself instead of repeating its size.
    const int slotCount = (int)(sizeof(root->toNext) / sizeof(root->toNext[0]));
    if (loc < 0 || loc >= slotCount)
    {
        return NULL;
    }

    // calloc hands back zeroed memory, replacing the malloc + memset pair.
    LPstTrieNode child = (LPstTrieNode)calloc(1, sizeof(stTrieNode));
    if (child == NULL)
    {
        return NULL;
    }

    child->firstAppear = firstAppear;
    // The last character of the inserted string ends the path: mark a leaf.
    child->bLeaf = (len == 1);

    // Link the child only once it is fully set up.
    root->toNext[loc] = child;
    return child;
}
// Inserts the first len characters of strInfo into the trie below root.
//
//   root        node to start inserting from
//   strInfo     characters to insert; only 'a'..'z' can be stored
//   len         number of characters to insert
//   firstAppear position value recorded in every node this call creates;
//               nodes that already exist keep their earlier value
//
// Insertion stops at the first character outside 'a'..'z' (this includes a
// terminating '\0' when len overstates the string length) and when a node
// cannot be created. Characters inserted before that point stay in the trie.
// Nothing happens when root or strInfo is NULL or len <= 0.
void AddToTree(LPstTrieNode root, char *strInfo, int len, int firstAppear)
{
    enum { ALPHABET_SIZE = 26 };

    if (root == NULL || strInfo == NULL)
    {
        return;
    }

    LPstTrieNode node = root;
    for (; len > 0; strInfo++, len--)
    {
        // The character is read only after len has been checked.
        const char ch = *strInfo;
        const int loc = ch - 'a';
        if (loc < 0 || loc >= ALPHABET_SIZE)
        {
            // Indexing toNext with this value would leave the array.
            return;
        }

        LPstTrieNode child = node->toNext[loc];
        if (child == NULL)
        {
            // builtNode marks the node as a leaf when len == 1.
            child = builtNode(node, loc, ch, len, firstAppear);
            if (child == NULL)
            {
                return;
            }
        }
        else if (len == 1 && !child->bLeaf)
        {
            // The string ends on a node that already lies on a longer path:
            // record it as an implicit leaf so the end of the string is not
            // lost.
            child->bMidLeaf = true;
        }

        node = child;
    }
}
// Looks up checkWord in the trie rooted at root.
//
//   root       root node of the trie
//   checkWord  NUL-terminated string to look for
//   loc        receives the firstAppear value of the node that matches the
//              last character; may be NULL when the caller does not need it;
//              left untouched when the lookup fails
//
// Returns true when every character of checkWord can be followed from the
// root, false otherwise. An empty string, a NULL argument, or any character
// outside 'a'..'z' yields false.
bool checkWord(LPstTrieNode root, char *checkWord , int *loc)
{
    enum { ALPHABET_SIZE = 26 };

    if (root == NULL || checkWord == NULL || checkWord[0] == '\0')
    {
        return false;
    }

    LPstTrieNode node = root;
    for (const char *cursor = checkWord; *cursor != '\0'; ++cursor)
    {
        const int slot = *cursor - 'a';
        // Valid slots are 0..25; slot 26 would be '{', not a letter.
        if (slot < 0 || slot >= ALPHABET_SIZE)
        {
            return false;
        }

        node = node->toNext[slot];
        if (node == NULL)
        {
            return false;
        }
    }

    // Every character was matched; node is the one for the last character.
    if (loc != NULL)
    {
        *loc = node->firstAppear;
    }
    return true;
}
int main()
{
char WordContent[128]; //从文本中读出的单词
char wordForCheck[128]; //验证单词
FILE *fReadIn = fopen("Stringmodle.txt","r+");
if (fReadIn == NULL)
{
cout<<"无法打开文件words.txt"<<endl;
system("pause");
return 0;
}
LPstTrieNode root = (LPstTrieNode)malloc(sizeof(stTrieNode));
memset(root, 0, sizeof(stTrieNode));
//读取数据到wordcontent中,建树过程
fscanf(fReadIn,"%s",WordContent);
int len = strlen(WordContent);
for (int i =0; i< len; i++)
{
AddToTree(root, WordContent+i, len-i, i);
}
fclose(fReadIn);
//验证一个单词是否在树中
while(true)
{
cout<<"输入要检验的单词: ";
bool nflag;
int appearLoc;
cin>>wordForCheck;
if (wordForCheck[0] == '0') //输入0验证结束
{
break;
}
else
nflag = checkWord(root, wordForCheck,&appearLoc);
if (nflag)
{
cout<<wordForCheck<<" 存在, 首次出现位置为:"<<appearLoc<<endl<<endl;
}
else
cout<<wordForCheck<<" 不存在"<<endl<<endl;
}
return 1;
}