写了个简单的类,用于检索关键词和按前缀查询。
举例:用来屏蔽(和谐)关键词。
//string[] keys = { "和谐", "社会", "你我共建" };
//BuildTree bt = new BuildTree(keys);
//string testStr = "和谐,和(干扰)谐,你#我#共#建。";
//this.textBox1.Text += "原文:" + testStr;
//this.textBox1.Text += "\r\n结果:" + bt.Replace(testStr);
//this.textBox1.Text += "\r\n模糊匹配结果:" + bt.Replace(testStr, 4);
还有很多功能,请自行调试体验。
附:
/// <summary>
/// One node of a trie (prefix tree). Each node owns its children, keyed by an
/// integer; the callers in this file pass a character's UTF-16 code unit as the key.
/// </summary>
/// <remarks>
/// The class deliberately has no finalizer: it holds only managed state, and an
/// empty finalizer would force every node through the finalization queue.
/// </remarks>
public class TireTree
{
    /// <summary>Child nodes, indexed by key.</summary>
    public Dictionary<int, TireTree> subNode = new Dictionary<int, TireTree>();

    /// <summary>True when the path from the root to this node spells a complete keyword.</summary>
    public bool isTerminate = false;

    /// <summary>
    /// Looks up the direct child stored under <paramref name="key"/>.
    /// </summary>
    /// <param name="key">Key of the child to find.</param>
    /// <returns>The child node, or <c>null</c> when no child has that key.</returns>
    public TireTree Search(int key)
    {
        // Single hash lookup instead of ContainsKey followed by the indexer.
        TireTree child;
        return subNode.TryGetValue(key, out child) ? child : null;
    }

    /// <summary>
    /// Creates a new child node and registers it under <paramref name="key"/>.
    /// </summary>
    /// <param name="key">Key of the new child.</param>
    /// <param name="isTerminate">Whether the new child ends a keyword.</param>
    /// <returns>The newly created child node.</returns>
    /// <exception cref="System.ArgumentException">
    /// A child with the same key already exists (raised by <c>Dictionary.Add</c>).
    /// </exception>
    public TireTree Add(int key, bool isTerminate)
    {
        var child = new TireTree();
        child.isTerminate = isTerminate;
        subNode.Add(key, child);
        return child;
    }
}
/// <summary>
/// Keyword matcher backed by a character trie. It can locate keywords in a text,
/// mask them with '*', test whether a text contains any keyword, and list the
/// keywords that extend a given prefix.
/// </summary>
/// <remarks>
/// Trie keys are the UTF-16 code units of the keyword characters, so matching is
/// ordinal and case-sensitive. The class has no finalizer because it holds only
/// managed state.
/// </remarks>
public class BuildTree
{
    // Root of the trie used by every query method; replaced on each call to Build.
    TireTree innerRoot;

    /// <summary>
    /// Creates a matcher for the given keywords.
    /// </summary>
    /// <param name="keys">Keywords to index; <c>null</c> is treated as an empty list.</param>
    public BuildTree(string[] keys)
    {
        innerRoot = Build(keys);
    }

    /// <summary>
    /// Builds a trie from <paramref name="keys"/> and makes it the trie used by this
    /// instance (the previous trie, if any, is discarded).
    /// </summary>
    /// <param name="keys">
    /// Keywords to index. A <c>null</c> array yields an empty trie; <c>null</c> or
    /// empty entries are skipped.
    /// </param>
    /// <returns>The root node of the newly built trie.</returns>
    public TireTree Build(string[] keys)
    {
        TireTree root = new TireTree();
        if (keys != null)
        {
            foreach (string key in keys)
            {
                if (string.IsNullOrEmpty(key))
                {
                    continue;
                }

                // Walk down from the root, creating the nodes that are missing.
                TireTree node = root;
                foreach (char ch in key)
                {
                    TireTree child = node.Search((int)ch);
                    if (child == null)
                    {
                        child = node.Add((int)ch, false);
                    }
                    node = child;
                }

                // The node reached by the last character ends a keyword.
                node.isTerminate = true;
            }
        }

        innerRoot = root;
        return root;
    }

    #region Search

    /// <summary>
    /// Finds every exact occurrence of every keyword in <paramref name="strText"/>.
    /// </summary>
    /// <param name="strText">Text to scan; <c>null</c> or empty yields no matches.</param>
    /// <returns>
    /// All matches, ordered by start index and then by length. Overlapping and
    /// nested occurrences are all reported.
    /// </returns>
    public WordInText[] Search(string strText)
    {
        return new List<WordInText>(FindExact(strText)).ToArray();
    }

    /// <summary>
    /// Finds keywords in <paramref name="strText"/> while tolerating interfering
    /// characters between the characters of a keyword.
    /// </summary>
    /// <param name="strText">Text to scan; <c>null</c> or empty yields no matches.</param>
    /// <param name="wordLength">
    /// Skip tolerance. While a match attempt is in progress, a character that does
    /// not continue the keyword is skipped as long as the number of characters
    /// already skipped in that attempt is at most <paramref name="wordLength"/>
    /// (so up to wordLength + 1 characters can be skipped per attempt). A negative
    /// value disables skipping.
    /// </param>
    /// <returns>
    /// The matches in scan order. <c>Length</c> covers the skipped characters that
    /// lie inside the match.
    /// </returns>
    public WordInText[] Search(string strText, int wordLength)
    {
        return new List<WordInText>(FindFuzzy(strText, wordLength)).ToArray();
    }

    /// <summary>
    /// Returns the text of every exact keyword occurrence in <paramref name="strText"/>.
    /// </summary>
    /// <param name="strText">Text to scan; <c>null</c> or empty yields no matches.</param>
    /// <returns>The matched keywords, in the same order as <see cref="Search(string)"/>.</returns>
    public string[] SearchWord(string strText)
    {
        List<string> words = new List<string>();
        foreach (WordInText hit in FindExact(strText))
        {
            words.Add(strText.Substring(hit.Start, hit.Length));
        }
        return words.ToArray();
    }

    // Lazily yields every exact keyword occurrence, trying each text position as a start.
    private IEnumerable<WordInText> FindExact(string strText)
    {
        if (string.IsNullOrEmpty(strText))
        {
            yield break;
        }

        for (int start = 0; start < strText.Length; start++)
        {
            // Restarting from the root at every position guarantees that a failed
            // or finished match never hides an occurrence that begins later.
            TireTree node = innerRoot;
            for (int i = start; i < strText.Length; i++)
            {
                node = node.Search((int)strText[i]);
                if (node == null)
                {
                    break;
                }
                if (node.isTerminate)
                {
                    yield return new WordInText() { Start = start, Length = i - start + 1 };
                }
            }
        }
    }

    // Lazily yields the fuzzy matches using a single left-to-right pass.
    private IEnumerable<WordInText> FindFuzzy(string strText, int wordLength)
    {
        if (string.IsNullOrEmpty(strText))
        {
            yield break;
        }

        TireTree node = innerRoot;
        bool matching = false;   // true while a match attempt is in progress
        bool gapsAllowed = true; // cleared once the attempt has reached a keyword end
        int skipped = 0;         // characters skipped in the current attempt
        int start = 0;           // index where the current attempt began

        for (int i = 0; i < strText.Length; i++)
        {
            TireTree next = node.Search((int)strText[i]);

            if (!matching)
            {
                if (next == null)
                {
                    continue;
                }

                // The first matching character starts an attempt.
                matching = true;
                node = next;
                start = i;
                if (next.isTerminate)
                {
                    // Single-character keyword.
                    yield return new WordInText() { Start = start, Length = 1 };
                    gapsAllowed = false;
                }
                continue;
            }

            if (next != null)
            {
                // The keyword continues with this character.
                node = next;
                if (next.isTerminate)
                {
                    yield return new WordInText() { Start = start, Length = i - start + 1 };
                    // After a keyword end, no further characters may be skipped.
                    gapsAllowed = false;
                }
                continue;
            }

            // NOTE(review): "<=" tolerates wordLength + 1 skips; kept as in the
            // original implementation - confirm whether "<" was intended.
            if (gapsAllowed && skipped <= wordLength)
            {
                skipped++;
                continue;
            }

            // Too many interfering characters: abandon the attempt and re-examine
            // this character from the root, since it may start a new keyword.
            matching = false;
            node = innerRoot;
            skipped = 0;
            gapsAllowed = true;
            i--;
        }
    }

    #endregion

    #region Replace

    /// <summary>
    /// Masks every exact keyword occurrence in <paramref name="strText"/> with '*'.
    /// </summary>
    /// <param name="strText">Text to filter.</param>
    /// <returns>The masked text; the input itself when it is <c>null</c>, empty or has no match.</returns>
    public string Replace(string strText)
    {
        var result = Search(strText);
        return Replace(result, strText);
    }

    /// <summary>
    /// Masks every fuzzy keyword match in <paramref name="strText"/> with '*',
    /// including the interfering characters inside each match.
    /// </summary>
    /// <param name="strText">Text to filter.</param>
    /// <param name="wordLength">Skip tolerance, see <see cref="Search(string, int)"/>.</param>
    /// <returns>The masked text; the input itself when it is <c>null</c>, empty or has no match.</returns>
    public string Replace(string strText, int wordLength)
    {
        var result = Search(strText, wordLength);
        return Replace(result, strText);
    }

    // Overwrites every matched range of strText with '*'.
    private string Replace(WordInText[] result, string strText)
    {
        if (string.IsNullOrEmpty(strText) || result.Length == 0)
        {
            return strText;
        }

        char[] masked = strText.ToCharArray();
        foreach (var item in result)
        {
            int end = item.Start + item.Length;
            for (int i = item.Start; i < end; i++)
            {
                masked[i] = '*';
            }
        }
        return new string(masked);
    }

    #endregion

    #region Contains

    /// <summary>
    /// Tells whether <paramref name="strText"/> contains a keyword when interfering
    /// characters are tolerated. Stops at the first match.
    /// </summary>
    /// <param name="strText">Text to scan; <c>null</c> or empty yields <c>false</c>.</param>
    /// <param name="wordLength">Skip tolerance, see <see cref="Search(string, int)"/>.</param>
    /// <returns><c>true</c> when at least one fuzzy match exists.</returns>
    public bool Contains(string strText, int wordLength)
    {
        return HasAny(FindFuzzy(strText, wordLength));
    }

    /// <summary>
    /// Tells whether <paramref name="strText"/> contains any keyword exactly.
    /// Stops at the first match.
    /// </summary>
    /// <param name="strText">Text to scan; <c>null</c> or empty yields <c>false</c>.</param>
    /// <returns><c>true</c> when at least one keyword occurs in the text.</returns>
    public bool Contains(string strText)
    {
        return HasAny(FindExact(strText));
    }

    // True when the lazy sequence produces at least one match; later matches are never computed.
    private static bool HasAny(IEnumerable<WordInText> hits)
    {
        using (IEnumerator<WordInText> cursor = hits.GetEnumerator())
        {
            return cursor.MoveNext();
        }
    }

    #endregion

    #region Children

    /// <summary>
    /// Lists keywords that start with <paramref name="prefix"/> and are longer than
    /// it (the prefix itself is not returned even if it is a keyword).
    /// </summary>
    /// <param name="prefix">Prefix to complete; <c>null</c> is treated as an empty prefix.</param>
    /// <param name="count">Maximum number of keywords to return.</param>
    /// <returns>
    /// At most <paramref name="count"/> keywords, in the enumeration order of the
    /// trie's dictionaries; an empty array when no keyword starts with the prefix.
    /// </returns>
    /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="count"/> is negative.</exception>
    public string[] GetChild(string prefix, int count)
    {
        if (count < 0)
        {
            throw new System.ArgumentOutOfRangeException("count");
        }
        if (prefix == null)
        {
            prefix = string.Empty;
        }

        // Locate the node that represents the prefix.
        TireTree node = innerRoot;
        foreach (char ch in prefix)
        {
            node = node.Search((int)ch);
            if (node == null)
            {
                // No keyword starts with this prefix.
                return new string[0];
            }
        }

        // Collect the keywords below that node.
        List<string> result = new List<string>();
        FindSub(node, result, prefix, count);

        // Never ask GetRange for more items than were collected.
        int take = count < result.Count ? count : result.Count;
        return result.GetRange(0, take).ToArray();
    }

    // Depth-first walk that appends the keywords below rootNode until limit entries are collected.
    private void FindSub(TireTree rootNode, List<string> result, string tempStr, int limit)
    {
        foreach (var item in rootNode.subNode)
        {
            if (result.Count >= limit)
            {
                return;
            }

            string word = tempStr + (char)(item.Key);
            if (item.Value.isTerminate)
            {
                result.Add(word);
            }
            FindSub(item.Value, result, word, limit);
        }
    }

    #endregion
}
// Position of one keyword match inside a searched text, as produced by
// BuildTree.Search and consumed by BuildTree.Replace.
public class WordInText
{
// Zero-based index of the first matched character in the text.
public int Start;
// Number of characters covered by the match, counted from Start.
public int Length;
}