C# Trie树工具类

写了个简单的类,用于检索关键词和查询前缀。

举例:用于屏蔽(和谐)关键词。

//string[] keys = { "和谐", "社会", "你我共建" };


            //BuildTree bt = new BuildTree(keys);


            //string testStr = "和谐,和(干扰)谐,你#我#共#建。";


            //this.textBox1.Text += "原文:" + testStr;


            //this.textBox1.Text += "\r\n结果:" + bt.Replace(testStr);


            //this.textBox1.Text += "\r\n模糊匹配结果:" + bt.Replace(testStr, 4);

还有很多功能,请自行调试。

附:

public class TireTree
    {
        /// <summary>
        /// Child nodes of this trie node, keyed by an integer code.
        /// BuildTree in this file uses a character cast to int as the key.
        /// </summary>
        public Dictionary<int, TireTree> subNode = new Dictionary<int, TireTree>();

        /// <summary>
        /// True when the path from the root to this node spells a complete keyword.
        /// </summary>
        public bool isTerminate = false;

        /// <summary>
        /// Looks up the direct child stored under <paramref name="key"/>.
        /// </summary>
        /// <param name="key">Integer code of the child to find.</param>
        /// <returns>The child node, or null when no child is stored under that key.</returns>
        public TireTree Search(int key)
        {
            // Single hash lookup instead of ContainsKey followed by the indexer.
            TireTree child;
            return subNode.TryGetValue(key, out child) ? child : null;
        }

        /// <summary>
        /// Creates a new child node under <paramref name="key"/> and returns it.
        /// </summary>
        /// <param name="key">Integer code the new child is stored under.</param>
        /// <param name="isTerminate">Whether the new child ends a complete keyword.</param>
        /// <returns>The newly created child node.</returns>
        /// <exception cref="System.ArgumentException">
        /// Thrown by the dictionary when a child already exists under <paramref name="key"/>.
        /// </exception>
        public TireTree Add(int key, bool isTerminate)
        {
            var child = new TireTree();
            child.isTerminate = isTerminate;

            subNode.Add(key, child);
            return child;
        }

        // The empty finalizer was removed on purpose: it released nothing, yet forced
        // every node onto the finalization queue and delayed its collection.
    }

    /// <summary>
    /// Keyword matcher backed by a trie of <see cref="TireTree"/> nodes.
    /// Supports exact and gap-tolerant ("fuzzy") keyword search, masking of
    /// matches with '*', containment tests and prefix completion.
    /// </summary>
    public class BuildTree
    {
        // Root of the keyword trie. Replaced on every call to Build.
        TireTree innerRoot;

        /// <summary>
        /// Creates a matcher for the given keywords.
        /// </summary>
        /// <param name="keys">Keywords to index. Null is treated as an empty list.</param>
        public BuildTree(string[] keys)
        {
            innerRoot = Build(keys);
        }

        /// <summary>
        /// Builds a trie from <paramref name="keys"/>, installs it as the trie used by
        /// this instance and returns its root.
        /// </summary>
        /// <param name="keys">
        /// Keywords to index. A null array, null entries and empty strings are ignored.
        /// </param>
        /// <returns>The root node of the newly built trie.</returns>
        public TireTree Build(string[] keys)
        {
            TireTree root = new TireTree();

            if (keys != null)
            {
                foreach (string key in keys)
                {
                    if (string.IsNullOrEmpty(key))
                    {
                        continue;
                    }

                    // Walk down from the root, creating missing nodes on the way.
                    TireTree node = root;
                    foreach (char ch in key)
                    {
                        TireTree child = node.Search((int)ch);
                        if (child == null)
                        {
                            child = node.Add((int)ch, false);
                        }
                        node = child;
                    }

                    // The node reached by the last character ends a keyword.
                    node.isTerminate = true;
                }
            }

            innerRoot = root;
            return root;
        }

        #region Search
        /// <summary>
        /// Finds every exact occurrence of every keyword in <paramref name="strText"/>.
        /// </summary>
        /// <param name="strText">Text to scan. Null or empty yields no matches.</param>
        /// <returns>
        /// One entry per occurrence, ordered by start position and then by length.
        /// Overlapping occurrences are all reported.
        /// </returns>
        public WordInText[] Search(string strText)
        {
            List<WordInText> result = new List<WordInText>();
            if (string.IsNullOrEmpty(strText))
            {
                return result.ToArray();
            }

            // Every position is tried as a possible keyword start. A single forward
            // pass that resets to the root on mismatch drops the mismatching character
            // and therefore misses matches such as the second "ab" in "abab".
            for (int start = 0; start < strText.Length; start++)
            {
                TireTree node = innerRoot;
                for (int i = start; i < strText.Length; i++)
                {
                    node = node.Search((int)strText[i]);
                    if (node == null)
                    {
                        break;
                    }
                    if (node.isTerminate)
                    {
                        result.Add(new WordInText() { Start = start, Length = i - start + 1 });
                    }
                }
            }

            return result.ToArray();
        }

        /// <summary>
        /// Finds keywords in <paramref name="strText"/> while tolerating foreign
        /// characters inserted between the characters of a keyword.
        /// </summary>
        /// <param name="strText">Text to scan. Null or empty yields no matches.</param>
        /// <param name="wordLength">
        /// Skip budget for one match attempt. A character that does not continue the
        /// keyword is skipped while the number of characters already skipped is not
        /// greater than this value, so up to <c>wordLength + 1</c> characters in total
        /// may be skipped. Skipping stops once a complete keyword has been reached.
        /// </param>
        /// <returns>
        /// One entry per match. Length covers the span from the first to the last
        /// keyword character, skipped characters included.
        /// </returns>
        public WordInText[] Search(string strText, int wordLength)
        {
            return FindFuzzy(strText, wordLength, false).ToArray();
        }

        /// <summary>
        /// Returns the text of every exact keyword occurrence in <paramref name="strText"/>.
        /// </summary>
        /// <param name="strText">Text to scan. Null or empty yields no matches.</param>
        /// <returns>The matched keywords in the same order as <see cref="Search(string)"/>.</returns>
        public string[] SearchWord(string strText)
        {
            WordInText[] hits = Search(strText);
            string[] words = new string[hits.Length];
            for (int k = 0; k < hits.Length; k++)
            {
                words[k] = strText.Substring(hits[k].Start, hits[k].Length);
            }

            return words;
        }

        // Shared gap-tolerant scanner used by Search(string, int) and Contains(string, int).
        // When firstOnly is true the scan stops at the first match found.
        private List<WordInText> FindFuzzy(string strText, int wordLength, bool firstOnly)
        {
            List<WordInText> result = new List<WordInText>();
            if (string.IsNullOrEmpty(strText))
            {
                return result;
            }

            for (int start = 0; start < strText.Length; start++)
            {
                // A match attempt only begins on a character that starts some keyword.
                TireTree node = innerRoot.Search((int)strText[start]);
                if (node == null)
                {
                    continue;
                }

                bool allowSkip = true;
                int skipped = 0;

                // Single-character keywords match on the very first character.
                if (node.isTerminate)
                {
                    result.Add(new WordInText() { Start = start, Length = 1 });
                    if (firstOnly)
                    {
                        return result;
                    }
                    allowSkip = false;
                }

                for (int i = start + 1; i < strText.Length; i++)
                {
                    TireTree next = node.Search((int)strText[i]);
                    if (next == null)
                    {
                        if (allowSkip && skipped <= wordLength)
                        {
                            // Mismatch within the budget: ignore this character.
                            skipped += 1;
                            continue;
                        }

                        // Budget exhausted, or a keyword was already completed.
                        break;
                    }

                    node = next;
                    if (node.isTerminate)
                    {
                        result.Add(new WordInText() { Start = start, Length = i - start + 1 });
                        if (firstOnly)
                        {
                            return result;
                        }

                        // After a complete keyword only exact continuation is allowed,
                        // so a match is not stretched across unrelated text.
                        allowSkip = false;
                    }
                }
            }

            return result;
        }

        #endregion

        #region Replace
        /// <summary>
        /// Masks every exact keyword occurrence in <paramref name="strText"/> with '*'.
        /// </summary>
        /// <param name="strText">Text to mask. Null or empty is returned unchanged.</param>
        /// <returns>The text with each matched character replaced by '*'.</returns>
        public string Replace(string strText)
        {
            var result = Search(strText);
            return Replace(result, strText);
        }

        /// <summary>
        /// Masks every gap-tolerant keyword match in <paramref name="strText"/> with '*',
        /// including the skipped characters inside each match.
        /// </summary>
        /// <param name="strText">Text to mask. Null or empty is returned unchanged.</param>
        /// <param name="wordLength">Skip budget, see <see cref="Search(string, int)"/>.</param>
        /// <returns>The text with each matched span replaced by '*'.</returns>
        public string Replace(string strText, int wordLength)
        {
            var result = Search(strText, wordLength);
            return Replace(result, strText);
        }

        // Overwrites each span described by result with '*' characters.
        private string Replace(WordInText[] result, string strText)
        {
            if (string.IsNullOrEmpty(strText))
            {
                return strText;
            }

            char[] masked = strText.ToCharArray();

            foreach (var item in result)
            {
                for (int i = item.Start; i < item.Start + item.Length; i++)
                {
                    masked[i] = '*';
                }
            }

            return new string(masked);
        }
        #endregion

        #region Contains
        /// <summary>
        /// Tells whether <paramref name="strText"/> contains a keyword when foreign
        /// characters between keyword characters are tolerated.
        /// </summary>
        /// <param name="strText">Text to scan. Null or empty yields false.</param>
        /// <param name="wordLength">Skip budget, see <see cref="Search(string, int)"/>.</param>
        /// <returns>True when at least one gap-tolerant match exists.</returns>
        public bool Contains(string strText, int wordLength)
        {
            return FindFuzzy(strText, wordLength, true).Count > 0;
        }

        /// <summary>
        /// Tells whether <paramref name="strText"/> contains an exact keyword occurrence.
        /// </summary>
        /// <param name="strText">Text to scan. Null or empty yields false.</param>
        /// <returns>True when at least one keyword occurs in the text.</returns>
        public bool Contains(string strText)
        {
            if (string.IsNullOrEmpty(strText))
            {
                return false;
            }

            for (int start = 0; start < strText.Length; start++)
            {
                TireTree node = innerRoot;
                for (int i = start; i < strText.Length; i++)
                {
                    node = node.Search((int)strText[i]);
                    if (node == null)
                    {
                        break;
                    }
                    if (node.isTerminate)
                    {
                        return true;
                    }
                }
            }

            return false;
        }
        #endregion

        #region Children
        /// <summary>
        /// Lists keywords that start with <paramref name="prefix"/> and are longer than it.
        /// </summary>
        /// <param name="prefix">Prefix to complete. Null is treated as an empty prefix.</param>
        /// <param name="count">Maximum number of keywords to return.</param>
        /// <returns>
        /// At most <paramref name="count"/> keywords in depth-first order. Empty when the
        /// prefix is not in the trie or when <paramref name="count"/> is not positive.
        /// </returns>
        public string[] GetChild(string prefix, int count)
        {
            List<string> result = new List<string>();
            if (count <= 0)
            {
                return result.ToArray();
            }

            string stem = prefix ?? string.Empty;

            // Locate the node that corresponds to the prefix.
            TireTree node = innerRoot;
            foreach (char ch in stem)
            {
                node = node.Search((int)ch);
                if (node == null)
                {
                    // The prefix is not in the trie, so nothing can complete it.
                    return result.ToArray();
                }
            }

            FindSub(node, result, stem, count);
            return result.ToArray();
        }

        // Depth-first collection of the keywords below rootNode. tempStr is the text
        // spelled so far. The walk stops as soon as limit keywords were collected.
        private void FindSub(TireTree rootNode, List<string> result, string tempStr, int limit)
        {
            foreach (var item in rootNode.subNode)
            {
                if (result.Count >= limit)
                {
                    return;
                }

                string word = tempStr + (char)(item.Key);
                if (item.Value.isTerminate)
                {
                    result.Add(word);
                }
                FindSub(item.Value, result, word, limit);
            }
        }

        #endregion

        // The empty finalizer was removed on purpose: it released nothing and only
        // delayed garbage collection of the instance.
    }

    /// <summary>
    /// Location of one keyword match inside a searched text.
    /// </summary>
    public class WordInText
    {
        /// <summary>Zero-based index of the first matched character in the text.</summary>
        public int Start;
        /// <summary>Number of characters covered by the match, counted from Start.</summary>
        public int Length;
    }

    原文作者:Trie树
    原文地址: https://blog.csdn.net/yzh900927/article/details/42492461
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞

发表评论

电子邮件地址不会被公开。 必填项已用*标注