基于 Trie 树的 AC(Aho-Corasick)多模匹配算法

#!/usr/bin/python2.7
#-*-coding=utf-8-*-

class Trie(object):
    """Trie node that doubles as an Aho-Corasick multi-pattern matcher.

    The instance on which ``add``/``parse`` are called acts as the root of
    the automaton; every child node is itself a ``Trie`` instance.
    """

    def __init__(self):
        # 1 if a keyword ends at this node, 0 otherwise.
        self.nodetype = 0
        # Maps a single character to the child node reached through it.
        self.child = {}
        # Failure link; filled in by getfail() (the root points to itself).
        self.fail = None
        # The complete keyword ending at this node (meaningful if nodetype == 1).
        self.strout = ""
        # On the root: 1 once failure links are up to date, 0 otherwise.
        self.tag = 0

    def add(self, word):
        """Insert the keyword ``word`` into the trie.

        Must be called on the root node: it clears the root's ``tag`` so the
        failure links are rebuilt before the next ``parse``.

        Args:
            word: the keyword, as a (unicode) string.
        """
        node = self
        for w in word:
            if w not in node.child:
                node.child[w] = Trie()
            node = node.child[w]
        node.nodetype = 1
        node.strout = word
        # Any previously computed failure links are now stale.
        self.tag = 0

    def find(self, word):
        """Return True if ``word`` was added as a complete keyword.

        Args:
            word: the string to look up.

        Returns:
            True when ``word`` exactly matches a stored keyword, else False
            (a mere prefix of a keyword does not count).
        """
        node = self
        for w in word:
            node = node.child.get(w)
            if node is None:
                return False
        return node.nodetype == 1

    def getfail(self):
        """Compute the failure link of every node by breadth-first traversal.

        Does nothing when the links are already up to date (``tag`` set).
        """
        if self.tag:
            return
        # Local import: the file has no top-level import block.
        from collections import deque

        self.fail = self
        que = deque([self])
        while que:
            par = que.popleft()
            for w, ch in par.child.items():
                # Walk the parent's failure chain with a separate cursor so
                # that ``par`` stays intact for the remaining siblings.
                cand = par.fail
                while cand is not self and w not in cand.child:
                    cand = cand.fail
                target = cand.child.get(w)
                # ``target is ch`` happens only for the root's direct
                # children, whose failure link must be the root itself.
                if target is None or target is ch:
                    ch.fail = self
                else:
                    ch.fail = target
                que.append(ch)
        self.tag = 1

    def parse(self, lang):
        """Find every occurrence of every stored keyword in ``lang``.

        Args:
            lang: the text to scan, as a (unicode) string.

        Returns:
            A list of ``(start_index, keyword)`` tuples ordered by the
            position where each match ends; matches ending at the same
            position are listed longest first. An empty keyword is never
            reported.
        """
        if self.tag == 0:
            self.getfail()
        result = []
        node = self
        for i, ch in enumerate(lang):
            # Fall back along failure links until ``ch`` can be consumed
            # or the root is reached.
            while node is not self and ch not in node.child:
                node = node.fail
            node = node.child.get(ch, self)
            # Report the keyword ending at this node and every shorter
            # keyword that is a suffix of the current path.
            hit = node
            while hit is not self:
                if hit.nodetype == 1:
                    result.append((i - len(hit.strout) + 1, hit.strout))
                hit = hit.fail
        return result
                

    原文作者:Trie树
    原文地址: https://blog.csdn.net/shaolianbo/article/details/8557226
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞