基于 Trie 树的 AC(Aho-Corasick)多模匹配算法

#!/usr/bin/python2.7
#-*-coding=utf-8-*-

class Trie:
    """Trie node that also acts as an Aho-Corasick multi-pattern matcher.

    The instance on which ``add`` / ``parse`` are called is treated as the
    root of the automaton; every node reachable through ``child`` is another
    ``Trie`` instance.  Keywords and scanned text are iterated character by
    character, so pass unicode strings when matching non-ASCII text.
    """

    def __init__(self):
        # 1 when a keyword ends at this node, 0 otherwise.
        self.nodetype = 0
        # Maps the next character to the child node.
        self.child = {}
        # Failure link; filled in by getfail().
        self.fail = None
        # The keyword that ends at this node (meaningful when nodetype == 1).
        self.strout = ""
        # 1 once the failure links built from this node are up to date.
        self.tag = 0
        # Nearest keyword node reachable through failure links, or None;
        # filled in by getfail() and used by parse() to report keywords that
        # are suffixes of the currently matched path.
        self.output = None

    def add(self, word):
        """Insert the keyword ``word`` below this node.

        Resets ``self.tag`` so the failure links are rebuilt on the next
        ``parse`` call.
        """
        node = self
        for ch in word:
            nxt = node.child.get(ch)
            if nxt is None:
                nxt = Trie()
                node.child[ch] = nxt
            node = nxt
        node.nodetype = 1
        node.strout = word
        self.tag = 0

    def find(self, word):
        """Return True if ``word`` was added as a complete keyword, else False."""
        node = self
        for ch in word:
            node = node.child.get(ch)
            if node is None:
                return False
        return node.nodetype == 1

    def getfail(self):
        """Compute the failure and output links of every node (breadth-first).

        Does nothing when ``self.tag`` says the links are already current.
        """
        if self.tag:
            return
        from collections import deque

        self.fail = self
        self.output = None
        pending = deque()
        # Depth-1 nodes always fall back to the root.
        for first in self.child.values():
            first.fail = self
            first.output = None
            pending.append(first)

        while pending:
            parent = pending.popleft()
            for ch, node in parent.child.items():
                # Walk the parent's failure chain with a separate cursor so
                # that every sibling starts from the same place.
                cursor = parent.fail
                while cursor is not self and ch not in cursor.child:
                    cursor = cursor.fail
                target = cursor.child.get(ch, self)
                node.fail = target
                # target is shallower than node, so breadth-first order
                # guarantees its own output link is already set.
                if target is not self and target.nodetype == 1:
                    node.output = target
                else:
                    node.output = target.output
                pending.append(node)
        self.tag = 1

    def parse(self, lang):
        """Find every keyword occurrence in ``lang``.

        Returns a list of ``(start_index, keyword)`` tuples ordered by the
        position at which each match ends; matches ending at the same
        position are listed longest first.  Overlapping matches and keywords
        that are suffixes of other matches are all reported.
        """
        if self.tag == 0:
            self.getfail()
        result = []
        state = self
        for i, ch in enumerate(lang):
            # Drop to shorter suffixes until one can be extended by ch.
            while state is not self and ch not in state.child:
                state = state.fail
            nxt = state.child.get(ch)
            if nxt is None:
                # No keyword prefix ends here; restart from the root.
                state = self
                continue
            state = nxt
            hit = state if state.nodetype == 1 else state.output
            while hit is not None:
                result.append((i - len(hit.strout) + 1, hit.strout))
                hit = hit.output
        return result
				

    原文作者:Trie树
    原文地址: https://blog.csdn.net/shaolianbo/article/details/8557226
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞

发表评论

电子邮件地址不会被公开。 必填项已用*标注