Trie树学习

Trie树学习

标签(空格分隔): Trie树

文章目录

定义

Trie树又称字典树、单词查找树或键树,是一种树形结构,常用于统计和排序大量的字符串。

三个基本性质

  1. 根节点不包含字符,每条边代表一个字符
  2. 从根节点到某一节点,把路径上经过的字符依次连接起来,就是该节点对应的字符串
  3. 同一个节点的所有子节点所包含的字符互不相同

Java实现

import java.util.HashMap;
import java.util.Map;

/**
 * Minimal character-keyed trie that records, for every prefix, how many inserted strings
 * pass through it.
 *
 * <p>The trie has no root field of its own: callers create a root node and pass it to
 * {@link #insert} and {@link #search}.
 *
 * author:lanpeng
 * Date:2018/10/6
 * Time:16:15
 */
public class TrieTree {

    /** Demo: builds a trie from a few strings and prints the prefix count of some queries. */
    public static void main(String[] args) {
        String[] strs = {"我啊", "我打", "b", "我打你", "lisfa", "asfkjlsf"};
        String[] words = {"我", "我打", "asf", "ac"};
        TrieTree trieTree = new TrieTree();
        TrieNode root = new TrieNode();
        for (String a : strs) {
            trieTree.insert(root, a);
        }
        for (String s : words) {
            System.out.println(s + ":" + trieTree.search(root, s));
        }
    }

    /**
     * Inserts a string below {@code node}, one trie level per {@code char}.
     *
     * <p>A node that does not exist yet is created (its counter starts at 1); a node that
     * already exists has its counter incremented.
     *
     * @param node the node to start from, normally the root
     * @param str  the string to insert; an empty string changes nothing
     * @throws NullPointerException if {@code node} or {@code str} is null
     */
    public void insert(TrieNode node, String str) {
        for (int i = 0; i < str.length(); ++i) {
            // Autoboxing replaces the deprecated new Character(char) constructor.
            char c = str.charAt(i);
            TrieNode child = node.children.get(c);
            if (child == null) {
                child = new TrieNode();
                node.children.put(c, child);
            } else {
                child.nCount++;
            }
            node = child;
        }
    }

    /**
     * Looks up how many inserted strings start with {@code str}.
     *
     * @param node the node to start from, normally the root
     * @param str  the prefix to look up
     * @return the counter of the node reached by following {@code str}, or 0 when the path
     *         does not exist; for an empty {@code str} this is the counter of {@code node} itself
     * @throws NullPointerException if {@code node} or {@code str} is null
     */
    public int search(TrieNode node, String str) {
        for (int i = 0; i < str.length(); ++i) {
            node = node.children.get(str.charAt(i));
            if (node == null) {
                return 0;
            }
        }
        return node.nCount;
    }

    /** A trie node: a pass-through counter plus the children keyed by character. */
    private static class TrieNode {
        /** Starts at 1 on creation and is incremented each time an insert passes through. */
        int nCount = 1;
        /** Child nodes, keyed by the character on the edge leading to them. */
        final Map<Character, TrieNode> children = new HashMap<>();
    }
}
package com.csair.etm.quartz.utils;

import org.apache.commons.lang.CharUtils;
import org.apache.commons.lang.StringUtils;

import java.util.HashMap;
import java.util.Map;

/**
 * Character-keyed trie with prefix counting and a simple sensitive-word filter.
 *
 * <p>The trie has no root field of its own: callers create a root node and pass it to every
 * operation. Words registered with {@link #addWord} are replaced by {@code ***} in
 * {@link #filter}; strings added with {@link #insert} only contribute to the prefix counts
 * reported by {@link #search}.
 *
 * author:lanpeng
 * Date:2018/10/6
 * Time:16:15
 */
public class TrieTree {

    /** Text that {@link #filter} writes in place of every registered word it finds. */
    private static final String REPLACEMENT = "***";

    /** Demo: registers a few words, filters a sample text and classifies one character. */
    public static void main(String[] args) {
        String[] strs = {"我啊", "我打", "b", "我打你", "lisfa", "asfkjlsf"};
        String s = "aaaab啊asfkjlsf噩噩噩噩";
        TrieTree trieTree = new TrieTree();
        TrieNode root = new TrieNode();
        for (String a : strs) {
            trieTree.addWord(root, a);
        }
        System.out.println(trieTree.filter(root, s));
        System.out.println(trieTree.isSymbol(s.charAt(4)));
    }

    /**
     * Inserts a string below {@code node} without marking its last node as a word end.
     *
     * @param node the node to start from, normally the root
     * @param str  the string to insert; an empty string changes nothing
     * @throws NullPointerException if {@code node} or {@code str} is null
     */
    public void insert(TrieNode node, String str) {
        for (int i = 0; i < str.length(); ++i) {
            node = descendOrCreate(node, str.charAt(i));
        }
    }

    /**
     * Looks up how many added strings start with {@code str}.
     *
     * @param node the node to start from, normally the root
     * @param str  the prefix to look up
     * @return the counter of the node reached by following {@code str}, or 0 when the path
     *         does not exist; for an empty {@code str} this is the counter of {@code node} itself
     * @throws NullPointerException if {@code node} or {@code str} is null
     */
    public int search(TrieNode node, String str) {
        for (int i = 0; i < str.length(); ++i) {
            node = node.children.get(str.charAt(i));
            if (node == null) {
                return 0;
            }
        }
        return node.nCount;
    }

    /**
     * Inserts a string like {@link #insert} and additionally flags the node of its last
     * character as the end of a word, which is what {@link #filter} looks for.
     *
     * @param node the node to start from, normally the root
     * @param str  the word to register; an empty string changes nothing
     * @throws NullPointerException if {@code node} or {@code str} is null
     */
    public void addWord(TrieNode node, String str) {
        TrieNode current = node;
        for (int i = 0; i < str.length(); ++i) {
            current = descendOrCreate(current, str.charAt(i));
        }
        // Never flag the starting node itself: an empty word registers nothing.
        if (!str.isEmpty()) {
            current.end = true;
        }
    }

    /**
     * Moves from {@code node} to its child for {@code c}, creating the child when it is missing
     * (counter starts at 1) and incrementing the counter of a child that already exists.
     */
    private static TrieNode descendOrCreate(TrieNode node, char c) {
        // Autoboxing replaces the deprecated new Character(char) constructor.
        TrieNode child = node.children.get(c);
        if (child == null) {
            child = new TrieNode();
            node.children.put(c, child);
        } else {
            child.nCount++;
        }
        return child;
    }

    /**
     * Tells whether {@code c} is neither an ASCII letter or digit nor a character in the
     * range U+2E80..U+9FFF (East Asian scripts).
     *
     * <p>{@link #filter} does not consult this method; in this class only {@code main} calls it.
     */
    private boolean isSymbol(char c) {
        int ic = (int) c;
        // East Asian scripts
        return !isAsciiAlphanumeric(c) && (ic < 0x2E80 || ic > 0x9FFF);
    }

    /** Returns true for the ASCII ranges A-Z, a-z and 0-9 only. */
    private static boolean isAsciiAlphanumeric(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9');
    }

    /** Returns true when {@code text} is null, empty, or made up solely of whitespace. */
    private static boolean isBlank(String text) {
        if (text == null) {
            return true;
        }
        for (int i = 0; i < text.length(); ++i) {
            if (!Character.isWhitespace(text.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Replaces every registered word found in {@code text} with {@code ***}.
     *
     * <p>The scan is leftmost and stops at the first word end it reaches, so when one
     * registered word is a prefix of another, only the shorter one is ever replaced and the
     * remaining characters of the longer one are scanned as ordinary text.
     *
     * @param rootNode root of the trie holding the words registered with {@link #addWord}
     * @param text     the text to filter
     * @return the filtered text; {@code text} itself when it is null, empty or whitespace only
     * @throws NullPointerException if {@code rootNode} is null and {@code text} is not blank
     */
    public String filter(TrieNode rootNode, String text) {
        if (isBlank(text)) {
            return text;
        }
        StringBuilder result = new StringBuilder();
        TrieNode tempNode = rootNode;
        // begin only moves forward: start of the candidate word currently being examined
        int begin = 0;
        // position walks ahead of begin along the candidate and falls back on a mismatch
        int position = 0;

        while (position < text.length()) {
            tempNode = tempNode.children.get(text.charAt(position));
            if (tempNode == null) {
                // No registered word starts at begin: keep that character and retry one further.
                result.append(text.charAt(begin));
                begin++;
                position = begin;
                tempNode = rootNode;
            } else if (tempNode.end) {
                // text[begin..position] is a registered word: replace it and continue after it.
                result.append(REPLACEMENT);
                position++;
                begin = position;
                tempNode = rootNode;
            } else {
                position++;
            }
        }
        // The text may end in the middle of a candidate; whatever is left from begin is kept.
        result.append(text, begin, text.length());
        return result.toString();
    }

    /** A trie node: pass-through counter, word-end flag and children keyed by character. */
    private static class TrieNode {
        /** Starts at 1 on creation and is incremented each time a later add passes through. */
        int nCount = 1;
        /** True when a word registered with addWord ends at this node. */
        boolean end;
        /** Child nodes, keyed by the character on the edge leading to them. */
        final Map<Character, TrieNode> children = new HashMap<>();
    }
}


    原文作者:Trie树
    原文地址: https://blog.csdn.net/qq_33655674/article/details/82991623
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞