字典
如果有 n 个条目,使用树结构,查询的时间复杂度是 O(log n)。假设有一百万(约 $2^{20}$)个条目,则一次查询大约需要 $\log_2 2^{20} = 20$ 次比较。
Trie
查询每个条目的时间复杂度与字典中一共有多少条目无关。时间复杂度为 O(w),其中 w 为被查询单词的长度,而大多数单词的长度小于 10。
import java.util.TreeMap;

/**
 * A trie (prefix tree) that stores a set of strings one character per level.
 *
 * <p>Every operation walks one node per character of its argument, so the number
 * of nodes visited depends on the length of the word and not on how many words
 * are stored. Children of a node are kept in a {@link TreeMap} keyed by character.
 */
public class Trie {

    /**
     * A single node of the trie.
     *
     * <p>Declared {@code static} because a node never uses the enclosing
     * {@code Trie}; a non-static inner class would store a hidden reference
     * to the outer instance in every node.
     */
    private static class Node {

        /** True when the path from the root to this node spells a stored word. */
        public boolean isWord;

        /** Child nodes, keyed by the character on the edge leading to each child. */
        public final TreeMap<Character, Node> next;

        public Node(boolean isWord) {
            this.isWord = isWord;
            next = new TreeMap<>();
        }

        public Node() {
            this(false);
        }
    }

    /** Root node; it corresponds to the empty string. */
    private final Node root;

    /** Number of distinct words stored. */
    private int size;

    /** Creates an empty trie. */
    public Trie() {
        root = new Node();
        size = 0;
    }

    /**
     * Returns the number of distinct words stored in the trie.
     *
     * @return the word count
     */
    public int getSize() {
        return size;
    }

    /**
     * Adds {@code word} to the trie. Adding a word that is already present
     * leaves the trie and its size unchanged.
     *
     * @param word the word to add
     */
    public void add(String word) {
        Node cur = root;
        for (int i = 0; i < word.length(); i++) {
            char c = word.charAt(i);
            // Look the child up once; create and link it only when it is missing.
            Node child = cur.next.get(c);
            if (child == null) {
                child = new Node();
                cur.next.put(c, child);
            }
            cur = child;
        }
        // Count the word only the first time its final node is marked.
        if (!cur.isWord) {
            cur.isWord = true;
            size++;
        }
    }

    /**
     * Reports whether {@code word} has been added to the trie as a complete word.
     *
     * @param word the word to look up
     * @return {@code true} if the word is stored, {@code false} otherwise
     */
    public boolean contains(String word) {
        Node end = find(word);
        return end != null && end.isWord;
    }

    /**
     * Reports whether the trie contains a path spelling {@code prefix}, i.e.
     * whether some stored word starts with it. The empty prefix always matches
     * because it ends at the root.
     *
     * @param prefix the prefix to look up
     * @return {@code true} if the path for {@code prefix} exists
     */
    public boolean isPrefix(String prefix) {
        return find(prefix) != null;
    }

    /** Returns the node reached by following {@code s} from the root, or null if the path breaks. */
    private Node find(String s) {
        Node cur = root;
        for (int i = 0; i < s.length(); i++) {
            cur = cur.next.get(s.charAt(i));
            if (cur == null) {
                return null;
            }
        }
        return cur;
    }
}
对 Trie、AVLSet 和 BSTSet 进行性能测试
import java.util.ArrayList; public class Main { public static void main(String[] args) { System.out.println("Pride and Prejudice"); ArrayList<String> words = new ArrayList<>(); if(FileOperation.readFile("pride-and-prejudice.txt", words)){ long startTime = System.nanoTime(); BSTSet<String> set = new BSTSet<>(); for(String word: words) set.add(word); for(String word: words) set.contains(word); long endTime = System.nanoTime(); double time = (endTime - startTime) / 1000000000.0; System.out.println("Total different words: " + set.getSize()); System.out.println("BSTSet: " + time + " s"); // --- startTime = System.nanoTime(); Trie trie = new Trie(); for(String word: words) trie.add(word); for(String word: words) trie.contains(word); endTime = System.nanoTime(); time = (endTime - startTime) / 1000000000.0; System.out.println("Total different words: " + trie.getSize()); System.out.println("Trie: " + time + " s"); // --- startTime = System.nanoTime(); AVLSet<String> avlSet = new AVLSet(); for(String word: words) avlSet.add(word); for(String word: words) avlSet.contains(word); endTime = System.nanoTime(); time = (endTime - startTime) / 1000000000.0; System.out.println("Total different words: " + avlSet.getSize()); System.out.println("AVLSet: " + time + " s"); } } }
测试结果:
Pride and Prejudice
Total different words: 6530
BSTSet: 0.164487683 s
Total different words: 6530
Trie: 0.133619481 s
Total different words: 6530
AVLSet: 0.146618689 s