GeeksforGeeks Trie - 键树简介 - 构造 插入 和 搜索

Trie是非常高效的信息检索数据结构,查找的时间复杂度是O(m),其中m是需要搜索的关键字的长度。缺点是需要的存储空间较大。

Trie的特点:

1. 每个Trie的节点都由多个分支构成

2. 每个分支代表可能的关键字的一个字符

3. 需要mark(标志)每个关键字的最后一个字符为leaf node(叶子节点)

英文字母的节点数据结构可以表示如下:

// One trie node: a key-end marker plus one child pointer per alphabet letter.
struct TrieNode
{
    int value; /* Non-zero when a key ends at this node; zero otherwise */
    TrieNode *children[ALPHABET_SIZE]; /* One slot per letter; NULL when that letter has no child */
};

插入关键字:

1. 关键字的每个字符都对应一个独立的trie节点。注意每个节点的children是一组指针,分别指向下一层的trie节点。

2. 如果输入的关键字是新的,或者是比原有关键字长, 就需要构造新的节点, 而且需要标志它的结束点为叶子节点。 

3. 如果关键字比原有的某个关键字短,那么就可以只标志新的叶子节点。

4. 关键字的长度决定了trie的深度

搜索关键字:

1. 比较关键字的字符,然后往下一层移动

2. 如果关键字结束,或者没有这个字符在trie中,那么搜索结束。 前者比较最后一个节点是否是叶子节点,如果是表示搜索成功,否则不成功。后者表示搜索不成功。

参考原文:

http://www.geeksforgeeks.org/trie-insert-and-search/

实现程序:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <string>

// Element count of a true array (not a pointer). The whole expansion and the
// argument are parenthesized so the macro composes safely inside larger
// expressions such as `x / ARRAY_SIZE(a)`.
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
// Number of child slots per trie node: one per lowercase letter.
#define ALPHABET_SIZE (26)
// Maps a character to its child-slot index, with 'a' at index 0. The argument
// is parenthesized so the cast applies to the whole expression passed in.
#define CHAR_TO_INDEX(c) ((int)(c) - (int)'a')

// One trie node: a key-end marker plus one child pointer per alphabet letter.
struct TrieNode
{
    int value; /* 0 in a fresh node; insert() stores the trie's running count here when a key ends at this node */
    TrieNode *children[ALPHABET_SIZE]; /* Indexed by CHAR_TO_INDEX of the next character; NULL when absent */
};

// Trie handle: the root node plus a counter maintained by insert().
struct TrieT
{
    TrieNode *root; /* Dummy root node created by initialize(); holds no character itself */
    int count; /* Incremented by insert(); the current value is stamped on each inserted key's last node */
};

// Allocates a single trie node whose value is 0 and whose child slots are
// all NULL. Returns NULL when the allocation fails.
TrieNode *getNode(void)
{
    TrieNode *node = (TrieNode *)malloc(sizeof(TrieNode));
    if( node == NULL )
    {
        return NULL;
    }

    node->value = 0;
    // Walk the child array by pointer and clear every slot.
    for(TrieNode **slot = node->children; slot != node->children + ALPHABET_SIZE; ++slot)
    {
        *slot = NULL;
    }
    return node;
}

// Puts the trie into its empty state: the counter is reset to zero and a
// fresh dummy root node is attached (root is whatever getNode() returns).
void initialize(TrieT *pTrie)
{
    pTrie->count = 0;
    pTrie->root = getNode();
}

// If not present, inserts key into trie
// If the key is prefix of trie node, just marks leaf node
void insert(TrieT *pTrie, char key[])
{
    int level = 0;
    int length = strlen(key);
    int index = 0;
    TrieNode *pCrawl;

    pTrie->count++;
    pCrawl = pTrie->root;

    for( level = 0; level < length; level++ )
    {
        index = CHAR_TO_INDEX(key[level]);
        if( !pCrawl->children[index] )
        {
            pCrawl->children[index] = getNode();
        }
        pCrawl = pCrawl->children[index];
    }
    // mark last node as leaf
    pCrawl->value = pTrie->count;
}

// Returns non zero, if key presents in trie
int search(TrieT *pTrie, char key[])
{
    int level;
    int length = strlen(key);
    int index;
    TrieNode *pCrawl;

    pCrawl = pTrie->root;
    for( level = 0; level < length; level++ )
    {
        index = CHAR_TO_INDEX(key[level]);
        if( !pCrawl->children[index] )
        {
            return 0;
        }
        pCrawl = pCrawl->children[index];
    }
    return (0 != pCrawl && pCrawl->value);
}

// Driver: builds a trie from a fixed key list, then prints whether each of a
// few probe words is present. Returns EXIT_FAILURE if the trie root could not
// be allocated, 0 otherwise.
// Note: the trie's nodes are not freed before the program returns.
int main()
{
    // Input keys (use only 'a' through 'z' and lower case)
    char keys[][8] = {"the", "a", "there", "answer", "any", "by", "bye", "their"};
    // Words to look up; kept in writable arrays so they can be passed to
    // search() whether or not its key parameter is const-qualified.
    char queries[][8] = {"the", "these", "their", "thaw"};
    TrieT trie;

    // Indexed by the 0/1 result of search().
    char output[][32] = {"Not present in trie", "Present in trie"};

    initialize(&trie);
    if( !trie.root )
    {
        // Without a root node there is nothing to insert into.
        fprintf(stderr, "Failed to allocate trie root\n");
        return EXIT_FAILURE;
    }

    // Construct trie
    for(size_t i = 0; i < ARRAY_SIZE(keys); i++)
    {
        insert(&trie, keys[i]);
    }

    // Search for different keys
    for(size_t i = 0; i < ARRAY_SIZE(queries); i++)
    {
        printf("%s --- %s\n", queries[i], output[search(&trie, queries[i])] );
    }

    return 0;
}

更新 2014-5-16

C++写的类,主要是带构造函数和析构函数,可以很好管理内存,甚至不需要递归地手动释放内存了,析构函数可以自动递归调用释放所有Node,这个是C++比C强大的地方之一:

// Self-contained trie demo over the lowercase letters 'a'..'z'.
// Constructing an object builds a trie from a fixed key list and prints the
// result of a few lookups; destroying it releases every node.
// The object owns its nodes through raw pointers, so copying is disabled to
// prevent two objects from deleting the same tree.
class TrieInsertAndSearch
{
    const static int ALPH_SIZE = 26;

    // One trie node. Deleting a node deletes its whole subtree.
    struct Node
    {
        int val;                    // non-zero when a key ends at this node
        Node *children[ALPH_SIZE];  // one slot per letter, nullptr when absent
        explicit Node(int v = 0) : val(v)
        {
            for (int i = 0; i < ALPH_SIZE; i++)
            {
                children[i] = nullptr;
            }
        }
        ~Node()
        {
            for (int i = 0; i < ALPH_SIZE; i++)
            {
                delete children[i];  // deleting nullptr is a no-op
                children[i] = nullptr;
            }
        }
        Node(const Node &) = delete;
        Node &operator=(const Node &) = delete;
    };

    // Root pointer plus the number of successful insertions so far.
    struct Tree
    {
        Node *root;
        int count;
        // Initializers listed in declaration order (root, then count).
        explicit Tree(int c = 0, Node *r = nullptr) : root(r), count(c) {}
        ~Tree()
        {
            delete root;
            root = nullptr;
        }
        Tree(const Tree &) = delete;
        Tree &operator=(const Tree &) = delete;
    };

    Tree *pT;

    // Maps a character to its child slot, or -1 when it is not in 'a'..'z'.
    static int slotOf(char c)
    {
        int id = c - 'a';
        return (id >= 0 && id < ALPH_SIZE) ? id : -1;
    }

    // Inserts key, creating missing nodes, and stamps the last node with the
    // updated insertion count. A null key or a key containing a character
    // outside 'a'..'z' is rejected and leaves the trie unchanged.
    void insert(const char key[])
    {
        if (!key) return;

        // Validate up front so a rejected key creates no nodes.
        for (const char *p = key; *p != '\0'; ++p)
        {
            if (slotOf(*p) < 0) return;
        }

        Node *pCrawl = pT->root;
        for (const char *p = key; *p != '\0'; ++p)
        {
            int id = slotOf(*p);
            if (!pCrawl->children[id])
            {
                pCrawl->children[id] = new Node;
            }
            pCrawl = pCrawl->children[id];
        }
        pCrawl->val = ++pT->count;
    }

    // Returns true only when the exact key was inserted earlier. A null key
    // or a key with a character outside 'a'..'z' is reported as absent.
    bool search(const char key[])
    {
        if (!key) return false;

        Node *pCrawl = pT->root;
        for (const char *p = key; *p != '\0'; ++p)
        {
            int id = slotOf(*p);
            if (id < 0 || !pCrawl->children[id]) return false;
            pCrawl = pCrawl->children[id];
        }
        // The path existing is not enough; the final node must mark a key end.
        return pCrawl->val != 0;
    }
public:
    // Builds the demo trie and prints the lookup results to stdout.
    TrieInsertAndSearch()
    {
        char keys[][8] = {"the", "a", "there", "answer", "any", "by", "bye", "their"};
        pT = new Tree(0, new Node);
        int n = sizeof(keys) / sizeof(keys[0]);
        for (int i = 0; i < n; i++)
        {
            insert(keys[i]);
        }

        // Search for different keys
        if (search("the")) printf("the is in Trie\n");
        else printf("the is not in Trie\n");
        if (search("these")) printf("these is in Trie\n");
        else printf("these is not in Trie\n");
        if (search("their")) printf("their is in Trie\n");
        else printf("their is not in Trie\n");
        if (search("thaw")) printf("thaw is in Trie\n");
        else printf("thaw is not in Trie\n");
    }
    // Releases the tree; Tree's and Node's destructors free every node.
    ~TrieInsertAndSearch()
    {
        delete pT;
        pT = nullptr;
    }
    // Non-copyable: pT is an owning raw pointer.
    TrieInsertAndSearch(const TrieInsertAndSearch &) = delete;
    TrieInsertAndSearch &operator=(const TrieInsertAndSearch &) = delete;
};

    原文作者:Trie树
    原文地址: https://blog.csdn.net/kenden23/article/details/24453639
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞