trie树与hash表的查找速度对比

#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <time.h>

#include <string>

#include <tr1/unordered_map>

#include "trie_tree.h"

using namespace std;

/*
 * Do-nothing callback that main() hands to init_trie_tree().
 * Both arguments are ignored and the result is always 0.
 * NOTE(review): the meaning the trie attaches to this callback and to its
 * return value is defined in trie_tree.h, which is not visible here.
 */
int trie_null(void *v, int f)
{
    (void) v;
    (void) f;
    return 0;
}

/*
 * Case-insensitive string hash functor.
 *
 * ASCII upper-case letters are folded to lower case before being mixed in,
 * so two strings that differ only in the case of A-Z hash to the same value.
 */
class StrHash
{
public:
    /*
     * Hashes the bytes of `s` up to (not including) the first NUL byte.
     * Returns a value in the range [0, 0x7FFFFFFF].
     */
    size_t operator()(const std::string &s) const
    {
        unsigned int acc = 1315423911;

        for (const char *p = s.c_str(); *p != '\0'; ++p)
        {
            int ch = *p;
            if (ch >= 'A' && ch <= 'Z')
                ch += 'a' - 'A';   // fold to lower case

            acc ^= ((acc << 5) + ch + (acc >> 2));
        }

        // Clear the top bit so the result is always non-negative as a 32-bit int.
        return (acc & 0x7FFFFFFF);
    }
};

/*
 * Equality predicate that treats two strings as equal when strcasecmp()
 * reports no difference, i.e. when they match ignoring letter case.
 * The comparison works on the C strings, so it stops at the first NUL byte.
 */
class IgnoreCaseComparator
{
public:
    bool operator()(const std::string &s1, const std::string &s2) const
    {
        const int order = strcasecmp(s1.c_str(), s2.c_str());
        return order == 0;
    }
};

/*
 * Hash map from string keys to unsigned long values. Keys are hashed with
 * StrHash and compared with IgnoreCaseComparator, so lookups ignore the case
 * of ASCII letters.
 */
typedef std::tr1::unordered_map<const std::string, unsigned long, StrHash, IgnoreCaseComparator> NameTblmetaMap;

/*
 * Returns the time from `start` to `end` in nanoseconds.
 * The arithmetic is done in long long so it does not overflow on platforms
 * where long is 32 bits.
 */
static unsigned long long elapsed_ns(const struct timespec &start, const struct timespec &end)
{
    const long long sec = (long long) end.tv_sec - (long long) start.tv_sec;
    const long long nsec = (long long) end.tv_nsec - (long long) start.tv_nsec;
    return (unsigned long long) (sec * 1000000000LL + nsec);
}

/*
 * Benchmark: fills a trie and a case-insensitive unordered_map with the same
 * decimal-string keys, then times kRounds passes of kLookupKeys lookups
 * against each container and prints the elapsed nanoseconds.
 */
int main()
{
    const long kFirstKey = 11100000;     // first numeric key inserted
    const long kInsertCount = 10000;     // number of keys inserted
    const int kLookupKeys = 1000;        // distinct keys looked up per round
    const int kRounds = 100000;          // lookup rounds per container
    const size_t kKeyBufSize = 20;       // bytes reserved for one decimal key

    trie_tree m_table;
    NameTblmetaMap m_tables;

    init_trie_tree(&m_table, 100000, trie_null, 0);

    // Insert the same key set into both containers.
    for (long key = kFirstKey; key < kFirstKey + kInsertCount; key++)
    {
        char buf[kKeyBufSize] = { 0 };
        snprintf(buf, sizeof(buf), "%ld", key);

        m_tables[std::string(buf)] = key;
        insert_trie_tree(&m_table, (unsigned char*) buf, (void*) key);
    }

    // Pre-build the lookup keys in both forms (C string for the trie,
    // std::string for the map) so key construction is not part of the timing.
    std::string *slist = new std::string[kLookupKeys];
    char **plist = new char*[kLookupKeys];

    for (int i = 0; i < kLookupKeys; i++)
    {
        plist[i] = new char[kKeyBufSize];
        // kFirstKey is long, so the argument really is a long as "%ld" requires.
        snprintf(plist[i], kKeyBufSize, "%ld", kFirstKey + i);
        slist[i] = plist[i];
    }

    // CLOCK_MONOTONIC is used for interval timing because it is not affected
    // by adjustments to the wall clock.
    struct timespec s, e;

    clock_gettime(CLOCK_MONOTONIC, &s);
    for (int i = 0; i < kRounds; i++)
    {
        for (int j = 0; j < kLookupKeys; j++)
        {
            get_trie_tree_value(&m_table, (unsigned char*) plist[j]);
        }
    }
    clock_gettime(CLOCK_MONOTONIC, &e);
    printf("trie %llu\n", elapsed_ns(s, e));

    // Count hits so the optimizer cannot discard the otherwise unused find().
    unsigned long hits = 0;

    clock_gettime(CLOCK_MONOTONIC, &s);
    for (int i = 0; i < kRounds; i++)
    {
        for (int j = 0; j < kLookupKeys; j++)
        {
            if (m_tables.find(slist[j]) != m_tables.end())
                ++hits;
        }
    }
    clock_gettime(CLOCK_MONOTONIC, &e);
    printf("unordered_map %llu\n", elapsed_ns(s, e));

    // Volatile store keeps `hits` (and therefore the lookups) observable.
    volatile unsigned long sink = hits;
    (void) sink;

    // Release the lookup-key buffers.
    // NOTE(review): the trie itself is not released here; trie_tree.h is not
    // visible, so the matching cleanup call (if any) needs to be confirmed.
    for (int i = 0; i < kLookupKeys; i++)
    {
        delete[] plist[i];
    }
    delete[] plist;
    delete[] slist;

    return 0;
}

插入10000条记录,对其中的1000条记录进行查找,做100000轮。

Trie树的耗时为3251830320ns

Stl unordered_map耗时为7080112117ns

多次运行时间波动较小,可见此情况下trie树的速度超过unordered_map两倍

调小插入的记录数至1000时

Trie树的耗时为3195810157ns

Stl unordered_map耗时为7020927158ns

变化不大

分别注释掉trie树和unordered_map的插入,不做查找,然后插入11100000-11170000共7万条后,使用pmap查看内存占用

mapped: 19676K writeable/private: 3968K trie

mapped: 22480K writeable/private: 6776K unordered_map
二者实际占用内存只有4-6MB,都比较少,trie树相对更低一些,不过这个场景对trie树的内存占用是很有利的。

    原文作者:Trie树
    原文地址: https://blog.csdn.net/scythe666/article/details/52023303
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞