C++字符串查找算法(一)BM算法实现

#include <stdio.h>
#include "iostream"
#include <string.h>
#include <boost/shared_array.hpp>
#include <vector>

typedef unsigned char   uchar;  /* Short for unsigned char */
/* Number of distinct values an 8-bit character can take (0..255). */
enum { alphabet_size = 256 };
/* Returns the larger of the two integers (y when they compare equal). */
inline int max(int x, int y)
{
    if (x > y)
        return x;
    return y;
}
// Makes every name from namespace std visible unqualified for the rest of this file.
using namespace std;
// Boyer-Moore substring search over raw bytes.
//
// The constructor copies the pattern and precomputes the two classic shift
// tables (bad character and good suffix); match() then scans a text using
// them. All state is held in std::vector members, so instances can be copied
// and assigned freely and match() can be called on const objects.
class BM_Algorithm {
public:
    // Builds a searcher for the first pattern_length bytes of pattern.
    //
    // pattern        - bytes to search for; does not need to be NUL-terminated.
    // pattern_length - number of bytes of pattern to use.
    //
    // A NULL pattern or a non-positive length is treated as the empty
    // pattern, which match() reports as found in every text.
    BM_Algorithm(const char* pattern , int pattern_length)
        : m_pattern_len((pattern != NULL && pattern_length > 0) ? pattern_length : 0)
    {
        // Nothing to precompute for an empty pattern; the table builders
        // below assume at least one pattern byte.
        if (m_pattern_len == 0)
            return;

        m_pattern.assign(pattern, pattern + m_pattern_len);

        compute_good_suffix_shifts();
        compute_bad_character_shifts();
    }

    // Returns true if the pattern occurs anywhere in the first text_len bytes
    // of text, false otherwise.
    //
    // text     - bytes to search in; does not need to be NUL-terminated.
    // text_len - number of bytes of text to examine.
    bool match(const char* text, size_t text_len) const {
        // The empty pattern is found in any text, including an empty one.
        if (m_pattern_len == 0)
            return true;

        const size_t pattern_len = static_cast<size_t>(m_pattern_len);
        if (text == NULL || text_len < pattern_len)
            return false;

        const int last = m_pattern_len - 1;
        // Largest text offset at which the whole pattern still fits.
        const size_t last_start = text_len - pattern_len;

        size_t start = 0;
        while (start <= last_start)
        {
            // Compare right-to-left; i ends at the mismatching pattern index,
            // or at -1 when every byte matched.
            int i = last;
            while (i >= 0 && m_pattern[i] == text[start + static_cast<size_t>(i)])
                --i;

            if (i < 0)
                return true;

            const unsigned char bad = static_cast<unsigned char>(text[start + static_cast<size_t>(i)]);
            const int bc_shift = m_bcShift[bad] - last + i;
            const int gs_shift = m_gsShift[i];

            // Every good-suffix entry is >= 1, so the window always advances
            // even when the bad-character shift is zero or negative.
            start += static_cast<size_t>(bc_shift > gs_shift ? bc_shift : gs_shift);
        }
        return false;
    }

private:
    // One bad-character entry per possible unsigned char value (8-bit bytes).
    enum { bc_table_size = 256 };

    // Fills suff so that suff[i] is the length of the longest substring of the
    // pattern that ends at index i and is also a suffix of the whole pattern.
    // Requires a non-empty pattern.
    void compute_suffixes(std::vector<int>& suff) const {
        const int last = m_pattern_len - 1;
        suff.assign(m_pattern_len, 0);
        suff[last] = m_pattern_len;

        int f = 0;
        int g = last;
        for (int i = last - 1; i >= 0; --i)
        {
            // suff[i + last - f] is only meaningful (and in range) once
            // g < i holds, so the bounds test must come first.
            if (g < i && suff[i + last - f] < i - g)
            {
                suff[i] = suff[i + last - f];
            }
            else
            {
                if (i < g)
                    g = i;
                f = i;
                while (g >= 0 && m_pattern[g] == m_pattern[g + last - f])
                    --g;
                suff[i] = f - g;
            }
        }
    }

    // Builds m_gsShift: m_gsShift[i] is the shift to apply when the first
    // mismatch (scanning right-to-left) is at pattern index i.
    // Requires a non-empty pattern.
    void compute_good_suffix_shifts() {
        std::vector<int> suff;
        compute_suffixes(suff);

        const int last = m_pattern_len - 1;

        // Default: no reusable suffix, shift by the whole pattern length.
        m_gsShift.assign(m_pattern_len, m_pattern_len);

        // Case 1: a prefix of the pattern (pattern[0..i]) equals a suffix of it.
        // Each slot is written at most once, with the smallest valid shift.
        int j = 0;
        for (int i = last; i >= 0; --i)
        {
            if (suff[i] == i + 1)
            {
                const int shift = last - i;
                for (; j < shift; ++j)
                {
                    if (m_gsShift[j] == m_pattern_len)
                        m_gsShift[j] = shift;
                }
            }
        }

        // Case 2: the matched suffix reoccurs inside the pattern. Later
        // (larger) i yields a smaller shift and deliberately overwrites.
        for (int i = 0; i < last; ++i)
            m_gsShift[last - suff[i]] = last - i;
    }

    // Builds m_bcShift: for each byte value, the distance from its last
    // occurrence in pattern[0..len-2] to the end of the pattern, or the full
    // pattern length if the byte does not occur there.
    void compute_bad_character_shifts(){
        m_bcShift.assign(bc_table_size, m_pattern_len);

        const int last = m_pattern_len - 1;
        for (int j = 0; j < last; ++j)
            m_bcShift[static_cast<unsigned char>(m_pattern[j])] = last - j;
    }

private:

    // private copy of the pattern bytes
    std::vector<char> m_pattern;
    int m_pattern_len;

    // good suffix skip table (m_pattern_len entries; empty for an empty pattern)
    std::vector<int> m_gsShift;

    // bad character skip table (bc_table_size entries; empty for an empty pattern)
    std::vector<int> m_bcShift;
};

int main() {
    std::string src = "hello world";
    std::string pattern = "world";
    BM_Algorithm bm(pattern.c_str(),pattern.length());
    std::cout << bm.match(src.c_str(), src.length())<< std::endl;
    return 0;
}
    原文作者:查找算法
    原文地址: https://blog.csdn.net/Night_ZW/article/details/54573502
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞