#include <stdio.h>
#include "iostream"
#include <string.h>
#include <boost/shared_array.hpp>
#include <vector>
typedef unsigned char uchar; /* Shorthand for unsigned char */
/* Size of the character alphabet: 256, one slot per possible 8-bit byte value. */
enum { alphabet_size = 256 };
/* Returns the larger of the two ints; when they are equal, y is returned. */
inline int max(int x, int y)
{
    if (x > y) {
        return x;
    }
    return y;
}
using namespace std;
// Boyer-Moore substring matcher.
//
// The constructor copies the pattern and precomputes the two Boyer-Moore
// shift tables (bad character and good suffix). match() then reports whether
// the pattern occurs anywhere in a given text. All state is held by value,
// so a copy of a matcher owns its own tables.
class BM_Algorithm {
public:
    // Builds a matcher for the first pattern_length bytes of pattern.
    //
    // The bytes are copied, so the caller's buffer does not have to outlive
    // the matcher, and embedded '\0' bytes are treated like any other byte.
    // A NULL pattern or a non-positive pattern_length produces an empty
    // pattern, which match() reports as found in every text.
    BM_Algorithm(const char* pattern, int pattern_length)
        : m_pattern_len(0) {
        if (pattern != NULL && pattern_length > 0) {
            m_pattern_len = pattern_length;
            m_pattern.assign(pattern, pattern + pattern_length);
        }
        compute_bad_character_shifts();
        if (m_pattern_len > 0) {
            // The suffix table is only needed while building the
            // good-suffix table, so it lives on this stack frame.
            std::vector<int> suff(m_pattern_len);
            compute_suffixes(suff);
            compute_good_suffix_shifts(suff);
        }
    }

    // Returns true if the pattern occurs in the text_len bytes starting at
    // text, false otherwise.
    //
    // An empty pattern matches any text (including an empty one). A NULL
    // text, or a text shorter than the pattern, never matches a non-empty
    // pattern.
    bool match(const char* text, size_t text_len) const {
        if (m_pattern_len == 0) {
            return true;
        }
        const size_t pattern_len = static_cast<size_t>(m_pattern_len);
        if (text == NULL || text_len < pattern_len) {
            return false;
        }

        const int last = m_pattern_len - 1;
        // Largest text offset at which the pattern still fits. Kept as
        // size_t so texts longer than INT_MAX are handled correctly.
        const size_t last_start = text_len - pattern_len;

        size_t start = 0;
        while (start <= last_start) {
            // Compare right to left; i ends at the mismatch index, or -1
            // when the whole window matched.
            int i = last;
            while (i >= 0 && m_pattern[i] == text[start + static_cast<size_t>(i)]) {
                --i;
            }
            if (i < 0) {
                return true;
            }

            // Index the table through unsigned char so bytes >= 0x80 do not
            // become negative indices where char is signed.
            const unsigned char bad =
                static_cast<unsigned char>(text[start + static_cast<size_t>(i)]);
            const int bc_shift = m_bcShift[bad] - last + i;
            const int gs_shift = m_gsShift[i];
            // gs_shift is always >= 1, so the window always advances even
            // when the bad-character rule yields a non-positive shift.
            start += static_cast<size_t>(bc_shift > gs_shift ? bc_shift : gs_shift);
        }
        return false;
    }

private:
    // Number of distinct byte values; size of the bad-character table.
    enum { kAlphabetSize = 256 };

    // Fills suff so that suff[i] is the length of the longest substring
    // ending at index i that is also a suffix of the whole pattern.
    // Requires a non-empty pattern and suff.size() == m_pattern_len.
    void compute_suffixes(std::vector<int>& suff) const {
        const int last = m_pattern_len - 1;
        int f = last;
        int g = last;
        suff[last] = m_pattern_len;
        for (int i = last - 1; i >= 0; --i) {
            // suff[i + last - f] is only read once i > g holds; by then f
            // has been set by an earlier iteration and that entry is
            // already computed.
            if (i > g && suff[i + last - f] < i - g) {
                suff[i] = suff[i + last - f];
            } else {
                if (i < g) {
                    g = i;
                }
                f = i;
                while (g >= 0 && m_pattern[g] == m_pattern[g + last - f]) {
                    --g;
                }
                suff[i] = f - g;
            }
        }
    }

    // Builds the good-suffix table from the suffix table: m_gsShift[i] is
    // the shift to apply after a mismatch at pattern index i.
    // Requires a non-empty pattern.
    void compute_good_suffix_shifts(const std::vector<int>& suff) {
        const int last = m_pattern_len - 1;
        // Default: no reusable suffix, so shift by the whole pattern.
        m_gsShift.assign(m_pattern_len, m_pattern_len);

        // Case 1: a prefix of the pattern equals a suffix of the matched
        // part. j only moves forward, so each slot is visited once.
        int j = 0;
        for (int i = last; i >= 0; --i) {
            if (suff[i] != i + 1) {
                continue;
            }
            const int shift = last - i;
            for (; j < shift; ++j) {
                if (m_gsShift[j] == m_pattern_len) {
                    m_gsShift[j] = shift;
                }
            }
        }

        // Case 2: the matched suffix reoccurs inside the pattern. Increasing
        // i means later writes carry smaller shifts and take precedence.
        for (int i = 0; i < last; ++i) {
            m_gsShift[last - suff[i]] = last - i;
        }
    }

    // Builds the bad-character table: for every byte value, the distance
    // from its last occurrence among the first m_pattern_len - 1 pattern
    // bytes to the final pattern index, or m_pattern_len if it does not
    // occur there.
    void compute_bad_character_shifts() {
        for (int c = 0; c < kAlphabetSize; ++c) {
            m_bcShift[c] = m_pattern_len;
        }
        const int last = m_pattern_len - 1;
        for (int j = 0; j < last; ++j) {
            m_bcShift[static_cast<unsigned char>(m_pattern[j])] = last - j;
        }
    }

private:
    // Private copy of the pattern bytes (not NUL-terminated).
    std::vector<char> m_pattern;
    // Pattern length in bytes; 0 for an empty pattern.
    int m_pattern_len;
    // Good-suffix shift table, one entry per pattern index.
    std::vector<int> m_gsShift;
    // Bad-character shift table, one entry per byte value.
    int m_bcShift[kAlphabetSize];
};
int main() {
std::string src = "hello world";
std::string pattern = "world";
BM_Algorithm bm(pattern.c_str(),pattern.length());
std::cout << bm.match(src.c_str(), src.length())<< std::endl;
return 0;
}
// C++字符串查找算法(一)BM算法实现
// 原文作者:查找算法
// 原文地址: https://blog.csdn.net/Night_ZW/article/details/54573502
// 本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
// 原文地址: https://blog.csdn.net/Night_ZW/article/details/54573502
// 本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。