本文介绍多种字符串匹配的算法
1,朴素的字符串匹配。
使用循环来检查各个char是否相等,算法比较简单:
// Brute-force substring search.
//
// Slides `b` along `a` one position at a time and compares the two
// character by character.
//
// Returns the index in `a` where the first occurrence of `b` starts, or -1
// when `b` is longer than `a` or does not occur in it. An empty `b` never
// satisfies the match test, so it also yields -1.
int naive_string_match(string a,string b)
{
    const char* text = a.c_str();
    const char* pattern = b.c_str();
    const int text_len = a.length();
    const int pattern_len = b.length();

    if (pattern_len > text_len)
    {
        return -1;
    }

    // Last alignment at which the pattern still fits inside the text.
    const int last_start = text_len - pattern_len;
    for (int start = 0; start <= last_start; ++start)
    {
        int matched = 0;
        while (matched < pattern_len && text[start + matched] == pattern[matched])
        {
            ++matched;
        }
        // A hit requires a non-empty pattern whose every character matched.
        if (pattern_len > 0 && matched == pattern_len)
        {
            return start;
        }
    }
    return -1;
}
2,KMP算法
通过预处理计算出跳转表,以提高字符串匹配的效率。代码主要分为预处理和匹配两个部分,这两个部分的算法非常类似。
代码如下:
// Knuth-Morris-Pratt substring search.
//
// Returns the index in `a` where the first occurrence of `b` starts, or -1
// if `b` does not occur. An empty `b` is reported as "not found" (-1), which
// is the answer naive_string_match gives for the same input.
//
// Indexing convention used throughout: a value k >= 0 means "pattern
// characters 0..k are matched" and -1 means "nothing is matched".
// fail[k] is therefore the index of the LAST character of the longest proper
// prefix of b[0..k] that is also a suffix of b[0..k], or -1 if there is none.
int KMP(string a,string b)
{
    const char* text = a.c_str();
    const char* pattern = b.c_str();
    const int a_len = a.length();
    const int b_len = b.length();

    // Nothing to search for, or the pattern cannot fit. Returning here also
    // keeps fail[0] from being written into a zero-length array.
    if (b_len == 0 || b_len > a_len)
    {
        return -1;
    }

    // preprocessing: build the failure table
    int* fail = new int[b_len];
    fail[0] = -1;
    int border = -1;
    for (int k = 1; k < b_len; k++)
    {
        // Fall back while there is a border (border >= 0) that cannot be
        // extended by pattern[k]. The test has to include border == 0:
        // otherwise a one-character border is never dropped and the table
        // ends up claiming matches that do not exist.
        while (border >= 0 && pattern[border + 1] != pattern[k])
        {
            border = fail[border];
        }
        if (pattern[border + 1] == pattern[k])
        {
            border++;
        }
        fail[k] = border;
    }

    // matching: same walk, this time against the text
    int result = -1;
    int matched = -1;
    for (int i = 0; i < a_len; i++)
    {
        while (matched >= 0 && pattern[matched + 1] != text[i])
        {
            matched = fail[matched];
        }
        if (pattern[matched + 1] == text[i])
        {
            matched++;
        }
        if (matched == b_len - 1)
        {
            // text[i] is the last character of the occurrence.
            result = i - b_len + 1;
            break;
        }
    }

    delete[] fail;
    return result;
}
3,寻找最大匹配
使用数组记录匹配的长度,记录下最大的匹配和匹配开始的位置,输出结果。
算法的复杂度是O(n×m)
代码如下:
// Finds the longest common substring of `a` and `b` and prints the result.
//
// Output (to cout, followed by a newline):
//     <length> <start index in a> <start index in b>
// When the strings share no character the line is "0 -1 -1".
// If several substrings share the maximum length, the first one found wins:
// `b` is scanned left to right, and for each character of `b`, `a` is
// scanned right to left.
//
// Always returns 0.
//
// Method: run[j] holds the length of the common run that ends at a[j] and at
// the character of `b` currently being processed. One array is reused for
// every character of `b`, which is why `a` is walked from the end: run[j-1]
// still holds the value for the previous character of `b` when run[j] is
// computed.
int find_biggest_macth(string a,string b)
{
    const char* text_a = a.c_str();
    const char* text_b = b.c_str();
    const int a_len = a.length();
    const int b_len = b.length();

    int best_len = 0;
    int best_pos_a = -1;
    int best_pos_b = -1;

    // With an empty `a` there is no run[0] to write to, so skip the scan.
    if (a_len > 0)
    {
        int* run = new int[a_len];
        memset(run, 0, sizeof(int) * a_len);

        for (int i = 0; i < b_len; i++)
        {
            for (int j = a_len - 1; j >= 0; j--)
            {
                if (text_a[j] != text_b[i])
                {
                    // A mismatch ends the run. Without this reset the stale
                    // length from an earlier character of `b` would be
                    // extended later and non-contiguous matches would be
                    // counted as one substring.
                    run[j] = 0;
                    continue;
                }

                run[j] = (j > 0) ? run[j - 1] + 1 : 1;
                if (run[j] > best_len)
                {
                    best_len = run[j];
                    best_pos_a = j - best_len + 1;
                    best_pos_b = i - best_len + 1;
                }
            }
        }

        delete[] run;
    }

    cout << best_len << " " << best_pos_a << " " << best_pos_b << endl;
    return 0;
}
4,Boyer Moore算法:
Boyer-Moore算法和KMP算法类似,需要经过预处理得到跳转表,不过计算跳转表的过程比KMP算法简单得多。
// Substring search using the Boyer-Moore bad-character rule in its
// simplified (Horspool) form.
//
// Returns the index in `a` where the first occurrence of `b` starts, or -1
// if `b` does not occur. An empty `b` is reported as "not found" (-1), which
// is the answer naive_string_match gives for the same input.
//
// The pattern is compared against a window of the text from right to left.
// After a failed window, the shift is chosen from the text character that
// sits under the pattern's last position: the window moves just far enough
// to line that character up with its right-most occurrence in the pattern
// (excluding the last position), or past it entirely if the pattern does not
// contain it.
int boyer_moore(string a,string b)
{
    const char* text = a.c_str();
    const char* pattern = b.c_str();
    const int a_len = a.length();
    const int b_len = b.length();

    // An empty pattern would never advance the window; a pattern longer than
    // the text cannot fit.
    if (b_len == 0 || b_len > a_len)
    {
        return -1;
    }

    // preprocessing
    // One entry per possible byte value. Indexing through unsigned char keeps
    // every character (upper case, digits, spaces, bytes >= 0x80) inside the
    // table.
    int skiptable[256];
    for (int c = 0; c < 256; c++)
    {
        skiptable[c] = b_len;
    }
    for (int h = 0; h < b_len - 1; h++)
    {
        skiptable[(unsigned char)pattern[h]] = b_len - 1 - h;
    }

    // matching
    int i = 0;
    while (i + b_len <= a_len)  // the whole window must lie inside the text
    {
        int j = b_len - 1;
        while (j >= 0 && text[i + j] == pattern[j])
        {
            j--;
        }
        if (j < 0)  // find
        {
            return i;
        }
        // Every table entry is at least 1, so the window always advances.
        i += skiptable[(unsigned char)text[i + b_len - 1]];
    }
    return -1;
}