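// Adapted from the article "grep之字符串搜索算法Boyer-Moore由浅入深(比KMP快3-5倍)" ("The Boyer-Moore string search algorithm used by grep, from basics to depth (3-5x faster than KMP)").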
/**
 * Substring search implemented with the Boyer-Moore algorithm, combining the
 * bad-character and good-suffix shift heuristics.
 */
public class Solution {

    /** Minimum size of the bad-character table (covers char values 0..255). */
    private static final int MIN_BAD_CHAR_TABLE_SIZE = 256;

    /**
     * Returns the tail of {@code haystack} that starts at the first occurrence
     * of {@code needle}.
     *
     * @param haystack text to search in; may be {@code null}
     * @param needle   pattern to search for; {@code null} or empty matches at index 0
     * @return the substring of {@code haystack} beginning at the first match,
     *         or {@code null} when there is no match or {@code haystack} is {@code null}
     */
    public String strStr(String haystack, String needle) {
        if (haystack == null) {
            return null;
        }
        int begin = bm(haystack, needle);
        return begin == -1 ? null : haystack.substring(begin);
    }

    /**
     * Finds the first occurrence of {@code pattern} in {@code str}.
     *
     * @param str     text to search in
     * @param pattern pattern to search for
     * @return index of the first match; {@code 0} when {@code pattern} is
     *         {@code null} or empty; {@code -1} when there is no match or
     *         {@code str} is {@code null}
     */
    int bm(String str, String pattern) {
        if (pattern == null || pattern.length() == 0) {
            return 0;
        }
        if (str == null) {
            return -1;
        }
        int slen = str.length();
        int plen = pattern.length();
        if (plen > slen) {
            // No alignment is possible; skip building the shift tables.
            return -1;
        }
        char[] sarr = str.toCharArray();
        char[] parr = pattern.toCharArray();

        // Preprocessing: build both shift tables from the pattern.
        int[] bmBc = preBmBc(parr);
        int[] bmGs = preBmGs(parr);

        // Search: j is the current alignment of the pattern within the text.
        int j = 0;
        while (j <= slen - plen) {
            // Compare right-to-left; i ends at the mismatch position or -1.
            int i = plen - 1;
            while (i >= 0 && parr[i] == sarr[i + j]) {
                i--;
            }
            if (i < 0) {
                return j;
            }
            char bad = sarr[i + j];
            // Characters beyond the table cannot occur in the pattern, so they
            // get the default shift of the full pattern length.
            int tableShift = bad < bmBc.length ? bmBc[bad] : plen;
            int badCharShift = tableShift - (plen - 1 - i);
            j += Math.max(badCharShift, bmGs[i]);
        }
        return -1;
    }

    /**
     * Builds the bad-character shift table.
     *
     * <p>The table has at least 256 entries and is enlarged when the pattern
     * contains a character with a larger value, so every pattern character is a
     * valid index. Entry {@code c} holds the distance from the rightmost
     * occurrence of {@code c} (excluding the last pattern position) to the end
     * of the pattern, or the pattern length when {@code c} does not occur there.
     *
     * @param pattern pattern characters
     * @return the bad-character shift table
     */
    int[] preBmBc(char[] pattern) {
        int m = pattern.length;
        int size = MIN_BAD_CHAR_TABLE_SIZE;
        for (int i = 0; i < m; i++) {
            size = Math.max(size, pattern[i] + 1);
        }
        int[] bmBc = new int[size];
        for (int i = 0; i < size; i++) {
            bmBc[i] = m;
        }
        // Later (more rightward) occurrences overwrite earlier ones, leaving
        // the shift for the rightmost occurrence of each character.
        for (int i = 0; i < m - 1; i++) {
            bmBc[pattern[i]] = m - 1 - i;
        }
        return bmBc;
    }

    /**
     * Builds the good-suffix shift table; entry {@code i} is the shift to apply
     * when a mismatch occurs at pattern position {@code i}.
     *
     * @param pattern pattern characters
     * @return the good-suffix shift table, one entry per pattern position
     */
    int[] preBmGs(char[] pattern) {
        int m = pattern.length;
        int[] bmGs = new int[m];
        int[] suff = suffix(pattern);

        // Case 3: the matched suffix does not reappear at all -> shift by m.
        for (int i = 0; i < m; i++) {
            bmGs[i] = m;
        }

        // Case 2: a prefix of the pattern equals a suffix of the matched part.
        int j = 0;
        for (int i = m - 1; i >= 0; i--) {
            if (suff[i] == i + 1) {
                for (; j < m - 1 - i; j++) {
                    if (bmGs[j] == m) {
                        bmGs[j] = m - 1 - i;
                    }
                }
            }
        }

        // Case 1: the matched suffix reappears elsewhere inside the pattern.
        for (int i = 0; i <= m - 2; i++) {
            bmGs[m - 1 - suff[i]] = m - 1 - i;
        }
        return bmGs;
    }

    /**
     * Computes, for each index {@code i}, the length of the longest substring
     * ending at {@code i} that is also a suffix of the whole pattern.
     *
     * @param pattern pattern characters
     * @return array of suffix lengths; empty when {@code pattern} is empty
     */
    int[] suffix(char[] pattern) {
        int m = pattern.length;
        int[] suff = new int[m];
        if (m == 0) {
            return suff;
        }
        suff[m - 1] = m;
        for (int i = m - 2; i >= 0; i--) {
            int j = i;
            // Walk left while pattern[j..i] keeps matching the pattern's tail.
            while (j >= 0 && pattern[j] == pattern[m - 1 - (i - j)]) {
                j--;
            }
            suff[i] = i - j;
        }
        return suff;
    }

    /** Small demo: prints the tail of the haystack starting at the match. */
    public static void main(String[] args) throws Exception {
        String haystack = "mahtavaatalomaisemaomalomailuun";
        String needle = "maoma";
        System.out.println(new Solution().strStr(haystack, needle));
    }
}