字符串hash 代替kmp/判断字符串是否出现过 洛谷 P3375 P3370

先看 KMP 模板题(P3375):用字符串 hash 找出所有匹配位置,next 数组仍按 KMP 的方式求。
如果出题人不专门卡 hash,只用一个模数一般问题不大。不过当不同字符串的数量达到模数平方根的量级时,发生碰撞的概率会明显变大(生日悖论)。
我选了1e8的模数,10000的数据刚好够(是指10000个不同的字符串)
实在不行就用双 hash:对每个串用两个不同的模数各算一个 hash 值(hash1、hash2),只有两个串的 hash1 和 hash2 都相等时,才判定它们相同。
具体做法就是把第一遍 hash 的代码复制一份、换一个模数(注意别把两个模数写混),代价是大约慢一倍。
模数别取太常见的质数(容易被出题人针对性地卡掉),可以自己打个质数表,记几个不那么常见的质数备用。

#include <algorithm>
#include <iostream>
#include <cstring>
#include <cstdio>
#include <map>
// Makes every std name visible unqualified for the rest of this listing.
// NOTE(review): on C++11 and later this makes the unqualified global name
// `next` below collide with std::next.
using namespace std;
// Debug helper: prints `<expression text>=<value>` for x to cerr, then endl.
// The macro body already ends with a semicolon.
#define debug(x) cerr << #x << "=" << x << endl;
const int MAXN = 1000000 + 10;  // capacity of the fixed-size global arrays
const int MOD3 = 100000123;     // modulus applied to the rolling hash in main
// Of these globals only `next` is referenced by the code visible in this
// listing (written by initNext, read by KMP and main); the others are unused.
int l=1,r=0,k,n,mians,sum[MAXN],t[MAXN],dif[MAXN],h,next[MAXN];
int g,ans[MAXN],tot;
string a,b;              // read from stdin in main: a is the text, b the pattern
char c[MAXN],d[MAXN];    // 1-based copies of a and b, filled in main
typedef unsigned long long ll;  // not used by the code visible in this listing
void initNext(string p) {
    int len = p.length();
    memset(next, -1, sizeof(next));
    for (int i = 1, j = -1; i < len; i++) {
        while (j != -1 && p[i] != p[j + 1]) j = next[j];
        if (p[i] == p[j + 1]) j++;
        next[i] = j;
    }
}
int KMP(string p, string t) { 
    int m = p.length(), n = t.length(), res = 0;
    for (int i = 0, j = -1; i < n; i++) {
        while (j != -1 && t[i] != p[j + 1]) j = next[j];
        if (t[i] == p[j + 1]) j++;
        if (j == m - 1) j = next[j], res++;
    }
    return res;
}
// prefix[i] is the rolling hash (mod MOD3) of the first i characters of the
// text; main fills indices 1..ls1 and never writes prefix[0], which keeps the
// zero value of a global.
long long prefix[MAXN];
int main() {
    cin >> a >> b;
    int ls1 = a.length(), ls2 = b.length(); 
    for(int i=0; i<a.length(); i++) {
        c[i+1] = a[i];
    }
    for(int i=0; i<b.length(); i++) {
        d[i+1] = b[i];
    }
    int p = 131;
    for(int i=1; i<=ls1; i++) {
        long long temp = prefix[i-1] * p + c[i];
        temp %= MOD3;
        prefix[i] = temp;
    }
    long long temp = 0;
    for(int i=1; i<=ls2; i++) {
        temp = (temp * p + d[i]) % MOD3;
    }
    int s2jud = temp;
    long long pn = 1;//这个pn一定要开long long 做这种题一定注意多开long long
    for(int i=1; i<=ls2; i++) {
        pn = pn * p % MOD3;//有乘就要小心爆int 大不了有乘法的算式都开成long long 然后乘pn是要补上一些没乘上的东西
    }
    for(int i=ls2; i<=ls1; i++) {
        int judd = (prefix[i] - prefix[i-ls2]*pn%MOD3 + MOD3) % MOD3;//减法取模是( a%p - b%p ) % p 注意这个%p是要取正余数
        if(judd == s2jud) {
            printf("%d\n", i-ls2+1);
        }
    }
    initNext(b);
    for(int i=0; i<b.length(); i++)
        cout << next[i]+1 << " ";   
    return 0;
}

双hash(慢一倍)

#include <algorithm>
#include <iostream>
#include <cstring>
#include <cstdio>
#include <map>
// Makes every std name visible unqualified for the rest of this listing.
// NOTE(review): on C++11 and later this makes the unqualified global name
// `next` below collide with std::next.
using namespace std;
// Debug helper: prints `<expression text>=<value>` for x to cerr, then endl.
// The macro body already ends with a semicolon.
#define debug(x) cerr << #x << "=" << x << endl;
const int MAXN = 1000000 + 10;  // capacity of the fixed-size global arrays
// The two moduli of the double hash: main passes MOD3 to hash1 and MOD7 to
// hash2, and each of them to s2hash.
const int MOD3 = 100000037;
const int MOD7 = 100000073;
// Of the globals on the next line only `next` is referenced by the code
// visible in this listing (written by initNext, read by KMP and main).
int l=1,r=0,k,n,mians,sum[MAXN],t[MAXN],dif[MAXN],h,next[MAXN];
// ls1 / ls2: lengths of a / b, set in main. p: hash base, set to 131 in main.
// g, ans and tot are not referenced by the code visible in this listing.
int g,ans[MAXN],tot,ls1,ls2,p;
string a,b;              // read from stdin in main: a is the text, b the pattern
char c[MAXN],d[MAXN];    // 1-based copies of a and b, filled in main
void initNext(string p) {
    int len = p.length();
    memset(next, -1, sizeof(next));
    for (int i = 1, j = -1; i < len; i++) {
        while (j != -1 && p[i] != p[j + 1]) j = next[j];
        if (p[i] == p[j + 1]) j++;
        next[i] = j;
    }
}
int KMP(string p, string t) { 
    int m = p.length(), n = t.length(), res = 0;
    for (int i = 0, j = -1; i < n; i++) {
        while (j != -1 && t[i] != p[j + 1]) j = next[j];
        if (t[i] == p[j + 1]) j++;
        if (j == m - 1) j = next[j], res++;
    }
    return res;
}
// Prefix rolling hashes of the text under the two moduli: hash1 fills
// prefix1[1..ls1] and hash2 fills prefix2[1..ls1]. Index 0 is never written
// and keeps the zero value of a global.
long long prefix1[MAXN], prefix2[MAXN];
// Fills prefix1[1..ls1] with the polynomial rolling hash of the 1-based text
// buffer `c`: prefix1[i] = (prefix1[i-1] * p + c[i]) mod MOD, using the global
// base `p` and text length `ls1`.
//
// MOD: modulus of the hash. Every stored value lies in [0, MOD).
void hash1(int MOD) {
    for (int i = 1; i <= ls1; i++) {
        long long temp = (prefix1[i - 1] * p + c[i]) % MOD;
        // `%` keeps the sign of its left operand, and c[i] is negative for
        // bytes >= 0x80 where plain char is signed. Fold the result back into
        // [0, MOD): main's substring-hash subtraction assumes it.
        if (temp < 0) temp += MOD;
        prefix1[i] = temp;
    }
}
// Fills prefix2[1..ls1] with the polynomial rolling hash of the 1-based text
// buffer `c`: prefix2[i] = (prefix2[i-1] * p + c[i]) mod MOD, using the global
// base `p` and text length `ls1`.
//
// MOD: modulus of the hash. Every stored value lies in [0, MOD).
void hash2(int MOD) {
    for (int i = 1; i <= ls1; i++) {
        long long temp = (prefix2[i - 1] * p + c[i]) % MOD;
        // `%` keeps the sign of its left operand, and c[i] is negative for
        // bytes >= 0x80 where plain char is signed. Fold the result back into
        // [0, MOD): main's substring-hash subtraction assumes it.
        if (temp < 0) temp += MOD;
        prefix2[i] = temp;
    }
}
// Returns the polynomial rolling hash of the whole pattern, read from the
// 1-based buffer d[1..ls2] with the global base `p`, reduced modulo MOD.
//
// MOD: modulus of the hash. The result lies in [0, MOD).
int s2hash(int MOD) {
    long long temp = 0;
    for (int i = 1; i <= ls2; i++) {
        temp = (temp * p + d[i]) % MOD;
        // d[i] is negative for bytes >= 0x80 where plain char is signed, and
        // `%` would then give a negative remainder; keep the canonical
        // non-negative one so it can be compared with substring hashes.
        if (temp < 0) temp += MOD;
    }
    return static_cast<int>(temp);
}
int main() {
    cin >> a >> b;
    ls1 = a.length(), ls2 = b.length(); 
    for(int i=0; i<a.length(); i++) {
        c[i+1] = a[i];
    }
    for(int i=0; i<b.length(); i++) {
        d[i+1] = b[i];
    }
    p = 131;

    hash1(MOD3);
    hash2(MOD7);

    int s2jud11 = s2hash(MOD3);
    int s2jud22 = s2hash(MOD7);

    long long pn = 1;//这个pn一定要开long long 做这种题一定注意多开long long
    long long pn2 = 1;
    for(int i=1; i<=ls2; i++) {
        pn = pn * p % MOD3;//有乘就要小心爆int 大不了有乘法的算式都开成long long 然后乘pn是要补上一些没乘上的东西
        pn2 = pn2 * p % MOD7;
    }
    for(int i=ls2; i<=ls1; i++) {
        int judd1 = (prefix1[i] - prefix1[i-ls2]*pn%MOD3 + MOD3) % MOD3;//减法取模是( a%p - b%p ) % p 注意这个%p是要取正余数
        int judd2 = (prefix2[i] - prefix2[i-ls2]*pn2%MOD7 + MOD7) % MOD7;
        if(judd1 == s2jud11 && judd2 == s2jud22) {
            printf("%d\n", i-ls2+1);
        }
    }
    initNext(b);
    for(int i=0; i<b.length(); i++)
        cout << next[i]+1 << " ";   
    return 0;
}

然后是判断是否出现过
若模数太大、标记数组存不下,可以把每个字符串的 hash 值都存下来,排序后扫一遍,逐个比较相邻两个 hash 值是否相同(相同即视为同一个字符串)。

#include <algorithm>
#include <iostream>
#include <cstring>
#include <cstdio>
#include <map>
// Makes every std name visible unqualified for the rest of this listing.
// NOTE(review): on C++11 and later this makes the global function name `hash`
// below collide with std::hash when it is used unqualified.
using namespace std;
// Debug helper: prints `<expression text>=<value>` for x to cerr, then endl.
// The macro body already ends with a semicolon.
#define debug(x) cerr << #x << "=" << x << endl;
const int MAXN = 1000000 + 10;  // capacity of the character buffers below
const int MOD3 = 100000123;     // hash modulus, and the length of vis
// Of the globals on the next line only `n` (number of strings, read in main)
// is referenced by the code visible in this listing.
int l=1,r=0,k,n,mians,sum[MAXN],t[MAXN],dif[MAXN],h,next[MAXN];
int g,ans;  // ans: number of distinct hash values seen so far; g is unused
// vis[x] is set to 1 once a string hashing to x has been read.
// NOTE(review): with a 4-byte int this array spans roughly 400 MB of address
// space.
int vis[MOD3]; 
int p = 131;  // hash base
char a[MAXN],d[MAXN];  // a: buffer for the string being read; d is unused here
// Returns the polynomial rolling hash of the NUL-terminated string `a`, using
// the global base `p` and reduced modulo MOD3. The result lies in [0, MOD3).
//
// Each character is added as unsigned char. Plain char may be signed, and a
// negative term would make `%` return a negative value, which main would then
// use as an index into vis.
int hash(char *a) {
    long long temp = 0;
    for (const char *s = a; *s != '\0'; ++s)
        temp = (temp * p + static_cast<unsigned char>(*s)) % MOD3;
    return static_cast<int>(temp);
}
int main() {
    scanf("%d", &n);
    for(int i=1; i<=n; i++) {
        scanf("%s", a);
        int t = hash(a);
        if(vis[t]) continue;
        else vis[t] = 1, ans++;
    }
    printf("%d", ans);
    return 0;
}
    原文作者:KMP算法
    原文地址: https://blog.csdn.net/Fantasy_World/article/details/81986743
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞