SCU-4438 Censor(字符串哈希||KMP)

2023年3月25日 371次阅读来源: KMP算法

Censor

frog is now an editor to censor so-called sensitive words (敏感词).

She has a long text p. Her job is relatively simple — just to find the first occurrence of sensitive word w and remove it.

frog repeats over and over again. Help her do the tedious work.

Input

The input consists of multiple tests. For each test:

The first line contains 1 string w. The second line contains 1 string p.

($1 \le \text{length of } w, p \le 5 \cdot 10^6$; $w$ and $p$ consist of only lowercase letters)

Output

For each test, write 1 string which denotes the censored text.

Sample Input

    abc
    aaabcbc
    b
    bbb
    abc
    ab

Sample Output

    a
    
    ab

题意：对每个样例，在第二个字符串（文本）中反复找到第一次出现的模式串并将其删除，直到文本中不再含有模式串为止。注意删除后剩余部分拼接起来可能会产生新的模式串。

思路：当时写的KMP,用KMP匹配的方法，当匹配到模式串之后,再从删除串的上一个字符处往下搜寻,这里需要对匹配过程中的变量值有一个深刻的理解.

还可以用字符串hash的方法,先计算模式串的hash值，再在主串中不断计算与模式串相等长度的子串的hash值，相等时就删除，再从删除串的前一个字符开始继续往下哈希即可.

KMP代码：

#include<bits/stdc++.h>
#define mem(a,b) memset(a,b,sizeof(a))
#define mod 1000000007
using namespace std;
typedef unsigned long long ll;
const int maxn = 5e6+5;
const double eps = 1e-12;
const int inf = 0x3f3f3f3f;
map<int,int>::iterator it;

// Failure table filled by get_next(): entries Next[0..strlen(x)], with Next[0] == -1.
int Next[maxn];
// Pattern (the sensitive word); main() reads it first.
char x[maxn]; 
// Text to censor; main() reads it second.
char y[maxn];
// Buffer in which solve() collects the surviving characters before printing them.
char ans[maxn];

// Builds the KMP failure table of the global pattern `x` into the global `Next`.
// After the call, Next[0] == -1 and, for 1 <= k <= strlen(x), Next[k] is the
// length of the longest proper prefix of x[0..k-1] that is also its suffix.
void get_next()
{
	const int patLen = strlen(x);
	int border = -1;
	Next[0] = border;
	for(int pos = 1; pos <= patLen; ++pos)
	{
		const char tail = x[pos - 1];
		// Fall back through shorter borders until one can be extended by
		// `tail`, or until no border is left (border == -1).
		while(border != -1 && tail != x[border])
			border = Next[border];
		++border;
		Next[pos] = border;
	}
}

// Repeatedly removes the first occurrence of the pattern `x` from the text `y`
// and prints what is left, followed by a newline.
//
// Precondition: get_next() has been called for the current `x`, so `Next`
// holds its failure table.
//
// The surviving characters are kept in `ans`, which is used as a stack, and
// state[k] records the KMP match length right after the k-th surviving
// character. When a full match ends at the top of the stack, the last m
// characters are dropped and matching resumes from the state stored for the
// character just before them, so occurrences formed across a deletion point
// are still found. Everything is written in one call instead of one printf
// per character.
void solve()
{
	const int m = strlen(x);
	const int n = strlen(y);

	// state[0] is the state of the empty prefix (no characters matched).
	std::vector<int> state(n + 1, 0);

	int kept = 0;// number of surviving characters currently in ans[0..kept-1]
	int j = 0;// current match length within the pattern
	for(int i = 0; i < n; i++)
	{
		while(-1 != j && y[i] != x[j]) j = Next[j];
		j++;
		ans[kept] = y[i];
		state[++kept] = j;
		if(j >= m)
		{
			// A whole pattern sits on top of the stack: remove it and restore
			// the match length that was valid before it started.
			kept -= m;
			j = state[kept];
		}
	}

	fwrite(ans, 1, kept, stdout);
	printf("\n");
}

// Entry point of the KMP solution: processes (pattern, text) pairs from
// standard input until the input ends.
int main()
{
    // Require both tokens. The previous `~scanf(...)` test also accepted a
    // return value of 1, which would run a dangling pattern against the text
    // left over from the previous test case.
    while(scanf("%s %s",x,y) == 2)
    {
    	get_next();
    	solve();
    }

    return 0;
}

Hash代码:

#include<bits/stdc++.h>
#define mem(a,b) memset(a,b,sizeof(a))
#define mod 1000000007
using namespace std;
typedef unsigned long long ll;
const int maxn = 5e6+5;
const double eps = 1e-12;
const int inf = 0x3f3f3f3f;
map<int,int>::iterator it;

// Base of the polynomial rolling hash; hashes are computed in ll (unsigned long long), so overflow wraps.
ll bas = 131;
// s: pattern (read first by main); t: text to censor (read second).
char s[maxn],t[maxn];
// Characters of the text kept so far; main() prints its first cnt entries.
char ans[maxn];

// Entry point of the rolling-hash solution.
//
// For every (pattern s, text t) pair on standard input, repeatedly removes the
// first occurrence of s from t and prints the remaining text on its own line.
//
// The kept characters live in ans[0..cnt-1]. The stack q stores, for every
// kept character, the hash of the window of (at most) len1 characters ending
// there, with a leading 0 for the empty prefix. After a deletion the hash for
// the new last character is simply the new top of the stack.
int main()
{
	// Require both tokens. `~scanf(...)` also accepted a return value of 1,
	// which would pair a dangling pattern with the previous case's text.
	while(scanf(" %s %s",s,t) == 2)
	{
		stack<ll> q;
		ll st = 1,cur = 0,p = 0;
		int len1 = strlen(s);
		int len2 = strlen(t);
		for(int i = 1;i<= len1;i++) st*= bas;// st = bas^len1, weight of the character leaving the window
		for(int i = 0;i< len1;i++) p = p*bas+s[i];// hash of the pattern
		q.push(0);// hash of the empty prefix
		int cnt = 0;
		for(int i = 0;i< len2;i++)
		{
			ans[cnt] = t[i];
			cur = cur*bas+t[i];
			// Once more than len1 characters are covered, drop the one that
			// slid out of the window.
			if(cnt>= len1)
				cur = cur-st*ans[cnt-len1];
			q.push(cur);
			// Equal hashes are only a hint: confirm with a byte comparison so
			// a collision can never delete non-matching text. Each successful
			// comparison removes len1 characters, so the total cost stays linear.
			if(cnt>= len1-1 && cur == p && memcmp(ans+cnt-len1+1,s,len1) == 0)
			{
				for(int j = 0;j< len1;j++)
					q.pop();
				cnt-= len1;
				cur = q.top();
			}
			cnt++;
		}
		// One bulk write instead of one printf call per character.
		fwrite(ans,1,cnt,stdout);
		printf("\n");
	}
	
	return 0;
}

    原文作者：KMP算法
    原文地址: https://blog.csdn.net/nka_kun/article/details/81258546
    本文转自网络文章，转载此文章仅为分享知识，如有侵权，请联系博主进行删除。