评严蔚敏《数据结构》中的KMP算法

2023年12月18日 364次阅读来源: KMP算法

最近老被KMP 算法给烦着，几经思考加探索加画图加验证加分析，终于在我的努力下，发现了书中一个重大的问题，它里面的KMP函数是化简了的，也就是说书上的解释与函数是不完全对应的，这可苦了那些绞尽脑汁苦苦思索的学生啊！

经我仔细考究，它的解释基本没问题，个别地方还是强调一下比较好。但是为什么只把化到非常简的函数放上去，而不把按照解释写出来的函数放上去呢？莫非作者偷懒？

不管怎么说，作者这样做是非常不合理的，写出来的解释与实际使用的函数不一致，虽然结果一样，但是学生的思维也会跟着走乱了。

我看到网上的代码大都是化简后的，依我看，十有八九都是看不懂的。

本来想把我依照书上说的写出来的代码弄上来，不过由于时间关系还是等下次吧！

任何代码都是由繁琐到精练的，企图一下子写出最精练的代码，完全是违背常理的。

最高效的代码未必容易看懂，还是从基本的开始吧！

———————————————————————————————————————————————

2009年2月20日星期五一个倒春寒的早上，经过一夜的思索，我终于把kmp的代码搞定了。

PS：这件事情自上次发表这篇文章，已经过去了几个月了，这段时间以来由于学业任务，一直没有时间研究这个问题。昨晚想了一下，没想出，今早又考虑了一段时间，得到了一些灵感，也把代码上的bug修正了，当然，这里指的是我的代码，书上的代码是没问题的，不过就是觉得它与书上的分析不能很好对应，是写给机器看的吧？？？？

好了，这里把我的代码发布一下，仅供参考，由于本人水平有限，希望各位能够不吝赐教。

以下代码都以输出next元素的值为目的。

一、先是书上的代码（为了方便显示next的值，我删减改动了部分代码）：

//用KMP算法对主串和模式串进行模式匹配。本题目给出部分代码，请补全内容。

#include “stdio.h”
#include “stdlib.h”
#include “iostream.h”

#define TRUE 1
#define FALSE 0

#define OK 1
#define ERROR 0

#define INFEASLBLE -1
#define OVERFLOW -2
#define MAXSTRLEN 255 //用户可在255以内定义最大串长

typedef unsigned char SString[MAXSTRLEN+1]; //0号单元存放串的长度

void get_next(SString T,int next[])
{
// 算法4.7
// 求模式串T的next函数值并存入数组next
// 请补全代码
int i,j;
i = 1;

next[1] = 0 ;

j = 0;

   while( i < T[0]-‘0’ )
   {
    if(j == 0 || T[i] == T[j])
    {
     ++i;
     ++j;

     //if(T[i] != T[j])//这里去掉注释后就变成了书上的改进算法
     next[i] = j;

     //else next[i] = next[j];//这里去掉注释后就变成了书上的改进算法
    }

    else j = next[j];
   }

}

int main()
{
SString S;

SString T = {‘8′,’a’,’a’,’c’,’a’,’b’,’a’,’a’,’c’};//,’c’};
//T[0]=i-1; // T[0]用于存储模式串中字符个数
//是char型啊！！！

int i,j,n,k = 0,posArray[100],l;
char ch;
int pos = 0; //pos!!!!!!!!要求模式串从pos个位置后开始匹配

int next[100];

get_next(T,next);

for(i = 1; i<=T[0]-‘0’;i++)
printf(“%d “,next[i]);

return 0;
}

二、这是我写的kmp非递归算法（都在一个main里）：

#include <stdio.h>

#define LEN 6

int main ()//这里应用的是分情况处理的思想
{
int j,k,i;
int next[LEN];
int flag = 0;//设标记是为了区分不同情况
char ch[LEN] = {‘0′,’a’,’a’,’a’,’a’,’b’};

next[1]=0;

   for (j=1;j<LEN;j++)
   {

    k = next[j];


    if(k>=2)
    {
     do {

       if(ch[k]==ch[j])
       {

        if(flag==0)
        {
          {}

        next[j+1] = next[j]+1;


        }


        if(flag==1)
        {

         {}

         next[j+1] = next[k]+1;

         flag = 0;


        }


       }





       else {

         k = next[k];

         if(k==1||k==0)
         break;


         flag = 1;


        }

      }while(ch[k] != ch[j]);


    }





    if(k == 0)

     next[j+1] = 1;

    if(k == 1)

    {
     {}

     if(ch[j] == ch[1])
      next[j+1] = 2;

     else next[j+1] = 1;


    }





   }

   for(i=1;i<LEN;i++)
   {
   printf(“%d “,next[i]);

   }

return 0;

}

三、这是我写的kmp非递归算法的改进版：

#include <stdio.h>

#define LEN 9

void validateNext(char ch[] , int next[],int len)
{
int i;
for(i = 2; i<=len;i++)//begin from 2
{
if(ch[i]==ch[ next[i] ])
next[i] = next[ next[i] ];

}

void getNext (char ch[],int next[],int len )
{
int j,k,i;
//int next[LEN];
int flag = 0;
//char ch[LEN] = {‘0′,’a’,’a’,’a’,’a’,’b’};

next[1]=0;

      }while(ch[k] != ch[j]);


    }





    if(k == 0)

     next[j+1] = 1;

    if(k == 1)

    {
     {}

     if(ch[j] == ch[1])
      next[j+1] = 2;

     else next[j+1] = 1;


    }





   }

// return 0;

}

int main()
{
int len,i;
int next[LEN];
//int flag = 0;
char ch[LEN] = {‘0′,’a’,’a’,’c’,’a’,’b’,’a’,’a’,’c’};

len = LEN ;

getNext(ch,next,len);

validateNext(ch,next,len);

for(i = 1;i<len;i++)
printf(“%d “,next[i]);

return 0;

}

四、这是我写的kmp递归算法：

#include <stdio.h>

#define LEN 9

//这要最终有return，就不怕陷入无限循环,大胆地递归吧！！！！

int getNext(char ch[] , int next[],int n)//n means the Number of next[]’s element

{

int j = n-1;
int k;

if(j != 0)//out of recursion
k = getNext(ch,next,j);

if( n == 1)
{
next[1] = 0;

return next[1];

}

if(n == 2)
{

next[2] = 1;

return next[2];

}

if(ch[k]==ch[j])

next[j+1] = next[j] +1;

else
{
k = getNext(ch,next,k);//recursion until ch[k]==ch[j]
next[j+1] = next[k] + 1; //
}

return next[j+1];
}

int main ()
{
int j,k,i;
int next[LEN];
int flag = 0;
char ch[LEN] = {‘0′,’a’,’a’,’c’,’a’,’b’,’a’,’a’,’c’};

//next[1]=0;

int len = LEN – 1;

getNext(ch,next,len);

   for(i=1;i<LEN;i++)
   {
   printf(“%d “,next[i]);

   }

return 0;

}

五、这是我写的kmp递归算法的改进版：

#include <stdio.h>

#define LEN 9

int getNext(char ch[] , int next[],int n)//n means the Number of next[]’s element

{

int j = n-1;
int k;

if(j != 0)//out of recursion
k = getNext(ch,next,j);

if( n == 1)
{
next[1] = 0;

return next[1];

}

if(n == 2)
{

next[2] = 1;

return next[2];

}

if(ch[k]==ch[j])

next[j+1] = next[j] +1;

else
{
k = getNext(ch,next,k);//recursion until ch[k]==ch[j]
next[j+1] = next[k] + 1; //
}

return next[j+1];
}

void validateNext(char ch[] , int next[],int len)
{
int i;
for(i = 2; i<=len;i++)//begin from 2
{
if(ch[i]==ch[ next[i] ])
next[i] = next[ next[i] ];

}

int main ()
{
int j,k,i;
int next[LEN];
int flag = 0;
char ch[LEN] = {‘0′,’a’,’a’,’c’,’a’,’b’,’a’,’a’,’c’};

//next[1]=0;

int len = LEN – 1;

getNext(ch,next,len);
validateNext(ch,next,len);

   for(i=1;i<LEN;i++)
   {
   printf(“%d “,next[i]);

   }

return 0;

}

注：留心的朋友应该会注意到，这里面多次使用了validateNext（）函数，这一点很关键，一开始我总是试图直接在getNext（）的基础上改进，后来发觉很困难，于是产生了分成两步完成的想法，一步先算出基础值，另一步再优化值，于是所有问题就迎刃而解了。这一点想法让我兴奋了很久。。。。。。YY咯。。。。。。。

PS：这里我用了validate这个单词不过后来查了一下，好像不合适，课本里用val，意思就是“修正”吧？

不过我查不到val的完整写法，自己还莫名其妙地用了validate，大家将就一下好了。。。。。。

———————————————————————————————————————————————

人活着，必定是为了思考。

我在思考中才能感知我的存在。

收藏于 2008-10-24

    原文作者：KMP算法
    原文地址: https://blog.csdn.net/pianistofsoftware/article/details/51984842
    本文转自网络文章，转载此文章仅为分享知识，如有侵权，请联系博主进行删除。