基于KMP算法的C++字符串帮助类

利用C++实现的字符串基本查找替换算法类.基于KMP算法. 7月8日第一版,效率不满意,7月9日进行持续优化,提高算法效率,测试场景下大大超过C++标准库中的方法的效率.

7月9日:
对7月8日的程序做了优化,优化的点集中于:
1. KMP算法中需要的next数组(array)是根据pattern生成的,因此在pattern不变的情况下,反复查询该pattern时不应该每次都重新生成这个数组. (实际应用场景: 类似在word文件中查找某个关键字出现的次数).
2. C++标准库中的string类的某些操作效率较低,不适合在大循环(千万次循环级别)中使用.可以将string先转换成const char*,之后利用指针对const char*操作.
3. 补充了replace, replaceall方法.并利用直接操作memory的方法(memset, memcpy)来实现.

可以看到,在我的测试用例下面,基于KMP的算法的字符串查找的效率比C++自带的方法提升了7倍左右.
《基于KMP算法的C++字符串帮助类》

优化后的程序:

#ifndef _STRING_HELPER_H_
#define _STRING_HELPER_H_
#include <iostream>
using namespace std;

/**
 * KMP-based substring search/replace helper bound to one search text and one
 * pattern.
 *
 * The constructor copies both strings into privately owned NUL-terminated
 * buffers and builds the KMP "next" table once, so repeated queries against
 * the same pattern do not rebuild it.
 */
class StringHelper
{
public:
    /** Copies both strings and precomputes the KMP table for patternStr. */
    StringHelper(const string searchSpaceStr, const string patternStr);
    /** Releases the owned buffers and the KMP table. */
    ~StringHelper();
    /** Returns true when the pattern occurs in the search text. */
    bool find();
    /**
     * Counts matches; each search resumes where the previous match ended.
     * Returns true when count is non-zero.
     */
    bool find(unsigned int& count);
    /** Same as find(count), additionally reporting the offset of the first match. */
    bool find(unsigned int& count, unsigned int &firstStartLocation);
    /** Writes the search text with the first match replaced into replaceRs. */
    bool replace(const string replaceToStr, string& replaceRs);
    /** Writes the search text with every match replaced into replaceRs. */
    bool replaceAll(const string replaceToStr, string& replaceRs);
private:
    // The class owns three raw arrays that the destructor delete[]s. An
    // implicit member-wise copy would make two objects free the same memory,
    // so copying is disabled (declared private, intentionally not defined).
    StringHelper(const StringHelper&);
    StringHelper& operator=(const StringHelper&);

    /** Locates the first match and stores its start offset. */
    bool findFirst(unsigned int &firstStartLocation);
    /** KMP scan from offset 0; reports only whether a match exists. */
    bool kmpMapping(const char* pSearchSpaceStr, const char* pPatternStr);
    /**
     * KMP scan starting at stopLocation; on success stores the match start in
     * startLocation and the offset just past the match in stopLocation.
     */
    bool kmpMapping(const char* pSearchSpaceStr, const char* pPatternStr, unsigned int &startLocation, unsigned int &stopLocation);
    /** Fills nextArr with the KMP fallback positions for pStr. */
    void getNext(const char *pStr, int *nextArr);
    int *nextArray;              // KMP fallback table for the pattern (owned)
    char* m_SearchSpaceChArray;  // NUL-terminated copy of the search text (owned)
    char* m_PatternChArray;      // NUL-terminated copy of the pattern (owned)
};

#endif // !_STRING_HELPER_H_
#include "StringHelper.h"


/**
 * Copies the search text and the pattern into owned NUL-terminated buffers and
 * precomputes the KMP table for the pattern.
 *
 * @param searchSpaceStr text that will be searched
 * @param patternStr     pattern to look for
 */
StringHelper::StringHelper(const string searchSpaceStr, const string patternStr)
{
    nextArray = NULL;
    m_SearchSpaceChArray = NULL;
    m_PatternChArray = NULL;

    const string::size_type searchLen = searchSpaceStr.length();
    m_SearchSpaceChArray = new char[searchLen + 1];
    memcpy(m_SearchSpaceChArray, searchSpaceStr.c_str(), sizeof(char) * searchLen);
    m_SearchSpaceChArray[searchLen] = '\0';

    const string::size_type patternLen = patternStr.length();
    m_PatternChArray = new char[patternLen + 1];
    memcpy(m_PatternChArray, patternStr.c_str(), sizeof(char) * patternLen);
    m_PatternChArray[patternLen] = '\0';

    // The table is indexed by pattern position, so it must be sized by the
    // pattern length (not the search-text length): a pattern longer than the
    // text would otherwise overflow the array inside getNext. getNext always
    // writes slot 0, hence at least one element even for an empty pattern.
    nextArray = new int[patternLen > 0 ? patternLen : 1];
    getNext(m_PatternChArray, nextArray);
}

/** Frees the KMP table and both owned string buffers. */
StringHelper::~StringHelper()
{
    // delete[] on a null pointer is a no-op, so no explicit checks are needed.
    delete[] nextArray;
    nextArray = NULL;

    delete[] m_SearchSpaceChArray;
    m_SearchSpaceChArray = NULL;

    delete[] m_PatternChArray;
    m_PatternChArray = NULL;
}


/** Returns true when the stored pattern occurs in the stored search text. */
bool StringHelper::find()
{
    return kmpMapping(m_SearchSpaceChArray, m_PatternChArray);
}

/**
 * Counts the matches of the stored pattern in the stored search text. Each
 * search resumes at the end of the previous match, so matches do not overlap.
 *
 * @param count receives the number of matches (0 when none)
 * @return true when at least one match was found
 */
bool StringHelper::find(unsigned int& count)
{
    count = 0;

    // An empty pattern "matches" without advancing the scan position, which
    // would loop forever; treat it as having no matches.
    if ('\0' == m_PatternChArray[0])
    {
        return false;
    }

    unsigned int startLocation = 0;
    unsigned int stopLocation = 0;
    // kmpMapping moves stopLocation past each match and returns false once the
    // remaining text holds no further match.
    while (kmpMapping(m_SearchSpaceChArray, m_PatternChArray, startLocation, stopLocation))
    {
        ++count;
    }
    return 0 != count;
}

/**
 * Counts the non-overlapping matches of the stored pattern and reports where
 * the first one starts.
 *
 * @param count              receives the number of matches (0 when none)
 * @param firstStartLocation receives the offset of the first match; 0 when
 *                           there is no match
 * @return true when at least one match was found
 */
bool StringHelper::find(unsigned int& count, unsigned int &firstStartLocation)
{
    count = 0;
    firstStartLocation = 0;

    // An empty pattern "matches" without advancing the scan position, which
    // would loop forever; treat it as having no matches.
    if ('\0' == m_PatternChArray[0])
    {
        return false;
    }

    unsigned int startLocation = 0;
    unsigned int stopLocation = 0;
    while (kmpMapping(m_SearchSpaceChArray, m_PatternChArray, startLocation, stopLocation))
    {
        ++count;
        if (1 == count)
        {
            firstStartLocation = startLocation;
        }
    }
    return 0 != count;
}

bool StringHelper::replace(const string replaceToStr, string& replaceRs)
{
    unsigned int count = 0;
    unsigned int startLocation = 0;
    replaceRs.clear();
    bool rs = findFirst(startLocation);
    if (rs)
    {
        int len = strlen(m_SearchSpaceChArray) - strlen(m_PatternChArray) + strlen(replaceToStr.c_str()) + 1;
        char *replaceRsChArray = new char[len];
        memset(replaceRsChArray, 0, sizeof(char) * len);
        memcpy(replaceRsChArray, m_SearchSpaceChArray, sizeof(char) * startLocation);
        memcpy(&replaceRsChArray[startLocation], replaceToStr.c_str(), sizeof(char) * replaceToStr.length());
        unsigned int stopLocation = startLocation + strlen(m_PatternChArray);
        if (stopLocation < strlen(m_SearchSpaceChArray))
        {
            memcpy(&replaceRsChArray[startLocation + strlen(replaceToStr.c_str())], &m_SearchSpaceChArray[stopLocation], sizeof(char) * (strlen(m_SearchSpaceChArray) - stopLocation));
        }
        replaceRs.append(replaceRsChArray);
    }

    return rs;
}

/**
 * Builds a copy of the search text in which every non-overlapping match of the
 * pattern is replaced by replaceToStr.
 *
 * @param replaceToStr replacement text
 * @param replaceRs    receives the result; when nothing matches (or the
 *                     pattern is empty) it receives the unchanged search text
 * @return true when at least one replacement was made
 */
bool StringHelper::replaceAll(const string replaceToStr, string& replaceRs)
{
    replaceRs.clear();

    unsigned int count = 0;
    unsigned int lastStopPos = 0;  // offset just past the previous match

    // An empty pattern "matches" without advancing the scan position, which
    // would loop forever; skip the scan and fall through to copy the text.
    if ('\0' != m_PatternChArray[0])
    {
        unsigned int startLocation = 0;
        unsigned int stopLocation = 0;
        while (kmpMapping(m_SearchSpaceChArray, m_PatternChArray, startLocation, stopLocation))
        {
            // Unmatched text between the previous match and this one.
            replaceRs.append(m_SearchSpaceChArray + lastStopPos, startLocation - lastStopPos);
            replaceRs.append(replaceToStr);
            lastStopPos = stopLocation;
            ++count;
        }
    }

    // Remaining text after the last match (the whole text when none matched).
    replaceRs.append(m_SearchSpaceChArray + lastStopPos);
    return 0 != count;
}


/**
 * Searches from the beginning of the stored text for the first match.
 *
 * @param firstStartLocation reset to 0, then set to the match start on success
 * @return true when a match was found
 */
bool StringHelper::findFirst(unsigned int & firstStartLocation)
{
    unsigned int scanFrom = 0;  // start scanning at offset 0
    firstStartLocation = 0;
    return kmpMapping(m_SearchSpaceChArray, m_PatternChArray, firstStartLocation, scanFrom);
}

/**
 * KMP scan of pSearchSpaceStr for pPatternStr starting at offset 0, using the
 * precomputed nextArray as the fallback table.
 *
 * @return true when the whole pattern was matched; false otherwise or when
 *         nextArray has not been built
 */
bool StringHelper::kmpMapping(const char* pSearchSpaceStr, const char* pPatternStr)
{
    if (NULL == nextArray)
        return false;

    const int textLen = strlen(pSearchSpaceStr);
    const int patLen = strlen(pPatternStr);
    int textPos = 0;
    int patPos = 0;

    while (textPos < textLen && patPos < patLen)
    {
        // patPos == -1 means "no prefix can be reused": step to the next text
        // character and restart the pattern at index 0.
        const bool advance = (patPos == -1) || (pSearchSpaceStr[textPos] == pPatternStr[patPos]);
        if (advance)
        {
            ++textPos;
            ++patPos;
        }
        else
        {
            patPos = nextArray[patPos];
        }
    }

    return patPos == patLen;
}

/**
 * KMP scan of pSearchSpaceStr for pPatternStr that begins at the offset held
 * in stopLocation, using the precomputed nextArray as the fallback table.
 *
 * @param startLocation on success, receives the offset where the match starts
 * @param stopLocation  in: offset to start scanning from; on success, receives
 *                      the offset just past the match. Unchanged on failure.
 * @return true when the whole pattern was matched; false otherwise or when
 *         nextArray has not been built
 */
bool StringHelper::kmpMapping(const char* pSearchSpaceStr, const char* pPatternStr, unsigned int &startLocation, unsigned int &stopLocation)
{
    if (NULL == nextArray)
        return false;

    const int textLen = strlen(pSearchSpaceStr);
    const int patLen = strlen(pPatternStr);
    int textPos = stopLocation;
    int patPos = 0;

    while (textPos < textLen && patPos < patLen)
    {
        // patPos == -1 means "no prefix can be reused": step to the next text
        // character and restart the pattern at index 0.
        const bool advance = (patPos == -1) || (pSearchSpaceStr[textPos] == pPatternStr[patPos]);
        if (advance)
        {
            ++textPos;
            ++patPos;
        }
        else
        {
            patPos = nextArray[patPos];
        }
    }

    if (patPos != patLen)
        return false;

    startLocation = textPos - patPos;  // start of the match
    stopLocation = textPos;            // one past the end of the match
    return true;
}

/**
 * Builds the KMP fallback table for pStr: nextArr[i] is the pattern index to
 * resume comparing from when the comparison at index i fails (-1 means move on
 * to the next text character).
 *
 * Slot 0 is always written, so nextArr must hold at least one element.
 */
void StringHelper::getNext(const char *pStr, int *nextArr)
{
    const int lastIndex = static_cast<int>(strlen(pStr)) - 1;
    int pos = 0;
    int prefix = -1;
    nextArr[pos] = prefix;

    while (pos < lastIndex)
    {
        if (prefix != -1 && pStr[pos] != pStr[prefix])
        {
            // Mismatch: fall back to a shorter candidate prefix.
            prefix = nextArr[prefix];
            continue;
        }

        ++pos;
        ++prefix;
        // If the characters at both positions are equal, falling back to
        // `prefix` would fail the same way, so reuse its fallback instead.
        nextArr[pos] = (pStr[pos] == pStr[prefix]) ? nextArr[prefix] : prefix;
    }
}

优化前的代码:

#ifndef _STRING_HELPER_H_
#define _STRING_HELPER_H_
#include <iostream>
using namespace std;

/**
 * KMP-based substring search/replace helper (stateless API: every call takes
 * the search text and the pattern, and rebuilds the KMP table).
 */
class StringHelper
{
public:
    StringHelper();
    /** Releases the KMP table left over from the last call. */
    ~StringHelper();
    /** Returns true when patternStr occurs in searchSpaceStr. */
    bool find(const string searchSpaceStr, const string patternStr);
    /**
     * Counts matches; each search resumes where the previous match ended.
     * Returns true when count is non-zero.
     */
    bool find(const string searchSpaceStr, const string patternStr, unsigned int& count);
    /** Same as the counting find, additionally reporting the first match offset. */
    bool find(const string searchSpaceStr, const string patternStr, unsigned int& count, unsigned int &firstStartLocation);
    /** Writes searchSpaceStr with the first match of needReplaceStr replaced into replaceRs. */
    bool replace(const string searchSpaceStr, const string needReplaceStr, const string replaceToStr, string& replaceRs);
    /** Writes searchSpaceStr with every match of needReplaceStr replaced into replaceRs. */
    bool replaceAll(const string searchSpaceStr, const string needReplaceStr, const string replaceToStr, string& replaceRs);
private:
    // The class owns nextArray and delete[]s it in the destructor. An implicit
    // member-wise copy would make two objects free the same array, so copying
    // is disabled (declared private, intentionally not defined).
    StringHelper(const StringHelper&);
    StringHelper& operator=(const StringHelper&);

    /**
     * Allocates C-string copies of both inputs (returned through the pointer
     * arguments; the caller must delete[] them) and rebuilds nextArray.
     * Returns false when either input is empty.
     */
    bool findInit(const string searchSpaceStr, const string patternStr, char **pSearchSpaceChArray, char **pPatternChArray);
    /** KMP scan from offset 0; reports only whether a match exists. */
    bool kmpMapping(const char* pSearchSpaceStr, const char* pPatternStr);
    /**
     * KMP scan starting at stopLocation; on success stores the match start in
     * startLocation and the offset just past the match in stopLocation.
     */
    bool kmpMapping(const char* pSearchSpaceStr, const char* pPatternStr, unsigned int &startLocation, unsigned int &stopLocation);
    /** Fills nextArr with the KMP fallback positions for pStr. */
    void getNext(const char *pStr, int *nextArr);
    int *nextArray;  // KMP fallback table for the most recent pattern (owned)
};

#endif // !_STRING_HELPER_H_
#include "StringHelper.h"


/** Starts with no KMP table; findInit builds one on demand. */
StringHelper::StringHelper()
    : nextArray(NULL)
{
}

/** Frees the KMP table, if one was built. */
StringHelper::~StringHelper()
{
    // delete[] on a null pointer is a no-op, so no explicit check is needed.
    delete[] nextArray;
    nextArray = NULL;
}

/**
 * Reports whether patternStr occurs in searchSpaceStr.
 *
 * @return true when a match exists; false when there is none or when either
 *         string is empty (findInit rejects empty input)
 */
bool StringHelper::find(const string searchSpaceStr, const string patternStr)
{
    // Plain local pointers receive the buffers allocated by findInit; there is
    // no need to heap-allocate the pointer holders themselves.
    char *searchBuf = NULL;
    char *patternBuf = NULL;

    bool result = findInit(searchSpaceStr, patternStr, &searchBuf, &patternBuf);
    if (result)
        result = kmpMapping(searchBuf, patternBuf);

    delete[] searchBuf;
    delete[] patternBuf;
    return result;
}


/**
 * Counts the matches of patternStr in searchSpaceStr. Each search resumes at
 * the end of the previous match, so matches do not overlap.
 *
 * @param count receives the number of matches (0 when none or when either
 *              string is empty)
 * @return true when at least one match was found
 */
bool StringHelper::find(const string searchSpaceStr, const string patternStr, unsigned int& count)
{
    count = 0;
    char *searchBuf = NULL;
    char *patternBuf = NULL;

    if (findInit(searchSpaceStr, patternStr, &searchBuf, &patternBuf))
    {
        unsigned int startLocation = 0;
        unsigned int stopLocation = 0;
        // kmpMapping moves stopLocation past each match and returns false
        // once the remaining text holds no further match.
        while (kmpMapping(searchBuf, patternBuf, startLocation, stopLocation))
        {
            ++count;
        }
    }

    // Release the buffers findInit allocated (previously leaked on every call).
    delete[] searchBuf;
    delete[] patternBuf;
    return 0 != count;
}

/**
 * Counts the non-overlapping matches of patternStr in searchSpaceStr and
 * reports where the first one starts.
 *
 * @param count              receives the number of matches (0 when none)
 * @param firstStartLocation receives the offset of the first match; 0 when
 *                           there is no match
 * @return true when at least one match was found
 */
bool StringHelper::find(const string searchSpaceStr, const string patternStr, unsigned int& count, unsigned int &firstStartLocation)
{
    count = 0;
    firstStartLocation = 0;
    char *searchBuf = NULL;
    char *patternBuf = NULL;

    if (findInit(searchSpaceStr, patternStr, &searchBuf, &patternBuf))
    {
        unsigned int startLocation = 0;
        unsigned int stopLocation = 0;
        while (kmpMapping(searchBuf, patternBuf, startLocation, stopLocation))
        {
            ++count;
            if (1 == count)
            {
                firstStartLocation = startLocation;
            }
        }
    }

    // Release the buffers findInit allocated (previously leaked on every call).
    delete[] searchBuf;
    delete[] patternBuf;
    return 0 != count;
}

/**
 * Builds a copy of searchSpaceStr in which the first match of needReplaceStr
 * is replaced by replaceToStr.
 *
 * @param replaceRs receives the result; cleared first and left empty when
 *                  there is no match
 * @return true when a match was found and replaced
 */
bool StringHelper::replace(const string searchSpaceStr, const string needReplaceStr, const string replaceToStr, string& replaceRs)
{
    replaceRs.clear();

    unsigned int matchCount = 0;
    unsigned int matchStart = 0;
    const bool found = find(searchSpaceStr, needReplaceStr, matchCount, matchStart);
    if (!found)
        return found;

    // Text before the match, then the replacement.
    replaceRs.append(searchSpaceStr.substr(0, matchStart));
    replaceRs.append(replaceToStr);

    // Text after the match, if any remains.
    const unsigned int matchEnd = matchStart + strlen(needReplaceStr.c_str());
    if (matchEnd < searchSpaceStr.length())
        replaceRs.append(searchSpaceStr.substr(matchEnd, searchSpaceStr.length() - matchEnd));

    return found;
}

/**
 * Builds a copy of searchSpaceStr in which every non-overlapping match of
 * needReplaceStr is replaced by replaceToStr.
 *
 * @param replaceRs receives the result; the unchanged text when nothing
 *                  matches, and empty when either the text or the pattern is
 *                  empty (findInit rejects empty input)
 * @return true when at least one replacement was made
 */
bool StringHelper::replaceAll(const string searchSpaceStr, const string needReplaceStr, const string replaceToStr, string& replaceRs)
{
    replaceRs.clear();

    unsigned int count = 0;
    char *searchBuf = NULL;
    char *patternBuf = NULL;

    if (findInit(searchSpaceStr, needReplaceStr, &searchBuf, &patternBuf))
    {
        unsigned int startLocation = 0;
        unsigned int stopLocation = 0;
        unsigned int lastStopLocation = 0;  // offset just past the previous match
        while (kmpMapping(searchBuf, patternBuf, startLocation, stopLocation))
        {
            // Unmatched text between the previous match and this one.
            replaceRs.append(searchSpaceStr, lastStopLocation, startLocation - lastStopLocation);
            replaceRs.append(replaceToStr);
            lastStopLocation = stopLocation;
            ++count;
        }
        // Remaining text after the last match (the whole text when none matched).
        if (lastStopLocation < searchSpaceStr.length())
        {
            replaceRs.append(searchSpaceStr, lastStopLocation, string::npos);
        }
    }

    // Release the buffers findInit allocated (previously leaked on every call).
    delete[] searchBuf;
    delete[] patternBuf;
    return 0 != count;
}

/**
 * Prepares one search: allocates C-string copies of both inputs and, when
 * both are non-empty, rebuilds nextArray for the pattern.
 *
 * @param pSearchSpaceChArray receives a new[]-allocated copy of searchSpaceStr
 * @param pPatternChArray     receives a new[]-allocated copy of patternStr
 * @return false when either copy is empty (nextArray is then left untouched),
 *         true otherwise. The copies are allocated in both cases and must be
 *         delete[]d by the caller.
 */
bool StringHelper::findInit(const string searchSpaceStr, const string patternStr, 
    char **pSearchSpaceChArray, char **pPatternChArray)
{
    const char *searchSrc = searchSpaceStr.c_str();
    const char *patternSrc = patternStr.c_str();

    *pSearchSpaceChArray = new char[strlen(searchSrc) + 1];
    *pPatternChArray = new char[strlen(patternSrc) + 1];
    strcpy(*pSearchSpaceChArray, searchSrc);
    strcpy(*pPatternChArray, patternSrc);

    if (0 == strlen(*pSearchSpaceChArray) || 0 == strlen(*pPatternChArray))
        return false;

    // Drop any table left from a previous pattern before building the new one.
    if (NULL != nextArray)
    {
        delete[] nextArray;
        nextArray = NULL;
    }
    nextArray = new int[strlen(*pPatternChArray)];
    getNext(*pPatternChArray, nextArray);
    return true;
}


/**
 * KMP scan of pSearchSpaceStr for pPatternStr starting at offset 0, using
 * nextArray as the fallback table.
 *
 * @return true when the whole pattern was matched; false otherwise or when
 *         nextArray has not been built
 */
bool StringHelper::kmpMapping(const char* pSearchSpaceStr, const char* pPatternStr)
{
    if (NULL == nextArray)
        return false;

    const int textLen = strlen(pSearchSpaceStr);
    const int patLen = strlen(pPatternStr);
    int textPos = 0;
    int patPos = 0;

    while (textPos < textLen && patPos < patLen)
    {
        // patPos == -1 means "no prefix can be reused": step to the next text
        // character and restart the pattern at index 0.
        if (patPos != -1 && pSearchSpaceStr[textPos] != pPatternStr[patPos])
        {
            patPos = nextArray[patPos];
            continue;
        }
        ++textPos;
        ++patPos;
    }

    return patPos == patLen;
}

/**
 * KMP scan of pSearchSpaceStr for pPatternStr that begins at the offset held
 * in stopLocation, using nextArray as the fallback table.
 *
 * @param startLocation on success, receives the offset where the match starts
 * @param stopLocation  in: offset to start scanning from; on success, receives
 *                      the offset just past the match. Unchanged on failure.
 * @return true when the whole pattern was matched; false otherwise or when
 *         nextArray has not been built
 */
bool StringHelper::kmpMapping(const char* pSearchSpaceStr, const char* pPatternStr, unsigned int &startLocation, unsigned int &stopLocation)
{
    if (NULL == nextArray)
        return false;

    const int textLen = strlen(pSearchSpaceStr);
    const int patLen = strlen(pPatternStr);
    int textPos = stopLocation;
    int patPos = 0;

    while (textPos < textLen && patPos < patLen)
    {
        // patPos == -1 means "no prefix can be reused": step to the next text
        // character and restart the pattern at index 0.
        if (patPos != -1 && pSearchSpaceStr[textPos] != pPatternStr[patPos])
        {
            patPos = nextArray[patPos];
            continue;
        }
        ++textPos;
        ++patPos;
    }

    if (patPos != patLen)
        return false;

    startLocation = textPos - patPos;  // start of the match
    stopLocation = textPos;            // one past the end of the match
    return true;
}

/**
 * Builds the KMP fallback table for pStr: nextArr[i] is the pattern index to
 * resume comparing from when the comparison at index i fails (-1 means move on
 * to the next text character).
 *
 * Slot 0 is always written, so nextArr must hold at least one element.
 */
void StringHelper::getNext(const char *pStr, int *nextArr)
{
    const int lastIndex = static_cast<int>(strlen(pStr)) - 1;
    int pos = 0;
    int prefix = -1;
    nextArr[pos] = prefix;

    while (pos < lastIndex)
    {
        const bool extend = (prefix == -1) || (pStr[pos] == pStr[prefix]);
        if (!extend)
        {
            // Mismatch: fall back to a shorter candidate prefix.
            prefix = nextArr[prefix];
            continue;
        }

        ++pos;
        ++prefix;
        // If the characters at both positions are equal, falling back to
        // `prefix` would fail the same way, so reuse its fallback instead.
        if (pStr[pos] == pStr[prefix])
            nextArr[pos] = nextArr[prefix];
        else
            nextArr[pos] = prefix;
    }
}
    原文作者:KMP算法
    原文地址: https://blog.csdn.net/luanzheng_365/article/details/74784635
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞