KMP 算法 (Knuth-Morris-Pratt 字符串匹配算法)


先给代码,有时间了再回来补注释和算法说明。

---------------------------------------------------------------------------

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Build the KMP prefix table for the pattern P.
 *
 * pi[i] is the index of the last character of the longest proper prefix of
 * P[0..i] that is also a suffix of P[0..i], or -1 when no such prefix exists.
 *
 * Returns a malloc'ed array that the caller must free. It holds strlen(P)
 * entries, or a single entry (pi[0] == -1) when P is empty. Returns NULL
 * when P is NULL or the allocation fails.
 */
const int * get_prefix(const char * P)
{
    size_t len;
    size_t i;
    int j = -1;
    int * pi;

    if (NULL == P)
    {
        return NULL;
    }

    len = strlen(P);

    /* Always reserve one slot so pi[0] is valid even for an empty pattern. */
    pi = malloc((len > 0 ? len : 1) * sizeof *pi);
    if (NULL == pi)
    {
        return NULL;
    }

    pi[0] = -1;

    /* Bounding by len (instead of testing P[i]) never reads past the NUL. */
    for (i = 1; i < len; ++i)
    {
        /* Fall back to shorter borders until one can be extended by P[i]. */
        while (j >= 0 && P[j + 1] != P[i])
        {
            j = pi[j];
        }
        if (P[j + 1] == P[i])
        {
            ++j;
        }
        pi[i] = j;
    }

    return pi;
}

/*
 * Print every occurrence of the pattern P inside the text T.
 *
 * For each match, the remainder of T starting at the match position is
 * written to stdout followed by a newline. Overlapping matches are reported.
 *
 * Nothing is printed when T or P is NULL, when P is empty, or when
 * get_prefix() returns NULL.
 */
void kmp_match(const char * T, const char * P)
{
    const int * pi;
    int i = 0;
    int j = -1; /* index of the last pattern character matched so far */

    /* An empty pattern would otherwise "match" at once and read pi[-1]. */
    if (NULL == T || NULL == P || '\0' == P[0])
    {
        return;
    }

    pi = get_prefix(P);
    if (NULL == pi)
    {
        return;
    }

    while (T[i])
    {
        /* On a mismatch, fall back to the next shorter border of P. */
        while (j >= 0 && P[j + 1] != T[i])
        {
            j = pi[j];
        }
        if (P[j + 1] == T[i])
        {
            ++j;
        }
        if ('\0' == P[j + 1])
        {
            /* Whole pattern matched; the match starts at T[i - j]. */
            printf("%s\n", T + i - j);
            j = pi[j];
        }
        ++i;
    }

    /* free() takes a non-const pointer; the table is owned by this function. */
    free((void *)pi);
}

/*
 * Demo driver: runs kmp_match on a fixed text and pattern.
 * The command-line arguments are not used.
 */
int main(int argc, char * argv[])
{
    const char * text = "abcdabcdabcdabcd";
    const char * pattern = "abc";

    (void)argc;
    (void)argv;

    kmp_match(text, pattern);

    return 0;
}


参考:《算法导论》

---------------------------------------------------------------------------

/*
 * Knuth-Morris-Pratt 字符串匹配算法的三种实现。
 * 匹配部分都一样,差异只在求 next 数组。:)
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Implementation 1.
 *
 * Finds the first occurrence of pattern in content with the KMP algorithm.
 *
 * next[i] is the length of the longest proper prefix of pattern[0..i-1]
 * that is also a suffix of it; next[0] is the sentinel -1. Each next[i] is
 * derived from next[i - 1] by following the fallback chain.
 *
 * Returns a pointer to the start of the first match inside content,
 * content itself when pattern is empty, or NULL when either argument is
 * NULL, there is no match, or the table cannot be allocated.
 */
char * kmp1(char * content, char * pattern)
{
    int i;
    int j;
    size_t len;
    int * next;

    if (NULL == content || NULL == pattern)
    {
        return NULL;
    }

    /* The empty pattern matches at the very beginning; no table needed. */
    if ('\0' == pattern[0])
    {
        return content;
    }

    len = strlen(pattern);
    next = malloc(len * sizeof *next);
    if (NULL == next)
    {
        return NULL;
    }

    /* Get the "next" array. */
    next[0] = -1;
    for (i = 1; pattern[i] != '\0'; ++i)
    {
        j = next[i - 1];
        /* Check j >= 0 first so that pattern[-1] is never read. */
        while (j >= 0 && pattern[i - 1] != pattern[j])
        {
            j = next[j];
        }
        next[i] = j + 1;
    }

    /* Match. */
    i = 0;
    j = 0;
    while (content[i] && pattern[j])
    {
        if (content[i] == pattern[j])
        {
            ++i;
            ++j;
        }
        else
        {
            j = next[j];
            if (-1 == j)
            {
                /* No border left: restart the pattern at the next character. */
                ++i;
                ++j;
            }
        }
    }

    free(next);

    /* The pattern was fully consumed only if the loop stopped at its NUL. */
    if (pattern[j])
    {
        return NULL;
    }

    return &content[i - j];
}

/*
 * Implementation 2.
 *
 * Finds the first occurrence of pattern in content with the KMP algorithm.
 *
 * The "next" table is built by matching the pattern against itself: i
 * scans the pattern while j tracks the length of the current border.
 * next[0] is the sentinel -1.
 *
 * Returns a pointer to the start of the first match inside content,
 * content itself when pattern is empty, or NULL when either argument is
 * NULL, there is no match, or the table cannot be allocated.
 */
char * kmp2(char * content, char * pattern)
{
    int i;
    int j;
    size_t len;
    int * next;

    if (NULL == content || NULL == pattern)
    {
        return NULL;
    }

    len = strlen(pattern);

    /*
     * The builder below stores next[i] after incrementing i, so it writes
     * indices 1..len. The table therefore needs len + 1 entries.
     */
    next = malloc((len + 1) * sizeof *next);
    if (NULL == next)
    {
        return NULL;
    }

    /* Get the "next" array. */
    next[0] = -1;
    i = 0;
    j = -1;
    while (pattern[i])
    {
        if (-1 == j || pattern[i] == pattern[j])
        {
            ++i;
            ++j;
            next[i] = j;
        }
        else
        {
            j = next[j];
        }
    }

    /* Match. */
    i = 0;
    j = 0;
    while (content[i] && pattern[j])
    {
        if (content[i] == pattern[j])
        {
            ++i;
            ++j;
        }
        else
        {
            j = next[j];
            if (-1 == j)
            {
                /* No border left: restart the pattern at the next character. */
                ++i;
                ++j;
            }
        }
    }

    free(next);

    /* The pattern was fully consumed only if the loop stopped at its NUL. */
    if (pattern[j])
    {
        return NULL;
    }

    return &content[i - j];
}

/*
 * Implementation 3.
 *
 * An improvement on implementation 2; the improved spot is marked below.
 *
 * Finds the first occurrence of pattern in content with the KMP algorithm.
 * When the character at the fallback position equals the character that
 * just mismatched, falling back there would fail again, so the table
 * stores the fallback of the fallback instead.
 *
 * Returns a pointer to the start of the first match inside content,
 * content itself when pattern is empty, or NULL when either argument is
 * NULL, there is no match, or the table cannot be allocated.
 */
char * kmp3(char * content, char * pattern)
{
    int i;
    int j;
    size_t len;
    int * next;

    if (NULL == content || NULL == pattern)
    {
        return NULL;
    }

    len = strlen(pattern);

    /*
     * The builder below stores next[i] after incrementing i, so it writes
     * indices 1..len. The table therefore needs len + 1 entries.
     */
    next = malloc((len + 1) * sizeof *next);
    if (NULL == next)
    {
        return NULL;
    }

    /* Get the "next" array. */
    next[0] = -1;
    i = 0;
    j = -1;
    while (pattern[i])
    {
        if (-1 == j || pattern[i] == pattern[j])
        {
            ++i;
            ++j;

            /* This is the improvement over implementation 2. */
            if (pattern[i] == pattern[j])
            {
                /* Same character at both spots: skip the doomed comparison. */
                next[i] = next[j];
            }
            else
            {
                next[i] = j;
            }
        }
        else
        {
            j = next[j];
        }
    }

    /* Match. */
    i = 0;
    j = 0;
    while (content[i] && pattern[j])
    {
        if (content[i] == pattern[j])
        {
            ++i;
            ++j;
        }
        else
        {
            j = next[j];
            if (-1 == j)
            {
                /* No border left: restart the pattern at the next character. */
                ++i;
                ++j;
            }
        }
    }

    free(next);

    /* The pattern was fully consumed only if the loop stopped at its NUL. */
    if (pattern[j])
    {
        return NULL;
    }

    return &content[i - j];
}

/*
 * Runs the three KMP implementations on the command-line arguments.
 *
 * Usage: <program> <content> <pattern>
 *
 * For each implementation, prints the part of <content> that starts at the
 * first match, or "(not found)" when the implementation returns NULL.
 * Returns EXIT_FAILURE when fewer than two arguments are given.
 */
int main(int argc, char * argv[])
{
    char * found;

    if (argc < 3)
    {
        fprintf(stderr, "usage: %s <content> <pattern>\n",
                argc > 0 ? argv[0] : "kmp");
        return EXIT_FAILURE;
    }

    /* Passing NULL to "%s" is undefined, so substitute a marker string. */
    found = kmp1(argv[1], argv[2]);
    printf("%s\n", found ? found : "(not found)");

    found = kmp2(argv[1], argv[2]);
    printf("%s\n", found ? found : "(not found)");

    found = kmp3(argv[1], argv[2]);
    printf("%s\n", found ? found : "(not found)");

    return 0;
}


参考:
1. 《数据结构 (C 语言版)》,严蔚敏,吴伟民,P79-84
2. 字符串匹配的 KMP 算法详解
3. KMP

---------------------------------------------------------------------------
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值