主题:[原创]贴下自己写的KMP和BM算法的ANSI C实现
KMP是看的老严的书,然后按照自己的思路写的,与老严的略有不同。
BM是看的 http://www-igm.univ-mlv.fr/~lecroq/string/node14.html 这里面,然后按照自己的思路写的,但是他的代码我没看懂,晕ing
希望大家多多指点,多提点意见
char * IndexKMP(char *str,const char *ptrn);
char * IndexBM(char *str,const char *ptrn);
返回ptrn在str中出现的首位置,否则返回NULL
先帖KMP的代码:
#include <stdlib.h>
#define MAX_SIZE ( 30 )
char * IndexKMP(char *str, const char *ptrn)
{
int next[MAX_SIZE];
int len_pt;
int i,j;
if ((NULL == str) || (NULL == ptrn))
{
return NULL;
}
len_pt = 0;
while('\0' != ptrn[len_pt])
{
++len_pt;
}
//1.生成next数组
next[0] = -1;
j = -1;
for (i=1; i < len_pt; ++i)
{
while ((j != -1) && (ptrn[i-1] != ptrn[j]))
{
j = next[j];
}
next[i] = ++j;
if (ptrn[i] == ptrn[j])
{
next[i] = next[j];
}
}
/*严版风格
while (i < len_pt - 1)
{
if((-1 == j) || (ptrn[i] ==ptrn[j]))
{
if ( ptrn[++i] != ptrn[++j] )
{
next[i] = j;
}
else
{
next[i] = next[j];
}
}
else
{
j = next[j];
}
}
*/
//2.利用next数组匹配字符串
i = j = 0;
while (('\0' != str[i]) && (j < len_pt))
{
if ((-1 == j) || (str[i] == ptrn[j]))
{
++i;
++j;
}
else
{
j = next[j];
}
}
if (j == len_pt)
{
return str + i - j;
}
else
{
return NULL;
}
}
再帖BM的:
#include <stdlib.h>
#define CHAR_SET ( 256 )
#define MAX_SIZE ( 30 )
char * IndexBM(char *str, const char *ptrn)
{
int bad_char[CHAR_SET]; //坏字符数组,字符最右位置
int good_suff[MAX_SIZE]; //好后缀数组
int len_st; //主串长度
int len_pt; //模式串长度
int base; //对齐左基址
int shift; //滑动位数
const char *suffix = NULL;
const char *compare = NULL;
int i;
if ((NULL == str) || (NULL == ptrn))
{
return NULL;
}
//1.生成坏字符数组bad_char并获得字符串长度
for (i=0; i < CHAR_SET; ++i)
{
bad_char[i] = -1;
}
for (i=0; ptrn[i] != '\0'; ++i)
{
bad_char[(unsigned char)ptrn[i]] = i;
}
len_pt = i;
for (len_st = 0; str[len_st] != '\0'; ++len_st)
;
//2.生成好后缀数组good_suff
for (i=0; i < len_pt; ++i)
{
for (shift = 1; shift <= len_pt; ++shift)
{
if (shift > i)
{
suffix = ptrn + shift;
compare = ptrn;
}
else
{
suffix = ptrn + i + 1;
compare = ptrn + i - shift + 1;
if (ptrn[i] == ptrn[i - shift])
{
continue;
}
}
while (('\0' != *suffix) && (*compare == *suffix))
{
++compare;
++suffix;
}
if ('\0' == *suffix)
{
good_suff[i] = shift;
break;
}
}
}
//3.开始匹配
base = 0;
while ((base + len_pt) <= len_st)
{
i = len_pt - 1;
while ((-1 != i) && (ptrn[i] == str[base + i]))
{
--i;
}
if (-1 != i)
{ //有失配产生
shift = i - bad_char[str[base + i]];
shift = shift > good_suff[i] ? shift : good_suff[i];
base += shift;
}
else
{ //完全匹配
return str + base;
}
}
return NULL;
}
BM是看的 http://www-igm.univ-mlv.fr/~lecroq/string/node14.html 这里面,然后按照自己的思路写的,但是他的代码我没看懂,晕ing
希望大家多多指点,多提点意见
char * IndexKMP(char *str,const char *ptrn);
char * IndexBM(char *str,const char *ptrn);
返回ptrn在str中出现的首位置,否则返回NULL
先帖KMP的代码:
#include <stdlib.h>
#define MAX_SIZE ( 30 )
char * IndexKMP(char *str, const char *ptrn)
{
int next[MAX_SIZE];
int len_pt;
int i,j;
if ((NULL == str) || (NULL == ptrn))
{
return NULL;
}
len_pt = 0;
while('\0' != ptrn[len_pt])
{
++len_pt;
}
//1.生成next数组
next[0] = -1;
j = -1;
for (i=1; i < len_pt; ++i)
{
while ((j != -1) && (ptrn[i-1] != ptrn[j]))
{
j = next[j];
}
next[i] = ++j;
if (ptrn[i] == ptrn[j])
{
next[i] = next[j];
}
}
/*严版风格
while (i < len_pt - 1)
{
if((-1 == j) || (ptrn[i] ==ptrn[j]))
{
if ( ptrn[++i] != ptrn[++j] )
{
next[i] = j;
}
else
{
next[i] = next[j];
}
}
else
{
j = next[j];
}
}
*/
//2.利用next数组匹配字符串
i = j = 0;
while (('\0' != str[i]) && (j < len_pt))
{
if ((-1 == j) || (str[i] == ptrn[j]))
{
++i;
++j;
}
else
{
j = next[j];
}
}
if (j == len_pt)
{
return str + i - j;
}
else
{
return NULL;
}
}
再帖BM的:
#include <stdlib.h>
#define CHAR_SET ( 256 )
#define MAX_SIZE ( 30 )
char * IndexBM(char *str, const char *ptrn)
{
int bad_char[CHAR_SET]; //坏字符数组,字符最右位置
int good_suff[MAX_SIZE]; //好后缀数组
int len_st; //主串长度
int len_pt; //模式串长度
int base; //对齐左基址
int shift; //滑动位数
const char *suffix = NULL;
const char *compare = NULL;
int i;
if ((NULL == str) || (NULL == ptrn))
{
return NULL;
}
//1.生成坏字符数组bad_char并获得字符串长度
for (i=0; i < CHAR_SET; ++i)
{
bad_char[i] = -1;
}
for (i=0; ptrn[i] != '\0'; ++i)
{
bad_char[(unsigned char)ptrn[i]] = i;
}
len_pt = i;
for (len_st = 0; str[len_st] != '\0'; ++len_st)
;
//2.生成好后缀数组good_suff
for (i=0; i < len_pt; ++i)
{
for (shift = 1; shift <= len_pt; ++shift)
{
if (shift > i)
{
suffix = ptrn + shift;
compare = ptrn;
}
else
{
suffix = ptrn + i + 1;
compare = ptrn + i - shift + 1;
if (ptrn[i] == ptrn[i - shift])
{
continue;
}
}
while (('\0' != *suffix) && (*compare == *suffix))
{
++compare;
++suffix;
}
if ('\0' == *suffix)
{
good_suff[i] = shift;
break;
}
}
}
//3.开始匹配
base = 0;
while ((base + len_pt) <= len_st)
{
i = len_pt - 1;
while ((-1 != i) && (ptrn[i] == str[base + i]))
{
--i;
}
if (-1 != i)
{ //有失配产生
shift = i - bad_char[str[base + i]];
shift = shift > good_suff[i] ? shift : good_suff[i];
base += shift;
}
else
{ //完全匹配
return str + base;
}
}
return NULL;
}