1.源码实现
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>
/*
 * Extract a substring: copy the bytes str[start .. end) into sub and
 * NUL-terminate the copy.
 *
 * str   - source buffer; must contain at least `end` readable bytes
 * start - offset of the first byte to copy
 * end   - offset one past the last byte to copy
 * sub   - destination buffer; must have room for (end - start) + 1 bytes
 *
 * If end <= start nothing is copied and sub becomes the empty string; this
 * keeps the unsigned subtraction from wrapping around into a huge length.
 *
 * Returns sub.
 */
char *substr(const char *str, unsigned int start, unsigned int end, char *sub)
{
    size_t n = (end > start) ? (size_t)(end - start) : 0;

    if (n > 0) {
        memcpy(sub, str + start, n);
    }
    sub[n] = '\0';
    return sub;
}
/*
 * Read text from stdin line by line, search each line for the first URL
 * (http, https, ftp or file scheme) and print the matched spans.
 *
 * For every matching line the whole match is printed as "$0=<text>",
 * followed by each capture group "$N=<text>" whose start offset differs
 * from that of the previously printed span.
 *
 * Command-line arguments are not used; input always comes from stdin.
 *
 * Returns 0 on success, -1 if the pattern fails to compile, and -2 if
 * regexec() reports an error other than REG_NOMATCH. In both failure cases
 * the regerror() message is written to stderr.
 */
int main(int argc, char **argv)
{
    enum { NMATCH = 10 };

    /*
     * Basic (non-extended) regular expression. NOTE: "\|" and "\+" are not
     * specified by POSIX for basic regular expressions; glibc accepts them
     * as extensions, other regex implementations may not.
     */
    const char *pattern = "\\(http\\|https\\|ftp\\|file\\)://[-A-Za-z0-9+&@#/%?=~_\\|!:,.;]\\+[-A-Za-z0-9+&@#/%=~_\\|]";
    regmatch_t pm[NMATCH];
    regex_t reg;
    char ebuf[128];
    char lbuf[256];
    char sbuf[256];
    int rc = 0;
    int z;

    (void)argc;
    (void)argv;

    /* Compile the regular expression (cflags 0: basic syntax). */
    z = regcomp(&reg, pattern, 0);
    if (z != 0) {
        regerror(z, &reg, ebuf, sizeof(ebuf));
        fprintf(stderr, "regcomp: %s\n", ebuf);
        return -1;
    }

    /* Process the input one line at a time. */
    while (fgets(lbuf, sizeof(lbuf), stdin)) {
        size_t len = strlen(lbuf);

        /* Strip the trailing newline kept by fgets(). */
        if (len > 0 && lbuf[len - 1] == '\n') {
            lbuf[len - 1] = '\0';
        }

        /* Apply the compiled expression to the line (eflags 0). */
        z = regexec(&reg, lbuf, NMATCH, pm, 0);
        if (z == REG_NOMATCH) {
            continue;
        }
        if (z != 0) {
            regerror(z, &reg, ebuf, sizeof(ebuf));
            fprintf(stderr, "regexec: %s\n", ebuf);
            rc = -2;
            break; /* fall through to regfree() below */
        }

        /*
         * Print the results. A span is skipped when it starts at the same
         * offset as the span printed just before it; unused pm[] entries
         * have rm_so == -1 and end the loop.
         */
        regoff_t last_so = -1;
        for (int x = 0; x < NMATCH && pm[x].rm_so != -1; ++x) {
            if (pm[x].rm_so != last_so) {
                printf("$%d=%s\n", x,
                       substr(lbuf, (unsigned int)pm[x].rm_so,
                              (unsigned int)pm[x].rm_eo, sbuf));
                last_so = pm[x].rm_so;
            }
        }
    }

    /* Release the compiled regular expression. */
    regfree(&reg);
    return rc;
}
2.样本文件test.txt
fffhttp://www.rsyslog.com/e/2007 1
2 https://database.clamav.net 1
3 https://database.clamav.net. 1
4 https://database.clamav.net 1
5 https://database.clamav.net. 1
6 https://database.clamav.net 1
7 https://database.clamav.net.
3.编译源码
$ gcc -o test test.c
4.运行及其结果
$ ./test < test.txt
网友评论