美文网首页
c语言正则表达式匹配网址

c语言正则表达式匹配网址

作者: 一路向后 | 来源:发表于2021-08-02 21:20 被阅读0次

1.源码实现

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>

/*
 * Extract a substring.
 *
 * Copies the bytes of str in the half-open range [start, end) into sub and
 * NUL-terminates the result.
 *
 * str   - source buffer; must be readable up to offset end.
 * start - offset of the first byte to copy.
 * end   - offset one past the last byte to copy.
 * sub   - destination buffer; must hold at least (end - start) + 1 bytes.
 *
 * Returns sub. An empty or inverted range (end <= start) yields an empty
 * string.
 */
char *substr(const char *str, unsigned int start, unsigned int end, char *sub)
{
    size_t n;

    /* end - start is unsigned: an inverted range would wrap around to a
     * huge length and make memcpy overrun both buffers. */
    if(end <= start)
    {
        sub[0] = '\0';
        return sub;
    }

    n = (size_t)(end - start);

    memcpy(sub, str + start, n);
    sub[n] = '\0';

    return sub;
}

/*
 * Read text from standard input line by line and print the first URL
 * (http, https, ftp or file scheme) found on each line.
 *
 * For every matching line, the overall match and any captured group that
 * starts at a different offset than the previously printed one are written
 * as "$<index>=<text>". Command-line arguments are not used.
 *
 * Returns 0 on success, -1 if the pattern fails to compile and -2 if
 * regexec reports an error other than "no match".
 */
int main(int argc, char **argv)
{
    enum { NMATCH = 10 };

    /*
     * Basic regular expression (no REG_EXTENDED).
     * NOTE(review): "\|" and "\+" are not defined by POSIX for basic
     * regular expressions; the pattern relies on an implementation (e.g.
     * glibc) that accepts them. Inside the bracket expressions the
     * backslash is an ordinary character, so '\' is matched as well.
     */
    const char *pattern = "\\(http\\|https\\|ftp\\|file\\)://[-A-Za-z0-9+&@#/%?=~_\\|!:,.;]\\+[-A-Za-z0-9+&@#/%=~_\\|]";
    regmatch_t pm[NMATCH];
    regex_t reg;
    regoff_t last_so;
    char ebuf[128];
    char lbuf[256];
    char sbuf[256];
    size_t len;
    int rc;
    int x;

    (void)argc;
    (void)argv;

    /* Compile the regular expression. */
    rc = regcomp(&reg, pattern, 0);
    if(rc != 0)
    {
        /* Format the message into a real buffer and report it. */
        regerror(rc, &reg, ebuf, sizeof(ebuf));
        fprintf(stderr, "regcomp: %s\n", ebuf);
        return -1;
    }

    /* Process the input one line at a time. */
    while(fgets(lbuf, sizeof(lbuf), stdin))
    {
        /* Strip the trailing newline kept by fgets. */
        len = strlen(lbuf);
        if(len > 0 && lbuf[len - 1] == '\n')
        {
            lbuf[len - 1] = '\0';
        }

        /* Apply the regular expression to the line. */
        rc = regexec(&reg, lbuf, NMATCH, pm, 0);
        if(rc == REG_NOMATCH)
        {
            continue;
        }
        else if(rc != 0)
        {
            regerror(rc, &reg, ebuf, sizeof(ebuf));
            fprintf(stderr, "regexec: %s\n", ebuf);
            regfree(&reg);
            return -2;
        }

        /* Start offset of the last printed match; -1 means none yet. */
        last_so = -1;

        /* Print the results; unused pm entries have rm_so == -1. */
        for(x = 0; x < NMATCH && pm[x].rm_so != -1; ++x)
        {
            /* Skip a group that begins where the previous output began. */
            if(last_so != pm[x].rm_so)
            {
                printf("$%d=%s\n", x,
                       substr(lbuf, (unsigned int)pm[x].rm_so,
                              (unsigned int)pm[x].rm_eo, sbuf));
                last_so = pm[x].rm_so;
            }
        }
    }

    /* Release the compiled regular expression. */
    regfree(&reg);

    return 0;
}

2.样本文件test.txt

fffhttp://www.rsyslog.com/e/2007    1   
2       https://database.clamav.net 1
3       https://database.clamav.net.    1
4       https://database.clamav.net 1
5       https://database.clamav.net.    1
6       https://database.clamav.net 1
7       https://database.clamav.net.

3.编译源码

$ gcc -o test test.c

4.运行及其结果

$ ./test < test.txt

相关文章

网友评论

      本文标题:c语言正则表达式匹配网址

      本文链接:https://www.haomeiwen.com/subject/edgjvltx.html