美文网首页
2020-07-23 正则URL

2020-07-23 正则URL

作者: null_2562 | 来源:发表于2020-07-23 16:16 被阅读0次

    前提:翻阅 RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax 的时候,发现其附录 B 里已经给出了现成的 URL 正则
    地址戳这里
    正则表达式:

    ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
    

    光看记不住,拿C练了下手, C代码:

    //
    //  main.c
    //  uri_manager
    //
    //  Created by null on 2020/7/21.
    //  Copyright © 2020 null. All rights reserved.
    //
    
    #include <string.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <regex.h>
    
    
    /*
     * The five components a URI is split into. Each component lives in its own
     * fixed-size character buffer inside the struct, so a value of this type can
     * be returned and copied by value without any heap ownership.
     */
    struct uri_part_s {
        char scheme[8];        /* e.g. "http" */
        char authority[256];   /* host part following "//" */
        char path[256];        /* hierarchical path */
        char query[256];       /* query component */
        char fragment[256];    /* fragment component */
    };

    typedef struct uri_part_s uri_part_s;
    
    /*
     * Capture-group numbers used to pick each URI component out of the
     * regmatch_t array (group 0 is the whole match).
     */
    enum {
        SCHEME_INDEX    = 2,
        AUTHORITY_INDEX = 4,
        PATH_INDEX      = 5,
        QUERY_INDEX     = 7,
        FRAGMENT_INDEX  = 9
    };
    
    static uri_part_s uri_part(const char *uri, const char *re)
    {
        struct uri_part_s struct_uri_parts;
        //正则初始化
        int status = 0, flag = REG_EXTENDED, max_re = 10;
        regmatch_t pmatch[max_re];
        regex_t reg;
        
        regcomp(&reg, re, flag);
        status = regexec(&reg, uri, max_re, pmatch, 0);
        //匹配成功
        for (int i = 0; i < max_re; i ++) {
            switch (i) {
                case SCHEME_INDEX:
                {
                    char *pt = strndup(uri + pmatch[i].rm_so, pmatch[i].rm_eo - pmatch[i].rm_so);
                    strlcpy(struct_uri_parts.scheme, pt, sizeof(struct_uri_parts.scheme));
                }
                    break;
                case AUTHORITY_INDEX:
                {
                    char *pt = strndup(uri + pmatch[i].rm_so, pmatch[i].rm_eo - pmatch[i].rm_so);
                    strlcpy(struct_uri_parts.authority, pt, sizeof(struct_uri_parts.authority));
                }
                    break;
                case PATH_INDEX:
                {
                    char *pt = strndup(uri + pmatch[i].rm_so, pmatch[i].rm_eo - pmatch[i].rm_so);
                    strlcpy(struct_uri_parts.path, pt, sizeof(struct_uri_parts.path));
                }
                    break;
                case QUERY_INDEX:
                {
                    char *pt = strndup(uri + pmatch[i].rm_so, pmatch[i].rm_eo - pmatch[i].rm_so);
                    strlcpy(struct_uri_parts.query, pt, sizeof(struct_uri_parts.query));
                }
                    break;
                case FRAGMENT_INDEX:
                {
                    char *pt = strndup(uri + pmatch[i].rm_so, pmatch[i].rm_eo - pmatch[i].rm_so);
                    strlcpy(struct_uri_parts.fragment, pt, sizeof(struct_uri_parts.fragment));
                }
                    break;
                    
                default:
                    
                    break;
            }
        }
        regfree(&reg);
        return struct_uri_parts;
    }
    
    
    /// Demo: split a sample URI with the regular expression from
    /// RFC 2396  https://tools.ietf.org/html/rfc2396#appendix-B
    /// and print scheme, authority, path, query and fragment, one per line.
    int main(int argc, const char * argv[]) {
        (void)argc;
        (void)argv;

        const char *origin_uri = "http://www.ics.uc i.edu/pub/ietf/uri/abc?abc=cba#Related";
        // The query group must start with a literal '?', written "\\?" in a C
        // string. An unescaped "/?" would match an optional slash instead and
        // leave the '?' inside the captured query.
        const char *parts_re = "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?";

        struct uri_part_s parts = uri_part(origin_uri, parts_re);
        printf("%s\n", parts.scheme);
        printf("%s\n", parts.authority);
        printf("%s\n", parts.path);
        printf("%s\n", parts.query);
        printf("%s\n", parts.fragment);
        return 0;
    }
    
    

    特此记录一下
    完成

    相关文章

      网友评论

          本文标题:2020-07-23 正则URL

          本文链接:https://www.haomeiwen.com/subject/odehlktx.html