前提:翻阅 RFC 2396(Uniform Resource Identifiers (URI): Generic Syntax)的时候,发现附录 B 里已经给出了写好的 URI 解析正则
地址戳这里:https://tools.ietf.org/html/rfc2396#appendix-B
正则表达式:
^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
光看记不住,拿C练了下手, C代码:
//
// main.c
// uri_manager
//
// Created by null on 2020/7/21.
// Copyright © 2020 null. All rights reserved.
//
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <regex.h>
/*
 * The five components of a URI, each held as a NUL-terminated string in a
 * fixed-size buffer. A component that is longer than its buffer cannot be
 * stored in full.
 */
struct uri_part_s {
    char scheme[8];        /* scheme component */
    char authority[256];   /* authority component */
    char path[256];        /* path component */
    char query[256];       /* query component */
    char fragment[256];    /* fragment component */
};

typedef struct uri_part_s uri_part_s;
/*
 * Capture-group numbers of the URI regular expression
 *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 * Groups 1, 3, 6 and 8 include the delimiters (":", "//", "?", "#");
 * the groups listed here hold the bare component text.
 */
enum {
    SCHEME_INDEX    = 2,
    AUTHORITY_INDEX = 4,
    PATH_INDEX      = 5,
    QUERY_INDEX     = 7,
    FRAGMENT_INDEX  = 9
};
static uri_part_s uri_part(const char *uri, const char *re)
{
struct uri_part_s struct_uri_parts;
//正则初始化
int status = 0, flag = REG_EXTENDED, max_re = 10;
regmatch_t pmatch[max_re];
regex_t reg;
regcomp(®, re, flag);
status = regexec(®, uri, max_re, pmatch, 0);
//匹配成功
for (int i = 0; i < max_re; i ++) {
switch (i) {
case SCHEME_INDEX:
{
char *pt = strndup(uri + pmatch[i].rm_so, pmatch[i].rm_eo - pmatch[i].rm_so);
strlcpy(struct_uri_parts.scheme, pt, sizeof(struct_uri_parts.scheme));
}
break;
case AUTHORITY_INDEX:
{
char *pt = strndup(uri + pmatch[i].rm_so, pmatch[i].rm_eo - pmatch[i].rm_so);
strlcpy(struct_uri_parts.authority, pt, sizeof(struct_uri_parts.authority));
}
break;
case PATH_INDEX:
{
char *pt = strndup(uri + pmatch[i].rm_so, pmatch[i].rm_eo - pmatch[i].rm_so);
strlcpy(struct_uri_parts.path, pt, sizeof(struct_uri_parts.path));
}
break;
case QUERY_INDEX:
{
char *pt = strndup(uri + pmatch[i].rm_so, pmatch[i].rm_eo - pmatch[i].rm_so);
strlcpy(struct_uri_parts.query, pt, sizeof(struct_uri_parts.query));
}
break;
case FRAGMENT_INDEX:
{
char *pt = strndup(uri + pmatch[i].rm_so, pmatch[i].rm_eo - pmatch[i].rm_so);
strlcpy(struct_uri_parts.fragment, pt, sizeof(struct_uri_parts.fragment));
}
break;
default:
break;
}
}
regfree(®);
return struct_uri_parts;
}
/// Demo: split a sample URI with the regular expression from
/// RFC 2396 https://tools.ietf.org/html/rfc2396#appendix-B
/// and print scheme, authority, path, query and fragment, one per line.
int main(int argc, const char * argv[]) {
    (void)argc;
    (void)argv;

    /* Sample URI; the host must not contain whitespace. */
    const char *origin_uri = "http://www.ics.uci.edu/pub/ietf/uri/abc?abc=cba#Related";
    /* The query delimiter is a literal '?', so it needs "\\?" in a C string.
       An unescaped "/?" would leave the '?' inside the query capture. */
    const char *parts_re = "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?";

    struct uri_part_s parts = uri_part(origin_uri, parts_re);
    printf("%s\n", parts.scheme);
    printf("%s\n", parts.authority);
    printf("%s\n", parts.path);
    printf("%s\n", parts.query);
    printf("%s\n", parts.fragment);
    return 0;
}
特此记录一下
完成
网友评论