1 匹配单个字符

匹配符	功能
.	匹配任意1个字符（除了\n）
[ ]	匹配[ ]中列举的字符
\d	匹配数字，即0-9（Python 3 的 str 模式下默认还会匹配其他 Unicode 十进制数字，如全角数字；指定 re.ASCII 后才仅匹配0-9）
\D	匹配非数字，即不是数字
\s	匹配空白，即空格、tab键、换行符（\n）等空白字符
\S	匹配非空白
\w	匹配非特殊字符，即a-z、A-Z、0-9、_、汉字
\W	匹配特殊字符，即非字母、非数字、非汉字、非下划线

import re

def fun(match_obj):
    """Print the text matched by ``match_obj``, or "匹配失败" if there is none.

    Args:
        match_obj: Result of an ``re.match`` call -- a match object, or
            ``None`` when the pattern did not match.
    """
    if match_obj:
        print(match_obj.group())
    else:
        print("匹配失败")


# Patterns are written as raw strings so that regex escapes such as \d reach
# the regex engine untouched instead of being parsed as string escapes.
# Trailing comments show the printed output; 匹配失败 is the failure message.

# 1 "." matches any single character except a newline
fun(re.match(r"t.o", "onetwothree"))  # 匹配失败 (re.match anchors at the start)
fun(re.match(r"t.o", "twothree"))  # two
fun(re.match(r"t.o", "t\no"))  # 匹配失败 ("." does not match "\n")

# 2 [...] matches one of the characters listed inside the brackets
fun(re.match(r"h[123456]", "h3"))  # h3
fun(re.match(r"h[1-6]", "h7"))  # 匹配失败

# 3 \d matches one decimal digit
fun(re.match(r"开发\d部", "开发7部"))  # 开发7部
fun(re.match(r"开发\d部", "开发七部"))  # 匹配失败

# 4 \D matches one character that is not a digit
fun(re.match(r"开发\D部", "开发7部"))  # 匹配失败
fun(re.match(r"开发\D部", "开发七部"))  # 开发七部

# 5 \s matches one whitespace character
fun(re.match(r"内容\sxxx", "内容 xxx"))  # 内容 xxx
fun(re.match(r"内容\sxxx", "内容:xxx"))  # 匹配失败

# 6 \S matches one non-whitespace character
fun(re.match(r"内容\Sxxx", "内容 xxx"))  # 匹配失败
fun(re.match(r"内容\Sxxx", "内容:xxx"))  # 内容:xxx

# 7 \w matches one word character (letter, digit, underscore, CJK character)
fun(re.match(r"标题\w", "标题一"))  # 标题一
fun(re.match(r"标题\w", "标题%"))  # 匹配失败

# 8 \W matches one non-word character
fun(re.match(r"标题\W", "标题一"))  # 匹配失败
fun(re.match(r"标题\W", "标题%"))  # 标题%

2 匹配多个字符

匹配符	功能
*	匹配前一个字符出现0次或者无限次，即可有可无
+	匹配前一个字符出现1次或者无限次，即至少有1次
?	匹配前一个字符出现1次或者0次，即要么有1次，要么没有
{m}	匹配前一个字符出现m次
{m,n}	匹配前一个字符出现从m到n次

import re

def fun(match_obj):
    """Print the text matched by ``match_obj``, or "匹配失败" if there is none.

    Args:
        match_obj: Result of an ``re.match`` call -- a match object, or
            ``None`` when the pattern did not match.
    """
    if match_obj:
        print(match_obj.group())
    else:
        print("匹配失败")


# Trailing comments show the printed output; 匹配失败 is the failure message.

# 1 "*" repeats the previous item zero or more times
fun(re.match(r"t.*o", "to"))  # to
fun(re.match(r"t.*o", "twwo"))  # twwo
fun(re.match(r"tw*o", "twsdwo"))  # 匹配失败 ("w*" cannot consume the "s")

# 2 "+" repeats the previous item one or more times
fun(re.match(r"t.+o", "to"))  # 匹配失败
fun(re.match(r"t.+o", "twwo"))  # twwo
fun(re.match(r"tw+o", "twsdwo"))  # 匹配失败

# 3 "?" makes the previous item optional (zero or one occurrence)
fun(re.match(r"https?", "http"))  # http
fun(re.match(r"https?", "https"))  # https
fun(re.match(r"https?", "httpss"))  # https (only the matched prefix is printed)

# 4 {m} repeats the previous item exactly m times
fun(re.match(r"ht{2}p", "http"))  # http
fun(re.match(r"ht{2}p", "htttp"))  # 匹配失败

# 5 {m,n} repeats the previous item between m and n times
fun(re.match(r"ht{1,3}p", "hp"))  # 匹配失败
fun(re.match(r"ht{1,3}p", "htp"))  # htp
fun(re.match(r"ht{1,3}p", "htttp"))  # htttp
fun(re.match(r"ht{1,3}p", "httttp"))  # 匹配失败

# 6 {m,} repeats the previous item at least m times
fun(re.match(r"ht{2,}p", "htp"))  # 匹配失败
fun(re.match(r"ht{2,}p", "http"))  # http
fun(re.match(r"ht{2,}p", "htttp"))  # htttp

3 匹配开头和结尾

匹配符	功能
^	匹配字符串开头
$	匹配字符串结尾

import re

def fun(match_obj):
    """Print the text matched by ``match_obj``, or "匹配失败" if there is none.

    Args:
        match_obj: Result of an ``re.match`` call -- a match object, or
            ``None`` when the pattern did not match.
    """
    if match_obj:
        print(match_obj.group())
    else:
        print("匹配失败")


# Patterns are written as raw strings so that regex escapes such as \d reach
# the regex engine untouched instead of being parsed as string escapes.
# Trailing comments show the printed output; 匹配失败 is the failure message.

# 1 "^" anchors the match at the start of the string
fun(re.match(r"^\d.*", "3abc"))  # 3abc
fun(re.match(r"^\d.*", "#abc"))  # 匹配失败

# 2 "$" anchors the match at the end of the string
fun(re.match(r".*\d$", "abc3"))  # abc3
fun(re.match(r".*\d$", "abc三"))  # 匹配失败

# 3 [^...] matches one character that is NOT listed inside the brackets
fun(re.match(r"标题[^13]", "标题1"))  # 匹配失败
fun(re.match(r"标题[^13]", "标题2"))  # 标题2
fun(re.match(r"标题[^13]", "标题3"))  # 匹配失败

4 匹配分组

匹配符	功能
|	匹配左右任意一个表达式
()	将括号中字符作为一个分组
\num	引用分组num匹配到的字符串
(?P<name>)	分组起别名
(?P=name)	引用别名为name分组匹配到的字符串

import re

def fun(match_obj):
    """Print the text matched by ``match_obj``, or "匹配失败" if there is none.

    Args:
        match_obj: Result of an ``re.match`` call -- a match object, or
            ``None`` when the pattern did not match.
    """
    if match_obj:
        print(match_obj.group())
    else:
        print("匹配失败")


# Patterns are written as raw strings so that regex escapes such as \. and \1
# reach the regex engine untouched instead of being parsed as string escapes.
# Trailing comments show the printed output; 匹配失败 is the failure message.

# 1 "|" matches either the expression on its left or the one on its right
fun(re.match(r"banana|pear", "banana"))  # banana
fun(re.match(r"banana|pear", "apple"))  # 匹配失败

# 2 (...) turns the enclosed sub-pattern into a group
# Match 163, 126 and qq e-mail addresses.
#   \.            escapes the dot so it matches only a literal "."
#   (163|126|qq)  is one group; every pair of parentheses opens a group
# Groups are numbered from 1, in left-to-right order of their parentheses.
match_obj = re.match(
    r"([a-zA-Z0-9_]{4,20})@(163|126|qq)\.com", "hello@163.com"
)
if match_obj:
    # group(0) is the whole match; it is the default, so it equals group()
    print(match_obj.group(0))  # hello@163.com
    # Numbered groups return the text captured by each pair of parentheses
    print(match_obj.group(1))  # hello
    print(match_obj.group(2))  # 163
else:
    print("匹配失败")

# 3 \num refers back to the text captured by group number num
fun(re.match(r"<([a-zA-Z1-6]+)>.*</\1>", "<html>content</html>"))  # <html>content</html>
fun(re.match(r"<([a-zA-Z1-6]+)>.*</\1>", "<html>content</div>"))  # 匹配失败

# 4 (?P<name>...) names a group; (?P=name) refers back to that named group
fun(
    re.match(
        r"<(?P<name1>[a-zA-Z1-6]+)><(?P<name2>[a-zA-Z1-6]+)>.*"
        r"</(?P=name2)></(?P=name1)>",
        "<html><h1>www.itcast.cn</h1></html>",
    )
)  # <html><h1>www.itcast.cn</h1></html>
fun(
    re.match(
        r"<(?P<name1>[a-zA-Z1-6]+)><(?P<name2>[a-zA-Z1-6]+)>.*"
        r"</(?P=name2)></(?P=name1)>",
        "<html><h1>www.itcast.cn</h2></html>",
    )
)  # 匹配失败