1. 基础知识
1.1 特殊字符
-
\\
代表其他语言中的一个\
- 数字
\\d
- 反斜杠
\\\\
- 点
\\.
- 空白字符
\\s
(一个或多个空白字符:\\s+)
- 数字
- 点 . 默认匹配除行终止符(如 '\n'、'\r')外的任何单个字符;开启 DOTALL 模式(?s)后可匹配包括行终止符在内的任意字符
- 正则特殊符号
* . ? + $ ^ [ ] ( ) { } | \(注:/ 在 Java 正则中不是特殊字符,无需转义)
1.2 量词
- ? 等价于 {0,1}(出现 0 次或 1 次)
- + 等价于 {1,}(出现 1 次或多次)
- * 等价于 {0,}(出现 0 次或多次)
1.3 贪婪/非贪婪
- ? 紧跟在 *、+、?、{n}、{n,}、{n,m} 之后时,表示非贪婪模式(尽可能少地匹配)
2. 字符串匹配
/**
 * Abridged excerpt of {@code java.lang.String}, showing only the regex entry point.
 */
public final class String
implements java.io.Serializable, Comparable<String>, CharSequence {
/**
 * Tests this string against a regular expression by delegating to
 * {@code Pattern.matches(regex, this)}.
 *
 * @param regex the regular expression to test this string against
 * @return the result of {@code Pattern.matches(regex, this)}
 */
public boolean matches(String regex) {
return Pattern.matches(regex, this);
}
}
2.1 matches(String regex)
// Sample phone numbers: +86-15171120046, 0086-15171120046
boolean matches = str.matches("(\\+86|0086)?-?\\d{11}");
// E-mail addresses such as ...@163.com: "\\w+@\\w+\\.(com|cn)"
// Quote special characters so that they are treated literally
"\\Q...\\E"
2.2 replaceAll(regex, str)
示例一:删除文本中的所有数字
/** Example 1: strips every run of digits out of the sample text. */
@Test
public void test() {
    final String input = "ab12ee44r";
    final String digitRun = "\\d+";
    // Each run of digits is replaced by the empty string.
    final String result = input.replaceAll(digitRun, "");
    // result: abeer
}
示例二:通配符匹配
/** Example 2: replaces optional-digit matches (including empty matches) with a dash. */
@Test
public void test() {
    final String input = "123a45bcd";
    final String dash = "-";

    // "\\d?" yields ----a---b-c-d-
    final String result = input.replaceAll("\\d?", dash);

    // "\\d*" yields --a--b-c-d-
    final String result2 = input.replaceAll("\\d*", dash);
}
data:image/s3,"s3://crabby-images/bc0bf/bc0bfec091d5d6295a8bbe96005d94bbfbdc66a2" alt=""
注意:\\d? 和 \\d* 都允许匹配零个字符,因此每个非数字字符前面以及字符串末尾的空字符串也会被匹配到,然后被替换为 -
示例三:10 位及以上的数字,改为字符串(加上引号)
/** Example 3: wraps every itemId value of ten or more digits in double quotes and prints the text. */
@Test
public void test() {
    final String source = "{\"order\":[\"itemId\":123456789],\"order\":[\"itemId\":1234567890123]}";
    // Group 1 captures the digits; "$1" re-inserts them between the added quotes.
    final String longItemId = "\"itemId\":(\\d{10,})";
    final String quotedItemId = "\"itemId\":\"$1\"";
    System.out.println(source.replaceAll(longItemId, quotedItemId));
}
替换结果:
{
"order": [
"itemId": 123456789
],
"order": [
"itemId": "1234567890123"
]
}
2.2 开始符(^)和结束符($)
/** Removes the zero run at the start, and separately at the end, of the sample text. */
@Test
public void test() {
    final String digits = "0023004500";

    // '^' pins the zero run to the beginning of the input.
    final String startStr = digits.replaceAll("^(0+)", "");
    // result: 23004500

    // '$' pins the zero run to the end of the input.
    final String endStr = digits.replaceAll("(0+)$", "");
    // result: 00230045
}
2.3 字符边界
- \b 匹配一个单词边界:该位置前后两侧的字符中,一侧是 \w,另一侧不是 \w(字符串的开头和结尾按非 \w 处理)
3. Pattern
3.1 示例模板
/**
 * Template holding the regular expressions and pre-compiled patterns shared by the demos.
 */
public class RegexDemo {
    /**
     * Matches any input in full, line terminators included.
     *
     * <p>Written with the DOTALL flag rather than as {@code (.|\\s)*}: an alternation inside a
     * star is matched recursively by {@code java.util.regex} and can throw
     * {@code StackOverflowError} on long inputs.
     */
    private static final String ENTIRE_REGEX = "(?s).*";

    /** Sub-sequence expression: the literal {@code code} followed by one or more digits. */
    private static final String SUB_SEQUENCE_REGEX = "code\\d+";

    /** Two capturing groups: {@code name<digits>} and, later in the input, {@code code<digits>}. */
    private static final String GROUP_REGEX = "(name\\d+).*(code\\d+)";

    // Patterns are compiled once and reused.
    private static final Pattern subSequenceCompile = Pattern.compile(SUB_SEQUENCE_REGEX);
    private static final Pattern groupCompile = Pattern.compile(GROUP_REGEX);
    private static final Pattern entireCompile = Pattern.compile(ENTIRE_REGEX);

    /**
     * Prints the given object to standard output followed by a line break.
     *
     * @param o the value to print
     */
    public static void print(Object o) {
        System.out.println(o);
    }
}
3.2 matches()
/**
 * Demonstrates a whole-input match with a pre-compiled pattern.
 */
public class RegexDemo {
    /** Compiled up front: exactly three lower-case ASCII letters. */
    private static final Pattern pattern = Pattern.compile("[a-z]{3}");

    /**
     * Prints the given object to standard output followed by a line break.
     *
     * @param o the value to print
     */
    public static void print(Object o) {
        System.out.println(o);
    }

    /** Matches the sample input "xyz" against the pattern and prints the outcome. */
    @Test
    public void test() {
        final Matcher xyzMatcher = pattern.matcher("xyz");
        // matches() always tests the entire input, never a sub-sequence.
        final boolean wholeInputMatches = xyzMatcher.matches();
        print(wholeInputMatches);
        // Chained form: Pattern.compile(regex).matcher(input).matches()
    }
}
3.3 reset()
/** Runs a whole-input match, prints the outcome, then resets the matcher. */
@Test
public void test() {
    final Pattern threeToFiveDigits = Pattern.compile("\\d{3,5}");
    final Matcher m = threeToFiveDigits.matcher("123-3243-564-00");

    final boolean wholeInputMatches = m.matches();
    System.out.println(wholeInputMatches);

    // reset() is important: it makes the matcher give back the characters matches() consumed.
    m.reset();
}
3.4 Pattern 与 Matcher
Matcher 可以理解为“某次具体匹配的结果对象”:把编译好的Pattern 对象“应用”到某个String对象上,就获得了作为“本次匹配结果”的Matcher对象
/**
 * Abridged excerpt of {@code java.util.regex.Pattern}: the static factory and the
 * matcher factory.
 */
public final class Pattern implements java.io.Serializable {
/**
 * Creates a pattern from the given expression, passing 0 as the flags value.
 *
 * @param regex the expression to compile
 * @return a new {@code Pattern} built from {@code regex}
 */
public static Pattern compile(String regex) {
return new Pattern(regex, 0);
}
/**
 * Creates a {@code Matcher} that ties this pattern to the given input.
 *
 * @param input the character sequence to be matched
 * @return a new {@code Matcher} for this pattern and {@code input}
 */
public Matcher matcher(CharSequence input) {
// Compile on first use: the flag is checked again while holding the lock on this
// pattern, so compile() runs only if the flag is still unset at that point.
if (!compiled) {
synchronized(this) {
if (!compiled)
compile();
}
}
Matcher m = new Matcher(this, input);
return m;
}
}
public final class Matcher implements MatchResult {\
public boolean matches() {
return match(from, ENDANCHOR);
}
}
4. 贪婪与非贪婪
/** Searches the sample text with a quantified group and prints the match span, if any. */
@Test
public void quantifiers() {
    /*
     * Variants to compare:
     *   regex = "(.{3,10})[0-9]"
     *   regex = "(.{3,10}?)[0-9]"  --> X{n,m}?
     *   regex = "(.{3,10}+)[0-9]"  --> X{n,m}+
     * Also try: (.+?)[0-9]
     */
    final Matcher m = Pattern.compile("(.{3,10}+)[0-9]").matcher("aaaa5bbbb6");

    // Report "start---end" of the first match, or a fixed message when nothing matches.
    final String report = m.find() ? m.start() + "---" + m.end() : "not match";
    print(report);
}
网友评论