美文网首页js css html
Java 正则处理之 Pattern

Java 正则处理之 Pattern

作者: Tinyspot | 来源:发表于2023-01-12 22:58 被阅读0次

1. 基础知识

1.1 特殊字符

  • Java 字符串字面量中的 \\ 代表其他语言(正则)中的一个 \
    • 数字 \\d
    • 反斜杠 \\\\
    • 点号本身 \\.
    • 空白字符 \\s(一个或多个空白字符 \\s+)
  • 点 . 匹配除行结束符(如 '\n'、'\r')外的任何单个字符;开启 Pattern.DOTALL 后可匹配任意字符
  • 正则特殊符号 * . ? + $ ^ [ ] ( ) { } | \ /

1.2 量词

  • ? {0, 1}
  • + {1, }
  • * {0, }

1.3 贪婪/非贪婪

  • 当 ? 紧跟在 * , + , ? , {n} , {n,} , {n,m} 后面时,为非贪婪模式(尽可能少地匹配)

2. 字符串匹配

/**
 * Abridged view of the {@code String} class, reduced to its {@code matches} method.
 */
public final class String
    implements java.io.Serializable, Comparable<String>, CharSequence {

    /**
     * Tests this string against a regular expression.
     *
     * @param regex the regular expression to test this string against
     * @return the value returned by {@code Pattern.matches(regex, this)}
     */
    public boolean matches(String regex) {
      // Pure delegation: this string is handed to Pattern.matches as the input.
      return Pattern.matches(regex, this);
    }
}

2.1 matches(String regex)

// Phone numbers with an optional country prefix, e.g. +86-15171120046 or 0086-15171120046
boolean matches = str.matches("(\\+86|0086)?-?\\d{11}");
// Mail addresses such as xxx@163.com can be checked with: "\\w+@\\w+\\.(com|cn)"

// Quoting: everything between \\Q and \\E is taken literally, so special characters lose their meaning
"\\Q...\\E"

2.2 replaceAll(regex, str)

示例一:删除文本中的所有数字

/**
 * Example 1: strip every digit out of a piece of text.
 */
@Test
public void test() {
    final String input = "ab12ee44r";
    // Every run of one or more digits is replaced by the empty string.
    final String digitsRemoved = input.replaceAll("\\d+", "");
    // digitsRemoved: abeer
}

示例二:通配符匹配

/**
 * Example 2: quantifiers that allow zero occurrences also match the empty string,
 * and every such empty match is replaced as well.
 */
@Test
public void test() {
    final String input = "123a45bcd";

    // "\\d?" consumes at most one digit per match, so each digit yields its own "-",
    // and so does each empty match in front of a letter and at the end of the input.
    final String optionalDigit = input.replaceAll("\\d?", "-");
    // optionalDigit: ----a---b-c-d-

    // "\\d*" consumes a whole run of digits per match, so "123" and "45" yield one "-" each;
    // the empty matches are replaced exactly as above.
    final String anyDigits = input.replaceAll("\\d*", "-");
    // anyDigits: --a--b-c-d-
}
image.png

注意:空字符串也会被匹配到,然后被替换为 -

示例三:不少于 10 位的数字,改为字符串

/**
 * Example 3: wrap long numeric {@code itemId} values in quotes so they become strings.
 */
@Test
public void test() {
    final String source = "{\"order\":[\"itemId\":123456789],\"order\":[\"itemId\":1234567890123]}";
    // Group 1 captures an itemId value made of at least 10 digits ...
    final String longItemId = "\"itemId\":(\\d{10,})";
    // ... and $1 writes that captured value back, now surrounded by double quotes.
    final String quotedItemId = "\"itemId\":\"$1\"";
    System.out.println(source.replaceAll(longItemId, quotedItemId));
}

替换结果:

{
    "order": [
        "itemId": 123456789
    ],
    "order": [
        "itemId": "1234567890123"
    ]
}

2.2 开始符(^)和结束符($)

/**
 * Shows the start anchor (^) and the end anchor ($) by trimming zeros from either end.
 */
@Test
public void test() {
    final String padded = "0023004500";

    // ^ pins the run of zeros to the beginning of the input; inner zeros are untouched.
    final String withoutLeadingZeros = padded.replaceAll("^(0+)", "");
    // withoutLeadingZeros: 23004500

    // $ pins the run of zeros to the end of the input.
    final String withoutTrailingZeros = padded.replaceAll("(0+)$", "");
    // withoutTrailingZeros: 00230045
}

2.3 字符边界

  • \b 匹配一个单词边界
  • 光标前面的字符和后面的字符中,一个是 \w、另一个不是 \w(字符串的开头/结尾视为非 \w)

3. Pattern

3.1 示例模板

/**
 * Template for the regex examples: the expressions are kept as string constants,
 * each one is compiled once into a shared {@link Pattern}, and {@link #print(Object)}
 * is a small console helper.
 */
public class RegexDemo {
    /** Sub-sequence expression: the text "code" followed by one or more digits. */
    private static final String SUB_SEQUENCE_REGEX = "code\\d+";
    /** Two capturing groups: a "name" + digits token and a later "code" + digits token. */
    private static final String GROUP_REGEX = "(name\\d+).*(code\\d+)";
    /** Zero or more repetitions of "any character or whitespace character". */
    private static final String ENTIRE_REGEX = "(.|\\s)*";

    private static final Pattern entireCompile = Pattern.compile(ENTIRE_REGEX);
    private static final Pattern groupCompile = Pattern.compile(GROUP_REGEX);
    private static final Pattern subSequenceCompile = Pattern.compile(SUB_SEQUENCE_REGEX);

    /**
     * Writes the string form of a value to standard output, followed by a line break.
     *
     * @param o the value to print; {@code null} is printed as "null"
     */
    public static void print(Object o) {
        final String text = String.valueOf(o);
        System.out.println(text);
    }
}

3.2 matches()

public class RegexDemo {
    /**
     * 先编译这个模式
     */
    private static final Pattern pattern = Pattern.compile("[a-z]{3}");

    @Test
    public void test() {
        Matcher matcher = pattern.matcher("xyz");
        // matches 永远匹配整个字符串
        print(matcher.matches());

        // 链式写法:Pattern.compile(regex).matcher(input).matches()
    }

    public static void print(Object o) {
        System.out.println(o);
    }
}

3.3 reset()

/**
 * Shows {@code Matcher.reset()} being called after a whole-input match attempt.
 */
@Test
public void test() {
    final String input = "123-3243-564-00";
    final Matcher m = Pattern.compile("\\d{3,5}").matcher(input);

    final boolean wholeInputMatches = m.matches();
    System.out.println(wholeInputMatches);

    // reset() matters here: it discards the state left behind by matches(), so the
    // characters that call consumed are available again to later match operations.
    m.reset();
}

3.4 Pattern 与 Matcher

Matcher 可以理解为“某次具体匹配的结果对象”:把编译好的Pattern 对象“应用”到某个String对象上,就获得了作为“本次匹配结果”的Matcher对象

/**
 * Abridged view of the {@code Pattern} class: how a pattern instance is created and how
 * a {@code Matcher} is obtained from it.
 */
public final class Pattern implements java.io.Serializable {
    /**
     * Creates a pattern for the given regular expression.
     *
     * @param regex the expression to build the pattern from
     * @return a new {@code Pattern} constructed with {@code regex} and 0 as the second
     *         argument (presumably "no flags" - confirm against the full class)
     */
    public static Pattern compile(String regex) {
        return new Pattern(regex, 0);
    }

    /**
     * Creates a matcher that applies this pattern to the given input.
     *
     * @param input the character sequence to be matched
     * @return a new {@code Matcher} built from this pattern and {@code input}
     */
    public Matcher matcher(CharSequence input) {
        // Compile lazily: the flag is checked first without the lock and then again
        // while holding the lock on this instance, so compile() is skipped once the
        // flag is already set.
        if (!compiled) {
            synchronized(this) {
                if (!compiled)
                    compile();
            }
        }
        // Every call hands out a fresh Matcher bound to this pattern and the input.
        Matcher m = new Matcher(this, input);
        return m;
    }
}
public final class Matcher implements MatchResult {\
    public boolean matches() {
        return match(from, ENDANCHOR);
    }
}

4. 贪婪与非贪婪

/**
 * Compares greedy, reluctant and possessive quantifiers on the same input.
 *
 * <p>Expressions to try against "aaaa5bbbb6":
 * <ul>
 *   <li>{@code (.{3,10})[0-9]}  - greedy: takes as much as possible, then backtracks; prints 0---10</li>
 *   <li>{@code (.{3,10}?)[0-9]} - reluctant, X{n,m}?: takes as little as possible; prints 0---5</li>
 *   <li>{@code (.{3,10}+)[0-9]} - possessive, X{n,m}+: never gives characters back; prints not match</li>
 *   <li>{@code (.+?)[0-9]}      - worth trying as well; reluctant, prints 0---5</li>
 * </ul>
 */
@Test
public void quantifiers() {
    final String input = "aaaa5bbbb6";
    final Matcher matcher = Pattern.compile("(.{3,10}+)[0-9]").matcher(input);

    // Report the span of the first match, or say that nothing matched.
    final String report = matcher.find()
            ? matcher.start() + "---" + matcher.end()
            : "not match";
    print(report);
}

相关文章

网友评论

    本文标题:Java 正则处理之 Pattern

    本文链接:https://www.haomeiwen.com/subject/hxuicdtx.html