美文网首页js css html
Java 正则处理之 Pattern

Java 正则处理之 Pattern

作者: Tinyspot | 来源:发表于2023-01-12 22:58 被阅读0次

    1. 基础知识

    1.1 特殊字符

    • \\ 代表其他语言中的一个 \
      • 数字 \\d
      • 反斜杠 \\\\
      • \\.
      • 空白字符 \\s(一个或多个空白字符 \\s+)
    • 点 . 默认匹配除行终止符(如 '\n'、'\r')外的任何单个字符
    • 正则特殊符号 * . ? + $ ^ [ ] ( ) { } | \ /

    1.2 量词

    • ? 等价于 {0,1}
    • + 等价于 {1,}
    • * 等价于 {0,}

    1.3 贪婪/非贪婪

    • 当 ? 紧跟在 * , + , ? , {n} , {n,} , {n,m} 这些量词后面时,匹配采用非贪婪模式

    2. 字符串匹配

    // Abridged excerpt of the JDK String class, showing only matches(String).
    public final class String
        implements java.io.Serializable, Comparable<String>, CharSequence {
    
        /**
         * Tests this string against the given regular expression by delegating
         * to {@code Pattern.matches(regex, this)}.
         *
         * @param regex the regular expression to test this string against
         * @return whatever {@code Pattern.matches(regex, this)} returns
         */
        public boolean matches(String regex) {
          return Pattern.matches(regex, this);
        }
    }
    

    2.1 matches(String regex)

    // Sample inputs: +86-15171120046, 0086-15171120046
    boolean matches = str.matches("(\\+86|0086)?-?\\d{11}");
    // Mail-address style example such as @163.com:   "\\w+@\\w+\\.(com|cn)"
    
    // Quote special characters: text between \Q and \E is matched literally
    "\\Q...\\E"
    

    2.2 replaceAll(regex, str)

    示例一:删除文本中的所有数字

    /** Example 1: removes every run of digits from the text. */
    @Test
    public void test() {
        final String text = "ab12ee44r";
        // Each run of one or more digits is replaced by the empty string
        final String digitsRemoved = text.replaceAll("\\d+", "");
        // digitsRemoved: abeer
    }
    

    示例二:通配符匹配

    /** Example 2: quantifiers that are also able to match the empty string. */
    @Test
    public void test() {
        final String source = "123a45bcd";

        // "\\d?" consumes at most one digit per match and also matches the empty string
        final String optionalDigit = source.replaceAll("\\d?", "-");
        // optionalDigit: ----a---b-c-d-

        // "\\d*" consumes a whole run of digits per match and also matches the empty string
        final String digitRun = source.replaceAll("\\d*", "-");
        // digitRun: --a--b-c-d-
    }
    
    image.png

    注意:空字符串也会被匹配到,然后被替换为 -

    示例三:数字达到 10 位及以上时,改为字符串

    /** Example 3: wraps itemId values of ten or more digits in double quotes. */
    @Test
    public void test() {
        // Group 1 captures the digits of an itemId that has at least ten of them
        final String longItemIdRegex = "\"itemId\":(\\d{10,})";
        // $1 re-inserts the captured digits, now surrounded by double quotes
        final String quotedItemId = "\"itemId\":\"$1\"";
        final String payload = "{\"order\":[\"itemId\":123456789],\"order\":[\"itemId\":1234567890123]}";

        System.out.println(payload.replaceAll(longItemIdRegex, quotedItemId));
    }
    

    替换结果:

    {
        "order": [
            "itemId": 123456789
        ],
        "order": [
            "itemId": "1234567890123"
        ]
    }
    

    2.2 开始符(^)和结束符($)

    /** Shows the start (^) and end ($) anchors by trimming zeros from either side. */
    @Test
    public void test() {
        final String digits = "0023004500";

        // ^ ties the run of zeros to the beginning of the input
        final String withoutLeadingZeros = digits.replaceAll("^(0+)", "");
        // withoutLeadingZeros: 23004500

        // $ ties the run of zeros to the end of the input
        final String withoutTrailingZeros = digits.replaceAll("(0+)$", "");
        // withoutTrailingZeros: 00230045
    }
    

    2.3 字符边界

    • \b 匹配一个单词边界
    • 即该位置前后两侧的字符中,一侧是 \w,另一侧不是 \w(或者是字符串的开头/结尾)

    3. Pattern

    3.1 示例模板

    /**
     * Template holding the regex constants, their precompiled patterns and a
     * small print helper used by the Pattern examples.
     */
    public class RegexDemo {
        /**
         * Matches any input in full, including every kind of line terminator.
         *
         * <p>Written as the character class {@code [\s\S]} instead of the
         * alternation {@code (.|\s)*}: the alternation form rejects line
         * terminators that neither {@code .} nor {@code \s} accepts (for example
         * U+2028) and recurses once per character, which can end in a
         * StackOverflowError on long input.
         */
        private static final String ENTIRE_REGEX = "[\\s\\S]*";
        /** Sub-sequence: the literal "code" followed by one or more digits. */
        private static final String SUB_SEQUENCE_REGEX = "code\\d+";
        /** Two capturing groups: "name" plus digits, then later "code" plus digits. */
        private static final String GROUP_REGEX = "(name\\d+).*(code\\d+)";
    
        // Compiled once and reused, so the expressions are not recompiled per use
        private static final Pattern subSequenceCompile = Pattern.compile(SUB_SEQUENCE_REGEX);
        private static final Pattern groupCompile = Pattern.compile(GROUP_REGEX);
        private static final Pattern entireCompile = Pattern.compile(ENTIRE_REGEX);
    
        /**
         * Prints the given value followed by a line break on standard output.
         *
         * @param o the value to print
         */
        public static void print(Object o) {
            System.out.println(o);
        }
    }
    

    3.2 matches()

    public class RegexDemo {
        /**
         * 先编译这个模式
         */
        private static final Pattern pattern = Pattern.compile("[a-z]{3}");
    
        @Test
        public void test() {
            Matcher matcher = pattern.matcher("xyz");
            // matches 永远匹配整个字符串
            print(matcher.matches());
    
            // 链式写法:Pattern.compile(regex).matcher(input).matches()
        }
    
        public static void print(Object o) {
            System.out.println(o);
        }
    }
    

    3.3 reset()

    /** Runs matches() on a matcher and then resets it. */
    @Test
    public void test() {
        final String input = "123-3243-564-00";
        final Matcher matcher = Pattern.compile("\\d{3,5}").matcher(input);
    
        final boolean wholeInputMatches = matcher.matches();
        System.out.println(wholeInputMatches);
        // reset() matters here: it makes the matcher give back the characters consumed by matches()
        matcher.reset();
    }
    

    3.4 Pattern 与 Matcher

    Matcher 可以理解为“某次具体匹配的结果对象”:把编译好的Pattern 对象“应用”到某个String对象上,就获得了作为“本次匹配结果”的Matcher对象

    // Abridged excerpt of the JDK Pattern class, showing compile(String) and matcher(CharSequence).
    public final class Pattern implements java.io.Serializable {
        /**
         * Creates a Pattern for the given expression, passing 0 as the second
         * constructor argument.
         *
         * @param regex the regular expression to compile
         * @return a new Pattern built from {@code regex}
         */
        public static Pattern compile(String regex) {
            return new Pattern(regex, 0);
        }
    
        /**
         * Creates a new Matcher that applies this pattern to the given input.
         *
         * @param input the character sequence to match against
         * @return a new Matcher bound to this pattern and {@code input}
         */
        public Matcher matcher(CharSequence input) {
            // Compile on first use: the flag is checked again while holding the lock
            // before compile() is called
            if (!compiled) {
                synchronized(this) {
                    if (!compiled)
                        compile();
                }
            }
            Matcher m = new Matcher(this, input);
            return m;
        }
    }
    
    public final class Matcher implements MatchResult {\
        public boolean matches() {
            return match(from, ENDANCHOR);
        }
    }
    

    4. 贪婪与非贪婪

    /** Finds the first match of a quantified group followed by a digit and prints its span. */
    @Test
    public void quantifiers() {
        final String input = "aaaa5bbbb6";
        /*
         * Variants to compare:
         * regex = "(.{3,10})[0-9]"
         * regex = "(.{3,10}?)[0-9]"  --> X{n,m}?
         * regex = "(.{3,10}+)[0-9]"  --> X{n,m}+
         * Also try: (.+?)[0-9]
         */
        final Matcher matcher = Pattern.compile("(.{3,10}+)[0-9]").matcher(input);
        // Print "start---end" of the first match, or a fixed message when nothing matches
        final String outcome = matcher.find()
                ? matcher.start() + "---" + matcher.end()
                : "not match";
        print(outcome);
    }
    

    相关文章

      网友评论

        本文标题:Java 正则处理之 Pattern

        本文链接:https://www.haomeiwen.com/subject/hxuicdtx.html