美文网首页
Java正则

Java正则

作者: 阿桃_28e7 | 来源:发表于2020-02-25 14:42 被阅读0次

    元字符

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Demonstrates regex metacharacters: escaping literal brackets, matching a
     * backslash, matching a blank line in CRLF text, and the shorthand classes
     * for digits and word characters.
     */
    public class TestMetaChar {
        // Metacharacters such as [ and ] must be escaped to be matched literally.
        private static final Pattern pattern = Pattern.compile("myArray\\[0\\]");
        // Matches one backslash (the regex \\ is written "\\\\" in a Java string literal).
        private static final Pattern pattern02 = Pattern.compile("\\\\");
        // Matches a blank line in CRLF text, i.e. two consecutive CR LF pairs.
        // No flag is required: MULTILINE only changes the meaning of ^ and $.
        private static final Pattern pattern03 = Pattern.compile("\r\n\r\n");
        // Shorthand classes:
        //   \d is [0-9];             \D is [^0-9]   (ranges run from low to high)
        //   \w is [a-zA-Z0-9_];      \W is [^a-zA-Z0-9_]
        //   \s is [ \t\n\x0B\f\r];   \S is [^\s]    (Java's definition)
        // This pattern cannot match myArray[10]; a quantifier is needed for that: myArray\[\d+\]
        private static final Pattern pattern04 = Pattern.compile("myArray\\[\\d\\]");

        // Requires exactly six characters, so a run of five digits is not matched
        // (\w\d\w\d\w\d? would accept five digits).
        private static final Pattern pattern05 = Pattern.compile("\\w\\d\\w\\d\\w\\d");

        /**
         * Runs every demo pattern against its sample text and prints the matches.
         *
         * @param args ignored
         */
        public static void main(String[] args) {
            String js = "var myArray = new Array(); \n" +
                    "... \n" +
                    "if (myArray[0] == 0 || myArray[10] == 10) { \n" +
                    "... \n" +
                    "}";
            Matcher matcher = pattern.matcher(js);
            if (matcher.find()) {
                System.out.println(matcher.group());
            }

            String url = "\\home\\ben\\sales";
            Matcher matcher1 = pattern02.matcher(url);
            while (matcher1.find()) {
                System.out.println(matcher1.group());
            }

            // The records are CRLF-terminated and separated by one blank line, so the
            // blank-line pattern has something to match (the previous sample contained
            // no line breaks at all and therefore never matched).
            String txt = "101, ben forta\r\n" +
                    "102, jim james\r\n" +
                    "\r\n" +
                    "103, roberta robertson\r\n";
            Matcher matcher2 = pattern03.matcher(txt);
            while (matcher2.find()) {
                System.out.println(matcher2.group() + "ok");
            }

            Matcher matcher3 = pattern04.matcher(js);
            while (matcher3.find()) {
                System.out.println(matcher3.group());
            }

            String txt2 = "11213 a1c2e3 48075 48237 m1b4f2 90046 h1h2h3 123456";
            Matcher matcher4 = pattern05.matcher(txt2);
            while (matcher4.find()) {
                System.out.println(matcher4.group());
            }
        }
    }
    
    // output:
    //        myArray[0]
    //        \
    //        \
    //        \
    //        (two empty lines: the matched CR LF CR LF is printed verbatim)
    //        ok
    //        myArray[0]
    //        a1c2e3
    //        m1b4f2
    //        h1h2h3
    //        123456
    
    

    匹配次数

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Demonstrates regex quantifiers: +, *, ?, {n}, {m,n}, {n,} and the difference
     * between greedy and lazy matching.
     */
    public class TestRepeatMatch {
        // Cannot capture all of ben.forta@forta.com: \w is [a-zA-Z0-9_], so the local
        // part stops at the first '.'. The dot before the top-level domain is escaped
        // so that it matches a literal '.' rather than any character.
        private static final Pattern pattern = Pattern.compile("\\w+@\\w+\\.\\w+");
        // Allows dots in the local part and in the domain, followed by a literal ".tld".
        private static final Pattern pattern04 = Pattern.compile("[\\w.]+@[\\w.]+\\.\\w+");
        // [0-9]+ matches one or more digits (same as \d+); [0-9+] matches ONE character
        // that is a digit or '+'. Inside a character class '+' is literal and needs no escape.
        // + is equivalent to {1,}
        private static final Pattern pattern02 = Pattern.compile("[0-9]+");
        private static final Pattern pattern03 = Pattern.compile("[0-9+]");

        // An address must not start with '.'; pattern04 would accept .ben@forta.com,
        // so pattern05 requires a leading \w+. (* means zero or more, i.e. {0,}.)
        private static final Pattern pattern05 = Pattern.compile("\\w+[\\w.]*@[\\w.]+\\.\\w+");
        // ? means zero or one occurrence, i.e. {0,1}
        private static final Pattern pattern06 = Pattern.compile("https?://[\\w./]+");
        // Exact repetition count
        private static final Pattern pattern07 = Pattern.compile("#[\\da-fA-F]{6}");
        // Repetition range
        private static final Pattern pattern08 = Pattern.compile("\\d{1,2}[/-]\\d{1,2}[/-]\\d{2,4}");
        // Minimum repetition: prices of at least $100 (three or more digits)
        private static final Pattern pattern09 = Pattern.compile("\\$\\d{3,}");

        // Greedy: .* grabs as much as possible, spanning both tag pairs.
        private static final Pattern pattern11 = Pattern.compile("<[Bb]>.*</[Bb]>");
        // Lazy quantifiers prevent over-matching:
        //   greedy    lazy
        //   *         *?
        //   +         +?
        //   {n,}      {n,}?
        private static final Pattern pattern10 = Pattern.compile("<[Bb]>.*?</[Bb]>");

        /**
         * Runs each quantifier demo against its sample text, printing every match
         * followed by a numbered separator line.
         *
         * @param args ignored
         */
        public static void main(String[] args) {
            String txt = "send personal email to ben@forta.com or ben.forta@forta.com. for questions " +
                    "about a book use support@forta.com or ben@urgent.forta.com. feel free to send" +
                    "unsolicited email to spam@forta.com (wouldn't it be" +
                    "nice if it were that simple, huh?). 88 1 + 6";

            printAll(pattern, txt);
            System.out.println("===================1====================");

            printAll(pattern02, txt);
            System.out.println("===================2====================");

            printAll(pattern03, txt);
            System.out.println("===================3====================");

            printAll(pattern04, txt);
            System.out.println("===================4====================");

            String txt02 = "hello .ben@forta.com is my email address";
            printAll(pattern05, txt02);
            System.out.println("===================5====================");

            String txt03 = "the url is http://www.forta.com/, to connect" +
                    "securely use https://www.forta.com/ instead.";
            printAll(pattern06, txt03);
            System.out.println("===================6====================");

            String html = "<body bgcolor='#336633' text='#ffffff' width='666' height='444'>";
            printAll(pattern07, html);
            System.out.println("===================7====================");

            String txt05 = "4/8/03\n" +
                    "10-6-2004\n" +
                    "2/2/2\n" +
                    "01-01-01\n";
            printAll(pattern08, txt05);
            System.out.println("===================8====================");

            String price = "$496.80 $1290.69 $26.43 $613.42 $7.61 $414.90 $25.00";
            printAll(pattern09, price);
            System.out.println("===================9====================");

            String htm = "<B>i am a B</B> <b>i am a b too</b>";
            printAll(pattern11, htm);
            System.out.println("===================10====================");

            printAll(pattern10, htm);
        }

        /** Prints every match of {@code regex} in {@code input}, one per line. */
        private static void printAll(Pattern regex, String input) {
            Matcher matcher = regex.matcher(input);
            while (matcher.find()) {
                System.out.println(matcher.group());
            }
        }
    }
    
    // output:
    //          ben@forta.com
    //          forta@forta.com
    //          support@forta.com
    //          ben@urgent.forta
    //          spam@forta.com
    //          ===================1====================
    //          88
    //          1
    //          6
    //          ===================2====================
    //          8
    //          8
    //          1
    //          +
    //          6
    //          ===================3====================
    //          ben@forta.com
    //          ben.forta@forta.com
    //          support@forta.com
    //          ben@urgent.forta.com
    //          spam@forta.com
    //          ===================4====================
    //          ben@forta.com
    //          ===================5====================
    //          http://www.forta.com/
    //          https://www.forta.com/
    //          ===================6====================
    //          #336633
    //          #ffffff
    //          ===================7====================
    //          4/8/03
    //          10-6-2004
    //          01-01-01
    //          ===================8====================
    //          $496
    //          $1290
    //          $613
    //          $414
    //          ===================9====================
    //          <B>i am a B</B> <b>i am a b too</b>
    //          ===================10====================
    //          <B>i am a B</B>
    //          <b>i am a b too</b>
    
    

    匹配范围

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Demonstrates character classes: explicit sets, ranges, and negated sets.
     */
    public class TestRangeMatch {

        // Would also match cat.xls, because '.' accepts any character.
        private static final Pattern pattern = Pattern.compile(".a.\\.xls");
        // Would still match the tail of usa1.xls, because it is not anchored.
        private static final Pattern pattern02 = Pattern.compile("[ns]a.\\.xls");
        // Anchored with ^ but compiled WITHOUT Pattern.MULTILINE, so ^ only matches at
        // the very start of the input. The multi-line form would be
        // Pattern.compile("(?m)^[ns]a.\\.xls") or the Pattern.MULTILINE flag.
        private static final Pattern pattern03 = Pattern.compile("^[ns]a.\\.xls");
        // Character sets give case-insensitivity for selected letters only.
        private static final Pattern pattern04 = Pattern.compile("[Rr]eg[eE]x");

        // [0123456789] can be shortened to [0-9] (low to high) or written as \d.
        // '-' is a metacharacter only inside [].
        private static final Pattern pattern05 = Pattern.compile("^[ns]a[0123456789]\\.xls", Pattern.MULTILINE);

        private static final Pattern pattern06 = Pattern.compile("#[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]");

        // Negated set: ^ applies to every character and range inside the brackets.
        private static final Pattern pattern07 = Pattern.compile("[ns]a[^0-9]\\.xls");

        /**
         * Prints the matches of each demo pattern in a fixed order.
         *
         * @param args ignored
         */
        public static void main(String[] args) {
            String fileLs = String.join("\n",
                    "sales1.xls",
                    "orders3.xls",
                    "sales2.xls",
                    "sales.xls",
                    "sales3.xls",
                    "apac1.xls",
                    "europe2.xls",
                    "na1.xls",
                    "na2.xls",
                    "cat.xls",
                    "usa1.xls",
                    "sam.xls",
                    "sa1.xls") + "\n";
            String txt = "The phrase 'regular expression' is often abbreviated as RegEx or regex";
            String html = "<body bgcolor='#336633' text='#ffffff' width='666' height='444'>";

            dump(pattern03, fileLs);

            System.out.println("================");

            dump(pattern05, fileLs);
            dump(pattern04, txt);
            dump(pattern06, html);
            dump(pattern07, fileLs);
        }

        /** Writes each successive match of {@code regex} in {@code input} on its own line. */
        private static void dump(Pattern regex, String input) {
            for (Matcher m = regex.matcher(input); m.find(); ) {
                System.out.println(m.group());
            }
        }
    }
    
    // output:
    //================
    //        na1.xls
    //        na2.xls
    //        sa1.xls
    //        RegEx
    //        regex
    //        #336633
    //        #ffffff
    //        sam.xls
    
    

    匹配位置

    import java.util.ArrayList;
    import java.util.List;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Demonstrates position matching: word boundaries, lookaround assertions and
     * the ^ / $ anchors, plus replacement strings that contain '$'.
     */
    public class TestLocationMatch {

        private static final Pattern pattern = Pattern.compile("cat");
        // \s consumes the surrounding whitespace, so the spaces are part of the match.
        private static final Pattern pattern1 = Pattern.compile("\\scat\\s");
        // Lookbehind / lookahead assert the whitespace without consuming it.
        private static final Pattern pattern1_ref = Pattern.compile("(?<=\\s)cat(?=\\s)");
        // \b is a word boundary: a position between a word character (\w) and a
        // non-word character (\W). It matches a position only, never a character,
        // so "\bcat\b" yields a match of length 3.
        private static final Pattern pattern2 = Pattern.compile("\\bcat\\b");
        private static final Pattern pattern3 = Pattern.compile("\\bcap");
        private static final Pattern pattern4 = Pattern.compile("cap\\b");

        // Naive check: finds an XML declaration anywhere in the text.
        private static final Pattern pattern5 = Pattern.compile("<\\?xml.*?\\?>");
        // Strict check: the declaration must open the document, optionally preceded by
        // whitespace. ^ matches the start of the whole input (inside a character class
        // ^ means negation instead). The anchor has to come BEFORE \s*; with \s* first,
        // any leading whitespace made the match impossible.
        private static final Pattern pattern6 = Pattern.compile("^\\s*<\\?xml.*?\\?>");
        // $ matches the end of the whole input. Example: nothing may follow </html>
        // in a web page, which can be checked with </[Hh][Tt][Mm][Ll]>\s*$

        // With (?m), $ matches at every line end: extracts // comments line by line.
        private static final Pattern pattern7 = Pattern.compile("(?m)//.*$");

        /**
         * Runs every position-matching demo and prints the results.
         *
         * @param args ignored
         */
        public static void main(String[] args) {
            String txt = "the cat scattered his food all over the room";

            String txt2 = "the captain wore his cap and cape proudly as \n" +
                    "he sat listening to the recap of how his" +
                    "crew saved the men from a capsized vessel";

            System.out.println(matchAll(pattern, txt));
            System.out.println("=====================1========================");
            System.out.println("【有空格】" + matchAll(pattern1, txt));
            System.out.println("=====================2========================");
            System.out.println("【无空格】" + matchAll(pattern1_ref, txt));
            System.out.println("=====================3========================");
            System.out.println("【无空格】" + matchAll(pattern2, txt));
            System.out.println("=====================4========================");
            System.out.println(replaceAll(pattern3, txt2, "@@@"));
            System.out.println("=====================5========================");

            // '$' must be escaped in a replacement string, otherwise replaceAll throws
            // IllegalArgumentException: Illegal group reference. In a replacement, '$'
            // introduces a group reference ($1, $2, ...), so a '$' that is not followed
            // by a group number is rejected. Either quote the replacement with
            // Matcher.quoteReplacement(...) or escape it by hand: \\$\\$\\$
            // See https://blog.csdn.net/qq_37502106/article/details/88642840
            System.out.println(replaceAll(pattern4, txt2, Matcher.quoteReplacement("$$$")));
            System.out.println("=====================6========================");

            String txt3 = "this xml file has a bad start\n" +
                    "  <?xml version='1.0' encoding='UTF-8' ?> \n" +
                    "<beans xmlns=\"http://www.springframework.org/schema/beans\"\n" +
                    "    xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" +
                    "    xsi:schemaLocation=\"http://www.springframework.org/schema/beans\n" +
                    "                        http://www.springframework.org/schema/beans/spring-beans.xsd\">\n" +
                    "</beans>";

            System.out.println(matchAll(pattern5, txt3).size() == 1 ? "合格xml" : "不合格xml");
            System.out.println("=====================7========================");
            System.out.println(matchAll(pattern6, txt3).size() == 1 ? "合格xml" : "不合格xml");
            System.out.println("=====================8========================");

            String js = "ar URL= {\n" +
                    "            addBatchTemplate: '/myApp/addBatchTemplate',//添加模板\n" +
                    "            getBackDeviceTypeList:'/myApp/switcher-device-template/getFrontDeviceTypeList', //获取型号列表\n" +
                    "            loadConfig: '/myApp/loadConfig',//post 下发配置\n" +
                    "            isInvalidEdit: '/myApp/isInvalidEdit',//权限判断\n" +
                    "            isInvalidConfig: '/myApp/isInvalidConfig',//权限判断\n" +
                    "            getBatchTemplateInfo:'/myApp/getBatchTemplateInfo', //查询信息\n" +
                    "        };\n";
            System.out.println(matchAll(pattern7, js));
        }

        /**
         * Collects every match of {@code pattern} in {@code source}, in order.
         *
         * @param pattern compiled regex to search with
         * @param source  text to search
         * @return the matched substrings; empty when nothing matches
         */
        private static List<String> matchAll(Pattern pattern, String source) {
            List<String> matchLs = new ArrayList<>(10);
            Matcher matcher = pattern.matcher(source);
            while (matcher.find()) {
                matchLs.add(matcher.group());
            }
            return matchLs;
        }

        /**
         * Replaces every match of {@code pattern} in {@code source}.
         * String.replaceAll delegates to the same Matcher.replaceAll call; the result
         * is a new string and {@code source} is left untouched.
         *
         * @param pattern    compiled regex to search with
         * @param source     text to rewrite
         * @param replaceStr replacement; '$' and '\' have special meaning in it
         * @return the rewritten text
         */
        private static String replaceAll(Pattern pattern, String source, String replaceStr) {
            Matcher matcher = pattern.matcher(source);
            return matcher.replaceAll(replaceStr);
        }
    }
    
    // output:
    //        [cat, cat]
    //        =====================1========================
    //        【有空格】[ cat ]
    //        =====================2========================
    //        【无空格】[cat]
    //        =====================3========================
    //        【无空格】[cat]
    //        =====================4========================
    //        the @@@tain wore his @@@ and @@@e proudly as
    //        he sat listening to the recap of how hiscrew saved the men from a @@@sized vessel
    //        =====================5========================
    //        the captain wore his $$$ and cape proudly as
    //        he sat listening to the re$$$ of how hiscrew saved the men from a capsized vessel
    //        =====================6========================
    //        合格xml
    //        =====================7========================
    //        不合格xml
    //        =====================8========================
    //        [//添加模板, //获取型号列表, //post 下发配置, //权限判断, //权限判断, //查询信息]
    

    子表达式

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Demonstrates subexpressions (groups): scoping a quantifier, shortening a
     * repeated pattern, raising the precedence of alternation, and building a
     * precise IPv4 matcher.
     */
    public class TestSubExp {

        // &nbsp;{2,} only matches "&nbsp" followed by two or more ';' characters,
        // because {2,} applies to the single preceding element.
        private static final Pattern pattern = Pattern.compile("&nbsp;{2,}");
        // Grouping scopes the quantifier: (&nbsp;){2,} matches &nbsp;&nbsp;
        private static final Pattern pattern2 = Pattern.compile("(&nbsp;){2,}");

        // Rough IP match (the dots are escaped so they match a literal '.').
        private static final Pattern pattern3 = Pattern.compile("\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}");
        // Grouping shortens the same pattern.
        private static final Pattern pattern4 = Pattern.compile("(\\d{1,3}\\.){3}\\d{1,3}");

        // "19|20\d{2}" matches either "19" or "20XX": | has the lowest precedence and
        // treats everything on each side as one alternative, i.e. (19)|(20\d{2}).
        private static final Pattern pattern5 = Pattern.compile("19|20\\d{2}");
        // Grouping raises the precedence: (19|20)\d{2} matches a full year.
        private static final Pattern pattern6 = Pattern.compile("(19|20)\\d{2}");

        /*
         * Precise IP match.
         *
         * The rough pattern above accepts invalid addresses such as 666.77.8.999.
         * A valid IPv4 address has four dot-separated numbers, each in 0..255.
         * Regex cannot express numeric ranges directly, so the range is spelled out:
         *
         *   250..255   25[0-5]
         *   200..249   2[0-4]\d
         *   100..199   1\d{2}
         *   0..99      \d{1,2}
         *
         * 2[0-5][0-5] cannot stand in for the first two rows: it misses 206..209.
         *
         * The alternatives are listed longest-first, and the whole address is wrapped
         * in digit lookarounds. Without them, "200" was matched as "20" and
         * "666.77.8.999" produced the bogus match "66.77.8.99".
         */
        static final String quarter = "(25[0-5])|(2[0-4]\\d)|(1\\d{2})|(\\d{1,2})";

        private static final Pattern pattern7 =
                Pattern.compile("(?<!\\d)((" + quarter + ")\\.){3}(" + quarter + ")(?!\\d)");

        /**
         * Runs every subexpression demo and prints the results.
         *
         * @param args ignored
         */
        public static void main(String[] args) {

            String s = "hello, my name is mike&nbsp;jordan, and i am" +
                    "the best basketball player, rank No&nbsp;&nbsp;1";

            System.out.println(replaceAll(pattern, s, "空格"));
            System.out.println("=====================1========================");
            System.out.println(replaceAll(pattern2, s, "空格"));
            System.out.println("=====================2========================");

            String s1 = "ping 12.159.46.200 ... ";
            System.out.println(matchAll(pattern3, s1));
            System.out.println("=====================3========================");
            System.out.println(matchAll(pattern4, s1));
            System.out.println("=====================4========================");

            String s2 = "ID: 042" +
                    "SEX: M" +
                    "BIRTH: 1967-08-17" +
                    "DIE: 2067-08-17" +
                    "STATUS: Active";
            System.out.println(matchAll(pattern5, s2));
            System.out.println("=====================5========================");
            System.out.println(matchAll(pattern6, s2));
            System.out.println("=====================6========================");

            String s3 = "illegal ip: 666.77.8.999" +
                    "legal ip: 12.159.46.200";
            System.out.println(matchAll(pattern7, s3));
            System.out.println("=====================7========================");
        }

        /**
         * Collects every match of {@code pattern} in {@code source}, in order.
         *
         * @param pattern compiled regex to search with
         * @param source  text to search
         * @return the matched substrings; empty when nothing matches
         */
        private static List<String> matchAll(Pattern pattern, String source) {
            List<String> matchLs = new ArrayList<>(10);
            Matcher matcher = pattern.matcher(source);
            while (matcher.find()) {
                matchLs.add(matcher.group());
            }
            return matchLs;
        }

        /**
         * Replaces every match of {@code pattern} in {@code source}.
         * String.replaceAll delegates to the same Matcher.replaceAll call; the result
         * is a new string and {@code source} is left untouched.
         *
         * @param pattern    compiled regex to search with
         * @param source     text to rewrite
         * @param replaceStr replacement text
         * @return the rewritten text
         */
        private static String replaceAll(Pattern pattern, String source, String replaceStr) {
            Matcher matcher = pattern.matcher(source);
            return matcher.replaceAll(replaceStr);
        }
    }
    
    // output:
    //        hello, my name is mike&nbsp;jordan, and i amthe best basketball player, rank No&nbsp;&nbsp;1
    //        =====================1========================
    //        hello, my name is mike&nbsp;jordan, and i amthe best basketball player, rank No空格1
    //        =====================2========================
    //        [12.159.46.200]
    //        =====================3========================
    //        [12.159.46.200]
    //        =====================4========================
    //        [19, 2067]
    //        =====================5========================
    //        [1967, 2067]
    //        =====================6========================
    //        [12.159.46.200]
    //        =====================7========================
    

    前后查找

    import java.util.ArrayList;
    import java.util.List;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Demonstrates lookbehind and lookahead assertions, contrasted with plain
     * capturing groups.
     */
    public class TestLookAround {

        // Matches the tag together with its content.
        private static final Pattern pattern = Pattern.compile("<(title)>(.*)</\\1>", Pattern.CASE_INSENSITIVE);
        // Matches the content only: the tags are asserted but not consumed.
        private static final Pattern pattern2 = Pattern.compile("(?<=<(title)>).*(?=</\\1>)", Pattern.CASE_INSENSITIVE);
        // Every number, including item ids.
        private static final Pattern pattern3 = Pattern.compile("[0-9.]+");

        // Prices including the dollar sign.
        private static final Pattern pattern4 = Pattern.compile("\\$[0-9.]+");
        // Lookbehind: prices without the dollar sign.
        private static final Pattern pattern5 = Pattern.compile("(?<=\\$)[0-9.]+");

        /**
         * Alternative entry point showing group numbering.
         * In (?<=<(title)>).*(?=</\1>) the parentheses that delimit the lookbehind and
         * lookahead are not themselves numbered groups; only (title) is.
         *
         * @param args ignored
         */
        public static void main01(String[] args) {
            String source = "<HEAD>" +
                    "<TITLE>BEN FORTA'S HOMEPAGE</title>" +
                    "</HEAD>";
            for (Matcher m = pattern2.matcher(source); m.find(); ) {
                System.out.println(m.group(0));
                System.out.println(m.group(1));
            }

            System.out.println("======");

            // Groups are numbered by the position of their opening parenthesis.
            Pattern nested = Pattern.compile("((<(title)>)(.*)(</(title)>))", Pattern.CASE_INSENSITIVE);
            for (Matcher m = nested.matcher(source); m.find(); ) {
                // 0: whole match, 1: outermost group, 2: opening tag, 3: its name,
                // 4: content, 5: closing tag, 6: its name
                for (int group = 0; group <= 6; group++) {
                    System.out.println(m.group(group));
                }
            }
        }

        /**
         * Runs the tag and price demos and prints the results.
         *
         * @param args ignored
         */
        public static void main(String[] args) {
            String s = "<HEAD>" +
                    "<TITLE>BEN FORTA'S HOMEPAGE</TITLE>" +
                    "</HEAD>";
            String s1 = "ABC01: $23.45" +
                    "HGG42: $5.31" +
                    "CFMX1: $899.00" +
                    "XTC99: $69.96" +
                    "Total items found: 4";

            // Whole match
            System.out.println(matchAll(pattern, s));
            System.out.println("=====================1========================");
            // Only the part captured by one subexpression
            System.out.println(matchTagContent(pattern, s));
            System.out.println("=====================2========================");
            // Lookaround match
            System.out.println(matchAll(pattern2, s));
            System.out.println("=====================3========================");

            System.out.println(matchAll(pattern3, s1));
            System.out.println("=====================4========================");
            System.out.println(matchAll(pattern4, s1));
            System.out.println("=====================5========================");
            System.out.println(matchAll(pattern5, s1));
        }

        /** Returns what capture group 2 holds for every match of {@code pattern} in {@code source}. */
        private static List<String> matchTagContent(Pattern pattern, String source) {
            List<String> contents = new ArrayList<>(10);
            for (Matcher m = pattern.matcher(source); m.find(); ) {
                contents.add(m.group(2));
            }
            return contents;
        }

        /** Returns every full match of {@code pattern} in {@code source}, in order. */
        private static List<String> matchAll(Pattern pattern, String source) {
            List<String> found = new ArrayList<>(10);
            for (Matcher m = pattern.matcher(source); m.find(); ) {
                found.add(m.group());
            }
            return found;
        }

        /**
         * Returns {@code source} with every match of {@code pattern} replaced by
         * {@code replaceStr}; {@code source} itself is not modified.
         */
        private static String replaceAll(Pattern pattern, String source, String replaceStr) {
            return pattern.matcher(source).replaceAll(replaceStr);
        }
    }
    
    // output:
    //        [<TITLE>BEN FORTA'S HOMEPAGE</TITLE>]
    //        =====================1========================
    //        [BEN FORTA'S HOMEPAGE]
    //        =====================2========================
    //        [BEN FORTA'S HOMEPAGE]
    //        =====================3========================
    //        [01, 23.45, 42, 5.31, 1, 899.00, 99, 69.96, 4]
    //        =====================4========================
    //        [$23.45, $5.31, $899.00, $69.96]
    //        =====================5========================
    //        [23.45, 5.31, 899.00, 69.96]
    

    回溯引用

    import java.util.ArrayList;
    import java.util.List;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Demonstrates backreferences: inside a pattern (\1) to require repeated text,
     * and inside a replacement string ($1, $2, ...) to reuse captured groups.
     */
    public class TestBackTraceRef {

        // First-level headings only: <[Hh]1>.*?</[Hh]1>
        private static final Pattern pattern = Pattern.compile("<h1>.*?</h1>", Pattern.CASE_INSENSITIVE);

        // Any heading level; also accepts mismatched pairs: <[Hh][1-6]>.*?</[Hh][1-6]>
        private static final Pattern pattern1 = Pattern.compile("<h[1-6]>.*?</h[1-6]>", Pattern.CASE_INSENSITIVE);

        // The backreference rejects mismatched tags. A backreference can only refer
        // to a subexpression (group).
        private static final Pattern pattern2 = Pattern.compile("<(h[1-6])>.*?</\\1>", Pattern.CASE_INSENSITIVE);

        // Repeated words. The trailing \b stops the second occurrence from matching a
        // mere prefix of a longer word (" a and" must not be reported as " a a").
        private static final Pattern pattern3 = Pattern.compile("\\s(\\w+)\\s\\1\\b");

        // Backreferences used in replacements
        private static final Pattern pattern4 = Pattern.compile("(\\w+[\\w.]*@[\\w.]+\\.\\w+)");
        private static final Pattern pattern5 = Pattern.compile("(\\d{3})-(\\d{3})-(\\d{4})");

        /**
         * Runs every backreference demo and prints the results.
         *
         * @param args ignored
         */
        public static void main(String[] args) {
            String html = "<BODY>" +
                    "<H1>welcome to general expression lesson</H1>" +
                    "<H2>regexp is a good tool to handle string</H2>" +
                    "<H2>you can be a regex master</H2>" +
                    "<H2>this is not valid</H3>" +
                    "</BODY>";
            System.out.println(matchAll(pattern, html));
            System.out.println("=====================1========================");
            System.out.println(matchAll(pattern1, html));
            System.out.println("=====================2========================");
            System.out.println(matchAll(pattern2, html));
            System.out.println("=====================3========================");

            // Line breaks keep the words at the ends of the lines separate
            // (plain concatenation used to glue them into "arerepeated").
            String s = "this is a block of of text,\n" +
                    "several words here are are\n" +
                    "repeated, and and they\n" +
                    "should not be";
            System.out.println(matchAll(pattern3, s));
            System.out.println("=====================4========================");

            // In a replacement string, $1, $2, ... refer to what the groups captured.
            String s1 = "hi, ben@forta.com is my email address";
            System.out.println(replaceAll(pattern4, s1, "<A HREF='mailto:$1'>$1</A>"));
            System.out.println("=====================5========================");

            String s2 = "313-555-1234 \n" +
                    "248-555-9999 \n" +
                    "810-555-9000 \n";
            System.out.println(replaceAll(pattern5, s2, "($1) $2-$3"));
        }

        /**
         * Collects every match of {@code pattern} in {@code source}, in order.
         *
         * @param pattern compiled regex to search with
         * @param source  text to search
         * @return the matched substrings; empty when nothing matches
         */
        private static List<String> matchAll(Pattern pattern, String source) {
            List<String> matchLs = new ArrayList<>(10);
            Matcher matcher = pattern.matcher(source);
            while (matcher.find()) {
                matchLs.add(matcher.group());
            }
            return matchLs;
        }

        /**
         * Replaces every match of {@code pattern} in {@code source}.
         * String.replaceAll delegates to the same Matcher.replaceAll call; the result
         * is a new string and {@code source} is left untouched.
         *
         * @param pattern    compiled regex to search with
         * @param source     text to rewrite
         * @param replaceStr replacement; may reference groups as $1, $2, ...
         * @return the rewritten text
         */
        private static String replaceAll(Pattern pattern, String source, String replaceStr) {
            Matcher matcher = pattern.matcher(source);
            return matcher.replaceAll(replaceStr);
        }
    }
    
    // output:
    //        [<H1>welcome to general expression lesson</H1>]
    //        =====================1========================
    //        [<H1>welcome to general expression lesson</H1>, <H2>regexp is a good tool to handle string</H2>, <H2>you can be a regex master</H2>, <H2>this is not valid</H3>]
    //        =====================2========================
    //        [<H1>welcome to general expression lesson</H1>, <H2>regexp is a good tool to handle string</H2>, <H2>you can be a regex master</H2>]
    //        =====================3========================
    //        [ of of,  are are,  and and]
    //        =====================4========================
    //        hi, <A HREF='mailto:ben@forta.com'>ben@forta.com</A> is my email address
    //        =====================5========================
    //        (313) 555-1234
    //        (248) 555-9999
    //        (810) 555-9000 
    

    特殊替换

    import java.util.regex.Matcher;
            import java.util.regex.Pattern;
    
    /**
     * Demonstrates per-match replacement with {@code appendReplacement}/{@code appendTail}:
     * every occurrence of "java" (in any letter case) is rewritten, alternating between
     * upper case and lower case.
     */
    public class SpecialReplace {
        public static void main(String[] args) {
            Matcher m = Pattern.compile("java", Pattern.CASE_INSENSITIVE)
                    .matcher("java Java JAVA JAva I love Java and you ?");
            StringBuffer rewritten = new StringBuffer();
            // ordinal is the 1-based number of the match currently being replaced.
            for (int ordinal = 1; m.find(); ordinal++) {
                String replacement;
                if (ordinal % 2 == 0) {
                    // even-numbered matches become lower case
                    replacement = "java";
                } else {
                    // odd-numbered matches become upper case
                    replacement = "JAVA";
                }
                m.appendReplacement(rewritten, replacement);
            }
            // Copy whatever follows the last match.
            m.appendTail(rewritten);
            System.out.println(rewritten);
        }
    }
    
    // output:
    //          JAVA java JAVA java I love JAVA and you ?
    

    多行匹配

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Compares {@code ^} with and without {@link Pattern#MULTILINE} by running the same
     * replacement over a newline-separated list of file names.
     */
    public class TestMultilineMatch {

        public static void main(String[] args) {
            String[] names = {
                    "sales1.xls", "orders3.xls", "sales2.xls", "sales.xls", "sales3.xls",
                    "apac1.xls", "europe2.xls", "na1.xls", "na2.xls", "cat.xls",
                    "usa1.xls", "sam.xls", "sa1.xls"
            };
            // One name per line, each line terminated by '\n'.
            StringBuilder listing = new StringBuilder();
            for (String name : names) {
                listing.append(name).append('\n');
            }
            String fileLs = listing.toString();

            // With MULTILINE, ^ also matches at the start of each line; the flag is
            // equivalent to the inline form Pattern.compile("(?m)^[ns]a.\\.xls").
            Pattern perLine = Pattern.compile("^[ns]a.\\.xls", Pattern.MULTILINE);
            System.out.println(perLine.matcher(fileLs).replaceAll("多行匹配"));

            // Without the flag, ^ only matches at the very beginning of the input.
            Pattern wholeInput = Pattern.compile("^[ns]a.\\.xls");
            System.out.println(wholeInput.matcher(fileLs).replaceAll("整串匹配"));
        }
    }
    
    // output:
    //        sales1.xls
    //        orders3.xls
    //        sales2.xls
    //        sales.xls
    //        sales3.xls
    //        apac1.xls
    //        europe2.xls
    //        多行匹配
    //        多行匹配
    //        cat.xls
    //        usa1.xls
    //        多行匹配
    //        多行匹配
    //
    //        sales1.xls
    //        orders3.xls
    //        sales2.xls
    //        sales.xls
    //        sales3.xls
    //        apac1.xls
    //        europe2.xls
    //        na1.xls
    //        na2.xls
    //        cat.xls
    //        usa1.xls
    //        sam.xls
    //        sa1.xls
    
    

    贪婪模式

    import java.util.regex.MatchResult;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Contrasts the greedy quantifier {@code .*} with the lazy quantifier {@code .*?} by
     * replacing the first {@code <book>...</book>} match in a small XML-like string.
     */
    public class TestGreedyAndLazyMode {
        public static void main(String[] args) {
            String string = "<books><book>西游记</book><book>三国演义</book><book>水浒传</book></books>";

            // Greedy: .* takes as much as it can, so the match runs to the last </book>.
            Matcher greedy = Pattern.compile("<(book)>.*</\\1>", Pattern.CASE_INSENSITIVE).matcher(string);
            System.out.println(greedy.replaceFirst("贪婪模式"));

            // Lazy: .*? takes as little as it can, so the match stops at the first </book>.
            Matcher lazy = Pattern.compile("<(book)>.*?</\\1>").matcher(string);
            System.out.println(lazy.replaceFirst("懒惰模式"));
        }
    }
    
    // output: 
    //      <books>贪婪模式</books>
    //      <books>懒惰模式<book>三国演义</book><book>水浒传</book></books>
    
    

    纯文本和点

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Demonstrates literal-text patterns, the case-insensitive flag and the {@code .}
     * metacharacter.
     */
    public class TestPureTextAndDot {

        // pattern and pattern02 are plain-literal examples; main does not use them.
        private static final Pattern pattern = Pattern.compile("Ben");
        private static final Pattern pattern02 = Pattern.compile("my");
        /** Matching is case-sensitive by default; case-insensitivity can be requested when compiling. */
        private static final Pattern pattern03 = Pattern.compile("ben", Pattern.CASE_INSENSITIVE);

        /**
         * {@code .} matches any single character (letter, digit, punctuation, including a
         * literal dot) except a line terminator; the SQL LIKE counterpart is {@code _}.
         */
        private static final Pattern pattern04 = Pattern.compile("sales.");
        private static final Pattern pattern05 = Pattern.compile(".a.\\.xls");

        public static void main(String[] args) {
            printMatches(pattern03,
                    "Hello, my name is Ben. Please visit my01 website at http://www.forta.com/.");

            String fileLs = "sales1.xls\n"
                    + "orders3.xls\n"
                    + "sales2.xls\n"
                    + "sales.xls\n"
                    + "sales3.xls\n"
                    + "apac1.xls\n"
                    + "europe2.xls\n"
                    + "na1.xls\n"
                    + "na2.xls\n"
                    + "sa1.xls\n";
            printMatches(pattern04, fileLs);

            System.out.println("==================================================================");

            printMatches(pattern05, fileLs);
        }

        /** Prints every match of {@code regex} in {@code text}, one per line. */
        private static void printMatches(Pattern regex, String text) {
            Matcher m = regex.matcher(text);
            while (m.find()) {
                System.out.println(m.group());
            }
        }
    }
    
    // output:
    //        Ben
    //        sales1
    //        sales2
    //        sales.
    //        sales3
    //        ==================================================================
    //        na1.xls
    //        na2.xls
    //        sa1.xls
    
    

    java正则api

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Two small demos of the java.util.regex API: {@code main1} contrasts
     * {@code matches()}, {@code find()}, {@code reset()} and {@code lookingAt()} on one
     * matcher; {@code main} shows how DOTALL and MULTILINE change what {@code find()} reports.
     */
    public class MathcesAndFindDiff {
        /**
         * Runs {@code \d{3,5}} against "123-34345-234-00" and prints the result of each
         * matcher call, separated by banner lines.
         */
        public static void main1(String[] args){
            Matcher matcher = Pattern.compile("\\d{3,5}").matcher("123-34345-234-00");

            // matches() needs the whole input to match, so it reports false here.
            // NOTE(review): the sample output recorded for this demo shows the find() below
            // starting at 4, i.e. the failed matches() apparently moved the search position
            // past the leading "123" -- confirm on the JDK in use.
            print(matcher.matches());
            System.out.println("==============================================");
            // Where does the next search land? Without the matches() call above, a fresh
            // matcher would find "123" and print 0.
            matcher.find();
            print(matcher.start());
            System.out.println("==============================================");

            // reset() discards the matcher state so searching starts over from the beginning.
            matcher.reset();

            // First find(): success flag, then the matched text and its start index.
            boolean firstFound = matcher.find();
            print(firstFound);
            System.out.println("=====================1========================");
            print(matcher.group() + " - " + matcher.start());
            System.out.println("=====================2========================");
            // Second find(): continues after the previous match.
            boolean secondFound = matcher.find();
            print(secondFound);
            System.out.println("=====================3========================");
            print(matcher.group() + " - " + matcher.start());
            System.out.println("=====================4========================");

            /*
             * matcher.lookingAt(): "Attempts to match the input sequence, starting at the
             * beginning of the region, against the pattern" -- a single prefix match.
             */
            // First lookingAt(): success flag, then the matched text and its start index.
            boolean firstPrefix = matcher.lookingAt();
            print(firstPrefix);
            System.out.println("=====================5========================");
            print(matcher.group() + " - " + matcher.start());
            System.out.println("=====================6========================");

            // Second lookingAt(): it always starts from the beginning of the region again.
            boolean secondPrefix = matcher.lookingAt();
            print(secondPrefix);
            System.out.println("=====================7========================");
            print(matcher.group() + " - " + matcher.start());
            System.out.println("=====================8========================");
        }

        /** Prints {@code o} on its own line. */
        private static void print(Object o){
            System.out.println(o);
        }

        /**
         * For each sample pattern, reports whether {@code find()} succeeds on a two-line
         * input with default flags and with DOTALL | MULTILINE.
         */
        public static void main(String[] args) {
            String input = "I dream of engines\nmore engines, all day long";
            System.out.println("INPUT:" + input);
            System.out.println();
            String[] patt = {"engines.more engines", "ines\nmore", "engines$"};
            for (String regex : patt) {
                System.out.println("PATTERN:" + regex);
                boolean plainHit = Pattern.compile(regex).matcher(input).find();
                System.out.println("DEFAULT match " + plainHit);
                // DOTALL lets '.' match line terminators as well; MULTILINE lets '$'
                // match just before a line terminator, not only at the end of input.
                int flags = Pattern.DOTALL | Pattern.MULTILINE;
                boolean flaggedHit = Pattern.compile(regex, flags).matcher(input).find();
                System.out.println("Multiline match " + flaggedHit);
                System.out.println();
            }
        }
    }
    
    // output 1:
    //        false
    //        ==============================================
    //        4
    //        ==============================================
    //        true
    //        =====================1========================
    //        123 - 0
    //        =====================2========================
    //        true
    //        =====================3========================
    //        34345 - 4
    //        =====================4========================
    //        true
    //        =====================5========================
    //        123 - 0
    //        =====================6========================
    //        true
    //        =====================7========================
    //        123 - 0
    //        =====================8========================
    
    // output 2:
    //        INPUT:I dream of engines
    //        more engines, all day long
    //
    //        PATTERN:engines.more engines
    //        DEFAULT match false
    //        Multiline match true
    //
    //        PATTERN:ines
    //        more
    //        DEFAULT match true
    //        Multiline match true
    //
    //        PATTERN:engines$
    //        DEFAULT match false
    //        Multiline match true
    

    相关文章

      网友评论

          本文标题:Java正则

          本文链接:https://www.haomeiwen.com/subject/azktchtx.html