元字符
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates regex metacharacters: escaping them, matching whitespace
 * sequences, and the shorthand character classes.
 */
public class TestMetaChar {
    /** Escapes the metacharacters [ and ] so that "myArray[0]" is matched literally. */
    private static final Pattern pattern = Pattern.compile("myArray\\[0\\]");

    /** Matches one literal backslash: the regex needs two, and Java source doubles each again. */
    private static final Pattern pattern02 = Pattern.compile("\\\\");

    /**
     * Matches a blank line in CRLF-terminated text, i.e. two consecutive
     * "carriage return + line feed" pairs. No flag is required: MULTILINE only
     * changes the meaning of ^ and $, and neither is used here.
     */
    private static final Pattern pattern03 = Pattern.compile("\r\n\r\n");

    // Shorthand character classes:
    //   \d is [0-9];         \D is [^0-9]   (ranges must run from low to high)
    //   \w is [a-zA-Z0-9_];  \W is [^a-zA-Z0-9_]
    //   \s is [ \t\n\x0B\f\r] in Java (note that it includes the space); \S is its negation

    /**
     * Matches an index made of exactly one digit, so "myArray[10]" is NOT matched.
     * A repetition is needed for that: myArray\[\d+\]
     */
    private static final Pattern pattern04 = Pattern.compile("myArray\\[\\d\\]");

    /**
     * Matches exactly six characters alternating "word char, digit". A run of only
     * five digits therefore never matches (\w\d\w\d\w\d? would accept it).
     */
    private static final Pattern pattern05 = Pattern.compile("\\w\\d\\w\\d\\w\\d");

    /**
     * Runs every demo pattern against its sample text and prints each match on its own line.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String js = "var myArray = new Array(); \n" +
                "... \n" +
                "if (myArray[0] == 0 || myArray[10] == 10) { \n" +
                "... \n" +
                "}";
        Matcher matcher = pattern.matcher(js);
        if (matcher.find()) {
            System.out.println(matcher.group());
        }

        String url = "\\home\\ben\\sales";
        Matcher matcher1 = pattern02.matcher(url);
        while (matcher1.find()) {
            System.out.println(matcher1.group());
        }

        // The records must really be separated by CRLF line ends (with one empty
        // line in between), otherwise the blank-line pattern has nothing to match.
        String txt = "101, ben forta\r\n" +
                "102, jim james\r\n" +
                "\r\n" +
                "103, roberta robertson\r\n";
        Matcher matcher2 = pattern03.matcher(txt);
        while (matcher2.find()) {
            // Print the control characters in escaped form so the match is visible.
            String visible = matcher2.group().replace("\r", "\\r").replace("\n", "\\n");
            System.out.println(visible + "ok");
        }

        Matcher matcher3 = pattern04.matcher(js);
        while (matcher3.find()) {
            System.out.println(matcher3.group());
        }

        String txt2 = "11213 a1c2e3 48075 48237 m1b4f2 90046 h1h2h3 123456";
        Matcher matcher4 = pattern05.matcher(txt2);
        while (matcher4.find()) {
            System.out.println(matcher4.group());
        }
    }
}
// output:
// myArray[0]
// \
// \
// \
// \r\n\r\nok
// myArray[0]
// a1c2e3
// m1b4f2
// h1h2h3
// 123456
匹配次数
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates regex quantifiers: +, *, ?, {n}, {m,n}, {n,} and the
 * difference between greedy and lazy repetition.
 */
public class TestRepeatMatch {
    /**
     * Naive e-mail pattern. \w is [a-zA-Z0-9_], so the local part cannot contain a
     * dot and only the tail of "ben.forta@forta.com" is matched. The dot between
     * domain labels is escaped; an unescaped dot would accept any character there.
     */
    private static final Pattern pattern = Pattern.compile("\\w+@\\w+\\.\\w+");

    /** Allows dots in the local part and in the domain, and requires a final ".label". */
    private static final Pattern pattern04 = Pattern.compile("[\\w.]+@[\\w.]+\\.\\w+");

    // [0-9]+ matches one or more digits (same as \d+); + is equivalent to {1,}.
    // [0-9+] matches a single character that is a digit or a literal plus sign:
    // inside a character class + needs no escaping.
    private static final Pattern pattern02 = Pattern.compile("[0-9]+");
    private static final Pattern pattern03 = Pattern.compile("[0-9+]");

    /**
     * An address must not start with a dot; pattern04 would accept ".ben@forta.com".
     * Here the first character run must be word characters, followed by zero or
     * more word characters or dots (* is equivalent to {0,}).
     */
    private static final Pattern pattern05 = Pattern.compile("\\w+[\\w.]*@[\\w.]+\\.\\w+");

    /** ? makes the preceding element optional (zero or one time, same as {0,1}). */
    private static final Pattern pattern06 = Pattern.compile("https?://[\\w./]+");

    /** Exact repetition count: a hash sign followed by exactly six hex digits. */
    private static final Pattern pattern07 = Pattern.compile("#[\\da-fA-F]{6}");

    /** Repetition interval: day/month of 1-2 digits and a year of 2-4 digits. */
    private static final Pattern pattern08 = Pattern.compile("\\d{1,2}[/-]\\d{1,2}[/-]\\d{2,4}");

    /** Minimum repetition: a dollar sign followed by at least three digits, i.e. prices of $100 or more. */
    private static final Pattern pattern09 = Pattern.compile("\\$\\d{3,}");

    /** Greedy: .* grabs as much as possible, so the match runs to the LAST closing tag. */
    private static final Pattern pattern11 = Pattern.compile("<[Bb]>.*</[Bb]>");

    // Lazy quantifiers prevent over-matching:
    //   greedy   lazy
    //   *        *?
    //   +        +?
    //   {n,}     {n,}?
    private static final Pattern pattern10 = Pattern.compile("<[Bb]>.*?</[Bb]>");

    /**
     * Runs each pattern against its sample text and prints the matches,
     * separated by numbered divider lines.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String txt = "send personal email to ben@forta.com or ben.forta@forta.com. for questions " +
                "about a book use support@forta.com or ben@urgent.forta.com. feel free to send" +
                "unsolicited email to spam@forta.com (wouldn't it be" +
                "nice if it were that simple, huh?). 88 1 + 6";
        printMatches(pattern, txt);
        System.out.println("===================1====================");
        printMatches(pattern02, txt);
        System.out.println("===================2====================");
        printMatches(pattern03, txt);
        System.out.println("===================3====================");
        printMatches(pattern04, txt);
        System.out.println("===================4====================");

        String txt02 = "hello .ben@forta.com is my email address";
        printMatches(pattern05, txt02);
        System.out.println("===================5====================");

        String txt03 = "the url is http://www.forta.com/, to connect" +
                "securely use https://www.forta.com/ instead.";
        printMatches(pattern06, txt03);
        System.out.println("===================6====================");

        String html = "<body bgcolor='#336633' text='#ffffff' width='666' height='444'>";
        printMatches(pattern07, html);
        System.out.println("===================7====================");

        String txt05 = "4/8/03\n" +
                "10-6-2004\n" +
                "2/2/2\n" +
                "01-01-01\n";
        printMatches(pattern08, txt05);
        System.out.println("===================8====================");

        String price = "$496.80 $1290.69 $26.43 $613.42 $7.61 $414.90 $25.00";
        printMatches(pattern09, price);
        System.out.println("===================9====================");

        String htm = "<B>i am a B</B> <b>i am a b too</b>";
        printMatches(pattern11, htm);
        System.out.println("===================10====================");
        printMatches(pattern10, htm);
    }

    /** Prints every match of {@code regex} in {@code input}, one per line. */
    private static void printMatches(Pattern regex, String input) {
        Matcher matcher = regex.matcher(input);
        while (matcher.find()) {
            System.out.println(matcher.group());
        }
    }
}
// output:
// ben@forta.com
// forta@forta.com
// support@forta.com
// ben@urgent.forta
// spam@forta.com
// ===================1====================
// 88
// 1
// 6
// ===================2====================
// 8
// 8
// 1
// +
// 6
// ===================3====================
// ben@forta.com
// ben.forta@forta.com
// support@forta.com
// ben@urgent.forta.com
// spam@forta.com
// ===================4====================
// ben@forta.com
// ===================5====================
// http://www.forta.com/
// https://www.forta.com/
// ===================6====================
// #336633
// #ffffff
// ===================7====================
// 4/8/03
// 10-6-2004
// 01-01-01
// ===================8====================
// $496
// $1290
// $613
// $414
// ===================9====================
// <B>i am a B</B> <b>i am a b too</b>
// ===================10====================
// <B>i am a B</B>
// <b>i am a b too</b>
匹配范围
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates character sets: explicit sets, ranges and negated sets.
 */
public class TestRangeMatch {
    // Any character, 'a', any character, then ".xls": this would also accept cat.xls.
    private static final Pattern pattern = Pattern.compile(".a.\\.xls");
    // Restricting the first character to n or s still finds a match inside usa1.xls.
    private static final Pattern pattern02 = Pattern.compile("[ns]a.\\.xls");
    // Anchored with ^ but compiled WITHOUT Pattern.MULTILINE (or the inline (?m)
    // flag), so ^ can only match at the very start of the input.
    private static final Pattern pattern03 = Pattern.compile("^[ns]a.\\.xls");
    // Sets are handy for ignoring case at selected positions only.
    private static final Pattern pattern04 = Pattern.compile("[Rr]eg[eE]x");
    // [0123456789] can be shortened to [0-9] (low to high) or written as \d.
    // The hyphen is a metacharacter only inside [].
    private static final Pattern pattern05 = Pattern.compile("^[ns]a[0123456789]\\.xls", Pattern.MULTILINE);
    private static final Pattern pattern06 = Pattern.compile("#[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]");
    // Negated set: a leading ^ inside [] negates every character and range in the set.
    private static final Pattern pattern07 = Pattern.compile("[ns]a[^0-9]\\.xls");

    /**
     * Prints the matches of the demo patterns against a file listing, a sentence
     * and an HTML tag.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String fileLs = "sales1.xls\n" +
                "orders3.xls\n" +
                "sales2.xls\n" +
                "sales.xls\n" +
                "sales3.xls\n" +
                "apac1.xls\n" +
                "europe2.xls\n" +
                "na1.xls\n" +
                "na2.xls\n" +
                "cat.xls\n" +
                "usa1.xls\n" +
                "sam.xls\n" +
                "sa1.xls\n";
        String sentence = "The phrase 'regular expression' is often abbreviated as RegEx or regex";
        String bodyTag = "<body bgcolor='#336633' text='#ffffff' width='666' height='444'>";

        dump(pattern03, fileLs);
        System.out.println("================");
        dump(pattern05, fileLs);
        dump(pattern04, sentence);
        dump(pattern06, bodyTag);
        dump(pattern07, fileLs);
    }

    /** Writes each successive match of {@code regex} within {@code text} to standard output. */
    private static void dump(Pattern regex, String text) {
        for (Matcher m = regex.matcher(text); m.find(); ) {
            System.out.println(m.group());
        }
    }
}
// output:
//================
// na1.xls
// na2.xls
// sa1.xls
// RegEx
// regex
// #336633
// #ffffff
// sam.xls
匹配位置
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates position matching: word boundaries, look-around, and the
 * string/line anchors ^ and $.
 */
public class TestLocationMatch {
    private static final Pattern pattern = Pattern.compile("cat");
    /** \s consumes the surrounding whitespace, so it becomes part of the match. */
    private static final Pattern pattern1 = Pattern.compile("\\scat\\s");
    /** Look-behind / look-ahead assert the whitespace without consuming it. */
    private static final Pattern pattern1_ref = Pattern.compile("(?<=\\s)cat(?=\\s)");
    /**
     * \b is a word boundary: the position between a word character (matched by \w)
     * and a non-word character (matched by \W). It matches a position, not a
     * character, so the text matched by \bcat\b has length 3.
     */
    private static final Pattern pattern2 = Pattern.compile("\\bcat\\b");
    private static final Pattern pattern3 = Pattern.compile("\\bcap");
    private static final Pattern pattern4 = Pattern.compile("cap\\b");
    /** Finds an XML prolog anywhere in the text. */
    private static final Pattern pattern5 = Pattern.compile("<\\?xml.*?\\?>");
    /**
     * Requires the XML prolog at the start of the input, preceded by optional
     * whitespace only. The anchor must come BEFORE \s*; with \s* first the
     * whitespace allowance is dead because ^ can only match at offset 0.
     * (Outside a character set ^ is an anchor; inside one it negates the set.)
     */
    private static final Pattern pattern6 = Pattern.compile("^\\s*<\\?xml.*?\\?>");
    /**
     * $ matches the end of the input, or the end of each line in multi-line mode.
     * Example use: nothing may follow the closing html tag, checked with
     * </[Hh][Tt][Mm][Ll]>\s*$ . Here (?m) makes $ match at every line end so each
     * trailing // comment is extracted.
     */
    private static final Pattern pattern7 = Pattern.compile("(?m)//.*$");

    /**
     * Runs the position-matching demos and prints their results.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String txt = "the cat scattered his food all over the room";
        String txt2 = "the captain wore his cap and cape proudly as \n" +
                "he sat listening to the recap of how his" +
                "crew saved the men from a capsized vessel";
        System.out.println(matchAll(pattern, txt));
        System.out.println("=====================1========================");
        System.out.println("【有空格】" + matchAll(pattern1, txt));
        System.out.println("=====================2========================");
        System.out.println("【无空格】" + matchAll(pattern1_ref, txt));
        System.out.println("=====================3========================");
        System.out.println("【无空格】" + matchAll(pattern2, txt));
        System.out.println("=====================4========================");
        System.out.println(replaceAll(pattern3, txt2, "@@@"));
        System.out.println("=====================5========================");
        // A raw "$$$" replacement throws IllegalArgumentException ("Illegal group
        // reference"): in a replacement string $ introduces a group reference such
        // as $1, and it must be followed by a group number or name.
        // See https://blog.csdn.net/qq_37502106/article/details/88642840
        // Either quote the replacement with Matcher.quoteReplacement(...) or escape
        // it by hand as \\$\\$\\$.
        System.out.println(replaceAll(pattern4, txt2, Matcher.quoteReplacement("$$$")));
        System.out.println("=====================6========================");
        String txt3 = "this xml file has a bad start\n" +
                " <?xml version='1.0' encoding='UTF-8' ?> \n" +
                "<beans xmlns=\"http://www.springframework.org/schema/beans\"\n" +
                " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" +
                " xsi:schemaLocation=\"http://www.springframework.org/schema/beans\n" +
                " http://www.springframework.org/schema/beans/spring-beans.xsd\">\n" +
                "</beans>";
        // Unanchored: the prolog is found even though text precedes it.
        System.out.println(matchAll(pattern5, txt3).size() == 1 ? "合格xml" : "不合格xml");
        System.out.println("=====================7========================");
        // Anchored: rejected, because non-whitespace text precedes the prolog.
        System.out.println(matchAll(pattern6, txt3).size() == 1 ? "合格xml" : "不合格xml");
        System.out.println("=====================8========================");
        String js = "ar URL= {\n" +
                " addBatchTemplate: '/myApp/addBatchTemplate',//添加模板\n" +
                " getBackDeviceTypeList:'/myApp/switcher-device-template/getFrontDeviceTypeList', //获取型号列表\n" +
                " loadConfig: '/myApp/loadConfig',//post 下发配置\n" +
                " isInvalidEdit: '/myApp/isInvalidEdit',//权限判断\n" +
                " isInvalidConfig: '/myApp/isInvalidConfig',//权限判断\n" +
                " getBatchTemplateInfo:'/myApp/getBatchTemplateInfo', //查询信息\n" +
                " };\n";
        System.out.println(matchAll(pattern7, js));
    }

    /**
     * Collects every match of {@code pattern} in {@code source}.
     *
     * @return the matched substrings in order of occurrence; empty if none
     */
    private static List<String> matchAll(Pattern pattern, String source) {
        List<String> matchLs = new ArrayList<>(10);
        Matcher matcher = pattern.matcher(source);
        while (matcher.find()) {
            matchLs.add(matcher.group());
        }
        return matchLs;
    }

    /**
     * Replaces every match of {@code pattern} in {@code source} with {@code replaceStr}.
     * String.replaceAll delegates to the same Matcher.replaceAll; the result is a
     * new string, so {@code source} is left untouched.
     */
    private static String replaceAll(Pattern pattern, String source, String replaceStr) {
        Matcher matcher = pattern.matcher(source);
        return matcher.replaceAll(replaceStr);
    }
}
// output:
// [cat, cat]
// =====================1========================
// 【有空格】[ cat ]
// =====================2========================
// 【无空格】[cat]
// =====================3========================
// 【无空格】[cat]
// =====================4========================
// the @@@tain wore his @@@ and @@@e proudly as
// he sat listening to the recap of how hiscrew saved the men from a @@@sized vessel
// =====================5========================
// the captain wore his $$$ and cape proudly as
// he sat listening to the re$$$ of how hiscrew saved the men from a capsized vessel
// =====================6========================
// 合格xml
// =====================7========================
// 不合格xml
// =====================8========================
// [//添加模板, //获取型号列表, //post 下发配置, //权限判断, //权限判断, //查询信息]
子表达式
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates sub-expressions (groups): scoping a quantifier, shortening a
 * pattern, and overriding the low precedence of the alternation operator.
 */
public class TestSubExp {
    /**
     * The quantifier applies only to the element right before it, here the
     * semicolon: this matches "&nbsp;" followed by extra semicolons
     * ("&nbsp;;;;;"), not a run of "&nbsp;" entities.
     */
    private static final Pattern pattern = Pattern.compile("&nbsp;{2,}");
    /** Grouping makes the quantifier apply to the whole entity: two or more "&nbsp;" in a row. */
    private static final Pattern pattern2 = Pattern.compile("(&nbsp;){2,}");
    /** Rough IPv4 match; the dots are escaped so they only match a literal dot. */
    private static final Pattern pattern3 = Pattern.compile("\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}");
    /** Same rough match, shortened by repeating a group. */
    private static final Pattern pattern4 = Pattern.compile("(\\d{1,3}\\.){3}\\d{1,3}");
    /**
     * Deliberately wrong: | has the lowest precedence and splits the whole
     * expression, so this means (19)|(20\d{2}) and matches "19" or "20xx".
     */
    private static final Pattern pattern5 = Pattern.compile("19|20\\d{2}");
    /** Grouping limits the alternation to the century, which matches a full year. */
    private static final Pattern pattern6 = Pattern.compile("(19|20)\\d{2}");
    /**
     * One octet of an IPv4 address (0-255). The rough pattern above accepts invalid
     * addresses such as 666.77.8.999. A regex cannot express a numeric range
     * directly, so the range is split into textual cases:
     *
     *   250-255   25[0-5]
     *   200-249   2[0-4]\d
     *   100-199   1\d{2}
     *   0-99      \d{1,2}
     *
     * The two upper cases cannot be merged into 2[0-5][0-5], which would miss
     * 206-209 among others. Alternatives are tried left to right and the first one
     * that succeeds wins, so the longer cases must come first; with \d{1,2} in
     * front, "200" would be matched as "20".
     */
    static String quarter = "(25[0-5])|(2[0-4]\\d)|(1\\d{2})|(\\d{1,2})";
    /**
     * Strict IPv4 match. The look-arounds forbid an adjacent digit on either side
     * so that no valid-looking slice is cut out of a longer digit run.
     */
    private static final Pattern pattern7 =
            Pattern.compile("(?<!\\d)((" + quarter + ")\\.){3}(" + quarter + ")(?!\\d)");

    /**
     * Runs the sub-expression demos and prints their results.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String s = "hello, my name is mike jordan, and i am" +
                "the best basketball player, rank No&nbsp;&nbsp;1";
        System.out.println(replaceAll(pattern, s, "空格"));
        System.out.println("=====================1========================");
        System.out.println(replaceAll(pattern2, s, "空格"));
        System.out.println("=====================2========================");
        String s1 = "ping 12.159.46.200 ... ";
        System.out.println(matchAll(pattern3, s1));
        System.out.println("=====================3========================");
        System.out.println(matchAll(pattern4, s1));
        System.out.println("=====================4========================");
        String s2 = "ID: 042" +
                "SEX: M" +
                "BIRTH: 1967-08-17" +
                "DIE: 2067-08-17" +
                "STATUS: Active";
        System.out.println(matchAll(pattern5, s2));
        System.out.println("=====================5========================");
        System.out.println(matchAll(pattern6, s2));
        System.out.println("=====================6========================");
        String s3 = "illegal ip: 666.77.8.999" +
                "legal ip: 12.159.46.200";
        System.out.println(matchAll(pattern7, s3));
        System.out.println("=====================7========================");
    }

    /**
     * Collects every match of {@code pattern} in {@code source}.
     *
     * @return the matched substrings in order of occurrence; empty if none
     */
    private static List<String> matchAll(Pattern pattern, String source) {
        List<String> matchLs = new ArrayList<>(10);
        Matcher matcher = pattern.matcher(source);
        while (matcher.find()) {
            matchLs.add(matcher.group());
        }
        return matchLs;
    }

    /**
     * Replaces every match of {@code pattern} in {@code source} with {@code replaceStr}.
     * String.replaceAll delegates to the same Matcher.replaceAll; the result is a
     * new string, so {@code source} is left untouched.
     */
    private static String replaceAll(Pattern pattern, String source, String replaceStr) {
        Matcher matcher = pattern.matcher(source);
        return matcher.replaceAll(replaceStr);
    }
}
// output:
// hello, my name is mike jordan, and i amthe best basketball player, rank No&nbsp;&nbsp;1
// =====================1========================
// hello, my name is mike jordan, and i amthe best basketball player, rank No空格1
// =====================2========================
// [12.159.46.200]
// =====================3========================
// [12.159.46.200]
// =====================4========================
// [19, 2067]
// =====================5========================
// [1967, 2067]
// =====================6========================
// [12.159.46.200]
// =====================7========================
前后查找
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates look-behind and look-ahead assertions, compared with plain
 * capturing groups.
 */
public class TestLookAround {
    // Tag plus content; \1 refers back to the tag name captured by group 1.
    private static final Pattern pattern = Pattern.compile("<(title)>(.*)</\\1>", Pattern.CASE_INSENSITIVE);
    // Content only: the tags are asserted by look-around and not consumed.
    private static final Pattern pattern2 = Pattern.compile("(?<=<(title)>).*(?=</\\1>)", Pattern.CASE_INSENSITIVE);
    // Every run of digits and dots.
    private static final Pattern pattern3 = Pattern.compile("[0-9.]+");
    // Prices including the dollar sign.
    private static final Pattern pattern4 = Pattern.compile("\\$[0-9.]+");
    // Look-behind: prices without the dollar sign.
    private static final Pattern pattern5 = Pattern.compile("(?<=\\$)[0-9.]+");

    /**
     * Alternative entry point that prints group contents. In
     * (?<=<(title)>).*(?=</\1>) the parentheses that delimit the look-behind and
     * look-ahead are not capturing groups; only (title) is, as group 1.
     *
     * @param args unused
     */
    public static void main01(String[] args) {
        String source = "<HEAD>" +
                "<TITLE>BEN FORTA'S HOMEPAGE</title>" +
                "</HEAD>";
        for (Matcher look = pattern2.matcher(source); look.find(); ) {
            System.out.println(look.group(0));
            System.out.println(look.group(1));
        }
        System.out.println("======");
        // Group numbering follows the order of the opening parentheses.
        Pattern nested = Pattern.compile("((<(title)>)(.*)(</(title)>))", Pattern.CASE_INSENSITIVE);
        for (Matcher layered = nested.matcher(source); layered.find(); ) {
            // 0: whole match, 1: outermost group, 2: opening tag, 3: its tag name,
            // 4: content, 5: closing tag, 6: its tag name
            for (int groupNo = 0; groupNo <= 6; groupNo++) {
                System.out.println(layered.group(groupNo));
            }
        }
    }

    /**
     * Prints whole matches, a single captured group, and look-around matches.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String s = "<HEAD>" +
                "<TITLE>BEN FORTA'S HOMEPAGE</TITLE>" +
                "</HEAD>";
        String s1 = "ABC01: $23.45" +
                "HGG42: $5.31" +
                "CFMX1: $899.00" +
                "XTC99: $69.96" +
                "Total items found: 4";

        // whole match
        System.out.println(matchAll(pattern, s));
        System.out.println("=====================1========================");
        // only the part matched by one sub-expression
        System.out.println(matchTagContent(pattern, s));
        System.out.println("=====================2========================");
        // look-around match
        System.out.println(matchAll(pattern2, s));
        System.out.println("=====================3========================");
        System.out.println(matchAll(pattern3, s1));
        System.out.println("=====================4========================");
        System.out.println(matchAll(pattern4, s1));
        System.out.println("=====================5========================");
        System.out.println(matchAll(pattern5, s1));
    }

    /** Returns the text captured by group 2 for every match of {@code pattern} in {@code source}. */
    private static List<String> matchTagContent(Pattern pattern, String source) {
        List<String> contents = new ArrayList<>(10);
        for (Matcher m = pattern.matcher(source); m.find(); ) {
            contents.add(m.group(2));
        }
        return contents;
    }

    /** Returns every whole match of {@code pattern} in {@code source}, in order. */
    private static List<String> matchAll(Pattern pattern, String source) {
        List<String> found = new ArrayList<>(10);
        for (Matcher m = pattern.matcher(source); m.find(); ) {
            found.add(m.group());
        }
        return found;
    }

    /**
     * Returns {@code source} with every match of {@code pattern} replaced by
     * {@code replaceStr}; the input string itself is not modified.
     */
    private static String replaceAll(Pattern pattern, String source, String replaceStr) {
        return pattern.matcher(source).replaceAll(replaceStr);
    }
}
// output:
// [<TITLE>BEN FORTA'S HOMEPAGE</TITLE>]
// =====================1========================
// [BEN FORTA'S HOMEPAGE]
// =====================2========================
// [BEN FORTA'S HOMEPAGE]
// =====================3========================
// [01, 23.45, 42, 5.31, 1, 899.00, 99, 69.96, 4]
// =====================4========================
// [$23.45, $5.31, $899.00, $69.96]
// =====================5========================
// [23.45, 5.31, 899.00, 69.96]
回溯引用
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates back-references, both inside a pattern (\1) and inside a
 * replacement string ($1, $2, ...).
 */
public class TestBackTraceRef {
    // First-level heading; equivalent to <[Hh]1>.*?</[Hh]1>
    private static final Pattern pattern = Pattern.compile("<h1>.*?</h1>", Pattern.CASE_INSENSITIVE);
    // Heading of any level; opening and closing levels are independent of each other.
    private static final Pattern pattern1 = Pattern.compile("<h[1-6]>.*?</h[1-6]>", Pattern.CASE_INSENSITIVE);
    // The back-reference forces the closing tag to repeat the opening one, which
    // rejects mismatched pairs. A back-reference can only refer to a sub-expression.
    private static final Pattern pattern2 = Pattern.compile("<(h[1-6])>.*?</\\1>", Pattern.CASE_INSENSITIVE);
    // A word followed by whitespace and the same word again.
    private static final Pattern pattern3 = Pattern.compile("\\s(\\w+)\\s\\1");
    // Patterns whose groups are referenced from the replacement text.
    private static final Pattern pattern4 = Pattern.compile("(\\w+[\\w.]*@[\\w.]+\\.\\w+)");
    private static final Pattern pattern5 = Pattern.compile("(\\d{3})-(\\d{3})-(\\d{4})");

    /**
     * Prints heading matches, repeated words, and two replacements that reuse
     * captured groups.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String html = "<BODY>" +
                "<H1>welcome to general expression lesson</H1>" +
                "<H2>regexp is a good tool to handle string</H2>" +
                "<H2>you can be a regex master</H2>" +
                "<H2>this is not valid</H3>" +
                "</BODY>";
        String s = "this is a block of of text," +
                "several words here are are" +
                "repeated, and and they" +
                "should not be";
        String s1 = "hi, ben@forta.com is my email address";
        String s2 = "313-555-1234 \n" +
                "248-555-9999 \n" +
                "810-555-9000 \n";

        System.out.println(matchAll(pattern, html));
        System.out.println("=====================1========================");
        System.out.println(matchAll(pattern1, html));
        System.out.println("=====================2========================");
        System.out.println(matchAll(pattern2, html));
        System.out.println("=====================3========================");
        System.out.println(matchAll(pattern3, s));
        System.out.println("=====================4========================");
        // In a replacement string $1, $2, ... insert what the numbered group matched.
        System.out.println(replaceAll(pattern4, s1, "<A HREF='mailto:$1'>$1</A>"));
        System.out.println("=====================5========================");
        System.out.println(replaceAll(pattern5, s2, "($1) $2-$3"));
    }

    /** Returns every whole match of {@code pattern} in {@code source}, in order. */
    private static List<String> matchAll(Pattern pattern, String source) {
        List<String> found = new ArrayList<>(10);
        for (Matcher m = pattern.matcher(source); m.find(); ) {
            found.add(m.group());
        }
        return found;
    }

    /**
     * Returns {@code source} with every match of {@code pattern} replaced by
     * {@code replaceStr}; the input string itself is not modified.
     */
    private static String replaceAll(Pattern pattern, String source, String replaceStr) {
        return pattern.matcher(source).replaceAll(replaceStr);
    }
}
// output:
// [<H1>welcome to general expression lesson</H1>]
// =====================1========================
// [<H1>welcome to general expression lesson</H1>, <H2>regexp is a good tool to handle string</H2>, <H2>you can be a regex master</H2>, <H2>this is not valid</H3>]
// =====================2========================
// [<H1>welcome to general expression lesson</H1>, <H2>regexp is a good tool to handle string</H2>, <H2>you can be a regex master</H2>]
// =====================3========================
// [ of of,  are are,  and and]
// =====================4========================
// hi, <A HREF='mailto:ben@forta.com'>ben@forta.com</A> is my email address
// =====================5========================
// (313) 555-1234
// (248) 555-9999
// (810) 555-9000
特殊替换
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates match-by-match replacement with appendReplacement/appendTail,
 * where the replacement text depends on how many matches came before.
 */
public class SpecialReplace {
    /**
     * Rewrites every case-insensitive occurrence of "java": odd-numbered
     * occurrences (1st, 3rd, ...) become "JAVA", even-numbered ones become "java".
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Matcher hits = Pattern.compile("java", Pattern.CASE_INSENSITIVE)
                .matcher("java Java JAVA JAva I love Java and you ?");
        StringBuffer rebuilt = new StringBuffer();
        for (int ordinal = 1; hits.find(); ordinal++) {
            boolean evenHit = (ordinal & 1) == 0;
            String substitute;
            if (evenHit) {
                substitute = "java";
            } else {
                substitute = "JAVA";
            }
            hits.appendReplacement(rebuilt, substitute);
        }
        // Copy whatever follows the last match.
        hits.appendTail(rebuilt);
        System.out.println(rebuilt);
    }
}
// output:
// JAVA java JAVA java I love JAVA and you ?
多行匹配
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Contrasts the ^ anchor with and without Pattern.MULTILINE.
 */
public class TestMultilineMatch {
    /**
     * Replaces anchored matches in a multi-line file listing, first in multi-line
     * mode and then in the default mode, and prints both results.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String fileLs = "sales1.xls\n" +
                "orders3.xls\n" +
                "sales2.xls\n" +
                "sales.xls\n" +
                "sales3.xls\n" +
                "apac1.xls\n" +
                "europe2.xls\n" +
                "na1.xls\n" +
                "na2.xls\n" +
                "cat.xls\n" +
                "usa1.xls\n" +
                "sam.xls\n" +
                "sa1.xls\n";

        // With Pattern.MULTILINE (same as the inline flag in "(?m)^[ns]a.\\.xls")
        // ^ matches at the start of every line.
        Pattern perLine = Pattern.compile("^[ns]a.\\.xls", Pattern.MULTILINE);
        System.out.println(perLine.matcher(fileLs).replaceAll("多行匹配"));

        // Without the flag ^ matches only at the start of the whole input.
        Pattern wholeInput = Pattern.compile("^[ns]a.\\.xls");
        System.out.println(wholeInput.matcher(fileLs).replaceAll("整串匹配"));
    }
}
// output:
// sales1.xls
// orders3.xls
// sales2.xls
// sales.xls
// sales3.xls
// apac1.xls
// europe2.xls
// 多行匹配
// 多行匹配
// cat.xls
// usa1.xls
// 多行匹配
// 多行匹配
//
// sales1.xls
// orders3.xls
// sales2.xls
// sales.xls
// sales3.xls
// apac1.xls
// europe2.xls
// na1.xls
// na2.xls
// cat.xls
// usa1.xls
// sam.xls
// sa1.xls
贪婪模式
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Contrasts a greedy quantifier (.*) with its lazy counterpart (.*?).
 */
public class TestGreedyAndLazyMode {
    /**
     * Replaces the first book element found by a greedy and by a lazy pattern and
     * prints both results.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String string = "<books><book>西游记</book><book>三国演义</book><book>水浒传</book></books>";

        // Greedy: .* extends to the last closing tag that still allows a match.
        Pattern greedy = Pattern.compile("<(book)>.*</\\1>", Pattern.CASE_INSENSITIVE);
        System.out.println(greedy.matcher(string).replaceFirst("贪婪模式"));

        // Lazy: .*? stops at the first closing tag.
        Pattern lazy = Pattern.compile("<(book)>.*?</\\1>");
        System.out.println(lazy.matcher(string).replaceFirst("懒惰模式"));
    }
}
// output:
// <books>贪婪模式</books>
// <books>懒惰模式<book>三国演义</book><book>水浒传</book></books>
纯文本和点
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates literal text matching and the dot metacharacter.
 */
public class TestPureTextAndDot {
    private static final Pattern pattern = Pattern.compile("Ben");
    private static final Pattern pattern02 = Pattern.compile("my");
    /** Matching is case-sensitive by default; CASE_INSENSITIVE switches that off at compile time. */
    private static final Pattern pattern03 = Pattern.compile("ben", Pattern.CASE_INSENSITIVE);
    /**
     * A dot matches any single character, including a dot itself, but by default
     * not a line terminator (SQL's LIKE uses _ for the same purpose).
     */
    private static final Pattern pattern04 = Pattern.compile("sales.");
    private static final Pattern pattern05 = Pattern.compile(".a.\\.xls");

    /**
     * Prints the matches of the case-insensitive and dot patterns.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String txt = "Hello, my name is Ben. Please visit my01 website at http://www.forta.com/.";
        String fileLs = "sales1.xls\n" +
                "orders3.xls\n" +
                "sales2.xls\n" +
                "sales.xls\n" +
                "sales3.xls\n" +
                "apac1.xls\n" +
                "europe2.xls\n" +
                "na1.xls\n" +
                "na2.xls\n" +
                "sa1.xls\n";

        for (Matcher names = pattern03.matcher(txt); names.find(); ) {
            System.out.println(names.group());
        }
        for (Matcher sales = pattern04.matcher(fileLs); sales.find(); ) {
            System.out.println(sales.group());
        }
        System.out.println("==================================================================");
        for (Matcher sheets = pattern05.matcher(fileLs); sheets.find(); ) {
            System.out.println(sheets.group());
        }
    }
}
// output:
// Ben
// sales1
// sales2
// sales.
// sales3
// ==================================================================
// na1.xls
// na2.xls
// sa1.xls
java正则api
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Explores the Matcher API: matches() versus find() versus lookingAt(), and the
 * effect of the DOTALL and MULTILINE flags.
 */
public class MathcesAndFindDiff {
    /**
     * Alternative entry point that calls matches(), find() and lookingAt() on one
     * matcher and prints each result with the start offset of the match.
     *
     * @param args unused
     */
    public static void main1(String[] args) {
        Pattern digits = Pattern.compile("\\d{3,5}");
        String charSequence = "123-34345-234-00";
        Matcher m = digits.matcher(charSequence);
        // matches() requires the whole input to match, so it fails here. The
        // original author notes that the find() which follows then reports start
        // offset 4 instead of 0, i.e. it does not begin again at the first token.
        print(m.matches());
        System.out.println("==============================================");
        // Probe the match position; without the matches() call above the author
        // observed 0 here.
        m.find();
        print(m.start());
        System.out.println("==============================================");
        // reset() puts the matcher back at the start of the input.
        m.reset();
        // first find(): result, then matched text and start offset
        print(m.find());
        System.out.println("=====================1========================");
        print(m.group() + " - " + m.start());
        System.out.println("=====================2========================");
        // second find(): result, then matched text and start offset
        print(m.find());
        System.out.println("=====================3========================");
        print(m.group() + " - " + m.start());
        System.out.println("=====================4========================");
        // lookingAt() attempts to match the input against the pattern starting at
        // the beginning of the region (a single prefix match).
        // first lookingAt(): result, then matched text and start offset
        print(m.lookingAt());
        System.out.println("=====================5========================");
        print(m.group() + " - " + m.start());
        System.out.println("=====================6========================");
        // second lookingAt(): result, then matched text and start offset
        print(m.lookingAt());
        System.out.println("=====================7========================");
        print(m.group() + " - " + m.start());
        System.out.println("=====================8========================");
    }

    /** Prints {@code o} followed by a line break. */
    private static void print(Object o) {
        System.out.println(o);
    }

    /**
     * For each sample pattern, reports whether it is found in a two-line input
     * with default flags and with DOTALL | MULTILINE.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String input = "I dream of engines\nmore engines, all day long";
        System.out.println("INPUT:" + input);
        System.out.println();
        String[] patt = {"engines.more engines", "ines\nmore", "engines$"};
        for (String regex : patt) {
            System.out.println("PATTERN:" + regex);
            boolean plainHit = Pattern.compile(regex).matcher(input).find();
            System.out.println("DEFAULT match " + plainHit);
            // DOTALL lets the dot match any character; MULTILINE is combined with it.
            int flags = Pattern.DOTALL | Pattern.MULTILINE;
            boolean flaggedHit = Pattern.compile(regex, flags).matcher(input).find();
            System.out.println("Multiline match " + flaggedHit);
            System.out.println();
        }
    }
}
// output 1:
// false
// ==============================================
// 4
// ==============================================
// true
// =====================1========================
// 123 - 0
// =====================2========================
// true
// =====================3========================
// 34345 - 4
// =====================4========================
// true
// =====================5========================
// 123 - 0
// =====================6========================
// true
// =====================7========================
// 123 - 0
// =====================8========================
// output2:
// INPUT:I dream of engines
// more engines, all day long
//
// PATTERN:engines.more engines
// DEFAULT match false
// Multiline match true
//
// PATTERN:ines
// more
// DEFAULT match true
// Multiline match true
//
// PATTERN:engines$
// DEFAULT match false
// Multiline match true
网友评论