//需求:在分组1中匹配meta中author属性的值
//源串:
//
//another author="Zjmainstay too"
//预期:分组1得到Zjmainstay
//正则:
// Runs a MULTILINE pattern over a sample string and prints capture group 1 of
// every match.
// NOTE(review): both literals below look damaged. The sample contains unescaped
// double quotes, and the pattern "]+>$" declares no capturing group, so
// matcher.group(1) cannot succeed as written. Presumably HTML markup and
// backslash escapes were stripped from this file; restore the literals from the
// original source before relying on this test.
@Test
public void test1(){
String source="n" +
"another author="Zjmainstay too"";
// NOTE(review): 'result' is never read in this method.
StringBuffer result=new StringBuffer();
Pattern pattern = Pattern.compile(
"]+>$",
Pattern.MULTILINE);
Matcher matcher = pattern.matcher(source);
while(matcher.find()){
System.out.println(matcher.group(1));
}
}
// Regex: (?<=[?&])(\w+)=(\w+)
@Test
public void test2(){
String source="https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&tn=44004473_2_oem_dg&wd=libreOffice&fenlei=256&rsv_pq=b08801ed0018207f&rsv_t=2577CzDgz4Kf9tObFCys6YC4CE1XepzLb1nvO8zmcYnrEwZPQxwB2GPcE8qgfx74Xu0AOEESbrA&rqlang=cn&rsv_enter=0&rsv_dl=tb&rsv_sug3=2&rsv_sug1=2&rsv_sug7=101&rsv_btype=i&prefixsug=libreOffice&rsp=3&rsv_sug4=5490";
HashMap resultMap = new HashMap<>();
Pattern pattern = Pattern.compile(
"(?<=[?&])(\w+)=(\w+)",
Pattern.DOTALL);
Matcher matcher = pattern.matcher(source);
int index=1;
while(matcher.find()){
System.out.println(index+++"."+matcher.group(1)+"="+matcher.group(2));
resultMap.put(matcher.group(1),matcher.group(2));
}
}
@Test
public void test3(){
// String source="https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&tn=44004473_2_oem_dg&wd=libreOffice&fenlei=256&rsv_pq=b08801ed0018207f&rsv_t=2577CzDgz4Kf9tObFCys6YC4CE1XepzLb1nvO8zmcYnrEwZPQxwB2GPcE8qgfx74Xu0AOEESbrA&rqlang=cn&rsv_enter=0&rsv_dl=tb&rsv_sug3=2&rsv_sug1=2&rsv_sug7=101&rsv_btype=i&prefixsug=libreOffice&rsp=3&rsv_sug4=5490";
String source="https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&tn=44004473_2_oem_dg&wd=libreOffice&fenl" +
"ei=256&rsv_pq=b08801ed0018207f&rsv_t_a_C=2577CzDgz4Kf9tObFCys6YC4CE1XepzLb1nvO8zmcYnrEwZPQxwB2GPcE8qgf" +
"x74Xu0AOEESbrA&rqlang=cn&rsv_enter=0&rsv_dl=tb&rsv_sug3=2&rsv_sug1=2&rsv_sug7=101&rsv_btype=i&prefixsug=libreOffice&rsp=3&rsv_sug4=5490&a_b_c=123";
StringBuffer result=new StringBuffer();
Pattern pattern = Pattern.compile(
"(_\w)",
Pattern.DOTALL);
Matcher matcher = pattern.matcher(source);
while(matcher.find()){
String valueBe=matcher.group(1);
String valueAf=valueBe.substring(1).toUpperCase();
matcher.appendReplacement(result,valueAf);
}
matcher.appendTail(result);
System.out.println(result.toString());
}
//需求:匹配每行字母个数是偶数个的数据,每行数据不为空,正则不能存在分组1
//源串:
//a
//ab
//abc
//abcd
//预期:
//匹配得到 ab 和 abcd,不包含分组1
@Test
public void test4(){
String source="an" +
"abn" +
"abcn" +
"abcdn" +
"alksadn" +
"asdfan" +
"iiksn";
Pattern pattern = Pattern.compile(
"^(?:[\w][\w])+$",
Pattern.MULTILINE
);
Matcher matcher = pattern.matcher(source);
while(matcher.find()){
//匹配无分组
System.out.println(source.substring(matcher.start(),matcher.end()));
}
}
//需求:匹配由 A/S/D/F 4个字母(区分大小写)组成的长度为3字符串
//源串:
//ABC
//ASD
//ADS
//ASF
//BBC
//A|S
//A|D
//ASDF
// Prints every line of the sample that is exactly three characters long and made
// up only of the upper-case letters A, S, D and F.
@Test
public void test5() {
    String source = "ABC\n"
            + "ASD\n"
            + "ADS\n"
            + "ASF\n"
            + "BBC\n"
            + "A|S\n"
            + "A|D\n"
            + "ASDF";
    // MULTILINE makes ^ and $ anchor at each line, so "ASDF" (four letters) fails.
    Pattern pattern = Pattern.compile("^[ASDF]{3}$", Pattern.MULTILINE);
    Matcher matcher = pattern.matcher(source);
    while (matcher.find()) {
        System.out.println(matcher.group());
    }
}
//sadf|affa&*^ds|asdf上课ds|2343|测试
// Extract the '|'-separated fields that lie between the first '|' and the last '|'.
@Test
public void test6(){
String source="sadf|affa&*^ds|asdf上课ds|2343|测试";
Pattern pattern = Pattern.compile(
"(?<=\|)(.*?)(?=\|)",
Pattern.DOTALL
);
Matcher matcher = pattern.matcher(source);
while(matcher.find()){
System.out.println(matcher.group(1));
}
}
//需求:匹配每行数据中以.jpg/.jpeg/.png/.gif结尾的图片名称(含后缀)
//源串:
//image.jpg
//image.jpeg
//image.png
//image.gif
//not_image.txt
//not_image.doc
//not_image.xls
//not_image.ppt
//预期:匹配 image.jpg/image.jpeg/image.png/image.gif 4个结果
@Test
public void test7(){
String source="image.jpgn" +
"image.jpegn" +
"image.pngn" +
"image.gifn" +
"not_image.txtn" +
"not_image.docn" +
"not_image.xlsn" +
"not_image.ppt";
Pattern pattern = Pattern.compile(
"^([\w\W]+?)(?<=\.jpg|\.jpeg|\.png|\.gif)$",
Pattern.MULTILINE
);
Matcher matcher = pattern.matcher(source);
while(matcher.find()){
System.out.println(matcher.group(1));
}
}
//匹配连续相同3次的数字
//源串:
//111
//121
//112
//222
//预期:匹配 111/222 两组数据
@Test
public void test8(){
String source="111n" +
"121n" +
"112n" +
"222";
Pattern pattern = Pattern.compile(
"^(\d)\1\1$",
Pattern.MULTILINE
);
Matcher matcher = pattern.matcher(source);
while(matcher.find()){
System.out.println(source.substring(matcher.start(),matcher.end()));
}
}
//分别使用单行模式和普通模式匹配id="author"的div中数据,div标签不在同一行
//源串:
//
//Zjmainstay
//
//预期:Zjmainstay
// Prints the text of every match of a pattern that looks for a run of word
// characters preceded by id="author" ... '>' and followed by a closing div tag.
// NOTE(review): the sample literal contains no div markup and the pattern literal
// has unescaped quotes and single backslashes, so this does not lex as Java.
// Presumably HTML tags and backslash escapes were stripped from this file;
// restore both literals from the original source.
@Test
public void test9(){
String source="n" +
"Zjmainstayn" +
"n" +
"n" +
"Zjmainstay1n" +
"";
// The look-behind uses bounded {0,100} quantifiers rather than '*'.
Pattern pattern = Pattern.compile(
"(?<=id="author"\s{0,100}>\s{0,100})\w+(?=\s*<\/div>)"
);
Matcher matcher = pattern.matcher(source);
while(matcher.find()){
// Prints the whole match (group 0) via its start/end offsets.
System.out.println(source.substring(matcher.start(),matcher.end()));
}
}
//需求:匹配每行中包含“作者”或者“读者”的数据
//源串:
//本文的作者是Zjmainstay
//本文有很多读者
//读者可以是任何一个地方的人
//这里的任何一个地方说明读者也能在国外
//什么乱七八糟的推理
//你不匹配我,凭什么要我推荐你的博客 www.zjmainstay.cn
//预期:匹配
//本文的作者是Zjmainstay
//本文有很多读者
//读者可以是任何一个地方的人
//这里的任何一个地方说明读者也能在国外
// Prints every line of the sample that contains "作者" or "读者".
@Test
public void test10() {
    String source = "本文的作者是Zjmainstay\n"
            + "本文有很多读者\n"
            + "读者可以是任何一个地方的人\n"
            + "这里的任何一个地方说明读者也能在国外\n"
            + "什么乱七八糟的推理\n"
            + "你不匹配我,凭什么要我推荐你的博客 www.zjmainstay.cn";
    // The alternation is only used for grouping, so it is non-capturing.
    Pattern pattern = Pattern.compile("^.*(?:作者|读者).*$", Pattern.MULTILINE);
    Matcher matcher = pattern.matcher(source);
    while (matcher.find()) {
        System.out.println(matcher.group());
    }
}
//匹配多种或条件的数据,有特殊限制(不使用环视)
//需求:匹配每行中“读者”在开头或结尾的数据
//源串:
//本文作者是Zjmainstay,有很多读者
//读者可以是任何一个地方的人
//这里的任何一个地方说明读者也能在国外
//预期:匹配
//本文作者是Zjmainstay,有很多读者
//读者可以是任何一个地方的人
// Prints every line of the sample that starts or ends with "读者", using plain
// alternation (no lookaround).
@Test
public void test11() {
    String source = "本文作者是Zjmainstay,有很多读者\n"
            + "读者可以是任何一个地方的人\n"
            + "这里的任何一个地方说明读者也能在国外";
    // Either "读者" followed by the rest of the line, or a line whose tail is "读者".
    Pattern pattern = Pattern.compile("^(?:读者.*|.*读者)$", Pattern.MULTILINE);
    Matcher matcher = pattern.matcher(source);
    while (matcher.find()) {
        System.out.println(matcher.group());
    }
}
//匹配多种或条件的数据,有特殊限制(使用环视)
//需求:匹配每行中“读者”在开头或结尾的数据
//源串:
//本文作者是Zjmainstay,有很多读者
//读者可以是任何一个地方的人
//这里的任何一个地方说明读者也能在国外
//预期:匹配
//本文作者是Zjmainstay,有很多读者
//读者可以是任何一个地方的人
// Prints every line of the sample that starts or ends with "读者", this time
// expressed with lookaround assertions.
@Test
public void test12() {
    String source = "本文作者是Zjmainstay,有很多读者\n"
            + "读者可以是任何一个地方的人\n"
            + "这里的任何一个地方说明读者也能在国外";
    // First branch: look ahead for "读者" at the start of the line, then take the line.
    // Second branch: take the line, then look behind to confirm it ended in "读者".
    Pattern pattern = Pattern.compile(
            "^(?:(?=读者).*|.*(?<=读者))$",
            Pattern.MULTILINE);
    Matcher matcher = pattern.matcher(source);
    while (matcher.find()) {
        System.out.println(matcher.group());
    }
}
//需求:校验密码必须包含字母、数字和特殊字符,6-16位,假定特殊字符为 -_= 三个字符
//源串:
//12345
//123456
//1234561234561234
//12345612345612345
//a1234
//a12345
//-1234
//-12345
//a-123
//a-1234
//a-1234a-1234a-12
//a-1234a-1234a-1234
//aaaaa
//aaaaaa
//-_=-_
//-_=-_=
//预期:匹配
//a-1234
//a-1234a-1234a-12
@Test
public void test13(){
String source="12345n" +
"123456n" +
"1234561234561234n" +
"12345612345612345n" +
"a1234n" +
"a12345n" +
"-1234n" +
"-12345n" +
"a-123n" +
"a-1234n" +
"a-1234a-1234a-12n" +
"a-1234a-1234a-1234n" +
"aaaaan" +
"aaaaaan" +
"-_=-_n" +
"-_=-_=";
Pattern pattern = Pattern.compile(
"(?=.*[a-zA-Z])(?=.*\d)(?=.*[-_=])^.{6,16}$",
Pattern.MULTILINE
);
Matcher matcher = pattern.matcher(source);
while(matcher.find()){
System.out.println(source.substring(matcher.start(),matcher.end()));
}
}
//特殊限制(环视否定)
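//(8.1) Use \d{1,3} to match 1-999; the number must not start with 0
// Requirement: use \d{1,3} to match the values 1-999 on each line; a value must not start with 0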
//源串:
//1
//10
//100
//999
//1000
//01
//001
//预期:匹配
//1
//10
//100
//999
@Test
public void test14(){
String source="1n" +
"10n" +
"100n" +
"999n" +
"1000n" +
"01n" +
"001";
Pattern pattern = Pattern.compile(
"(?!.*^0)^\d{1,3}$",
Pattern.MULTILINE
);
Matcher matcher = pattern.matcher(source);
while(matcher.find()){
System.out.println(source.substring(matcher.start(),matcher.end()));
}
}
//匹配除了span标签外的所有标签
//需求:匹配除了内容标签外的所有内容 格式标签
//源串:
//匹配我
//不匹配我
//匹配我
//匹配我
//预期:匹配
//匹配我
//匹配我
//匹配我
// Prints capture group 1 of every line that starts with '<' and does not contain
// "span" anywhere after it.
// NOTE(review): none of the sample lines below starts with '<', and one literal
// is split across two physical lines, so this does not lex as Java. Presumably
// the HTML tags and the "\n" escapes were stripped from this file; restore the
// sample from the original source.
@Test
public void test15(){
String source="匹配我n" +
"不匹配我n" +
"匹配我
n" +
"匹配我n";
// (?!.*span) is a negative look-ahead evaluated right after the leading '<'.
Pattern pattern = Pattern.compile(
"^<(?!.*span)(.*)$",
Pattern.MULTILINE
);
Matcher matcher = pattern.matcher(source);
while(matcher.find()){
System.out.println(matcher.group(1));
}
}
//需求:给源串每个链接加上http://www.zjmainstay.cn前缀
//源串:
//正则文章合集(All In One)
//正则入门教程
//正则高级教程
//正则环视详解
//PHP cURL应用
//预期:替换得到
//正则文章合集(All In One)
//正则入门教程
//正则高级教程
//正则环视详解
//PHP cURL应用
// Replaces every match of the pattern with "http://www.zjmainstay.cn/" and prints
// the rewritten text. The pattern targets a '/' that directly follows href=".
// NOTE(review): the sample lines contain no href attributes and the pattern
// literal has an unescaped quote, so this does not lex as Java. Presumably the
// anchor markup and backslash escapes were stripped from this file; restore both
// literals from the original source.
@Test
public void test16(){
String source= "正则文章合集(All In One)n" +
"正则入门教程n" +
"正则高级教程n" +
"正则环视详解n" +
"PHP cURL应用";
StringBuffer result=new StringBuffer();
Pattern pattern = Pattern.compile(
"(?<=href=")(\/)",
Pattern.MULTILINE
);
Matcher matcher = pattern.matcher(source);
String replaceStr="http://www.zjmainstay.cn/";
while(matcher.find()){
// Copies the text before the match plus the replacement into 'result'.
matcher.appendReplacement(result,replaceStr);
}
// Copies whatever follows the last match.
matcher.appendTail(result);
System.out.println(result);
}
//需求:将每行特定格式数据格式化为SQL语句
//源串:
//1 2017-04-11 Zjmainstay
//2 2017-04-12 Nobody
//3 2017-04-13 Somebody
//预期:替换得到
//INSERT INTO table_log(`id`, `created_at`, `author`) values('1', '2017-04-11', 'Zjmainstay');
//INSERT INTO table_log(`id`, `created_at`, `author`) values('2', '2017-04-12', 'Nobody');
//INSERT INTO table_log(`id`, `created_at`, `author`) values('3', '2017-04-13', 'Somebody');
@Test
public void test17(){
String source="1 2017-04-11 Zjmainstayn" +
"2 2017-04-12 Nobodyn" +
"3 2017-04-13 Somebody";
Matcher matcher = Pattern.compile(
"^([\w])\s([\w-]+)\s([\w]+)",
Pattern.MULTILINE
).matcher(source);
String format="INSERT INTO table_log(`id`, `created_at`, `author`) values('$1', '$2', '$3');";
while(matcher.find()){
System.out.println(matcher.replaceAll(format));
}
}
//需求:匹配html标签的属性值,属性值可以由双引号、单引号、无单双引号定界
//源串:
//
//预期:分组匹配
//I'm Zjmainstay
//author
//2017
//27
// Prints capture group 2 of every match of a pattern that follows an '=' sign:
// group 1 is an optional quote character, group 2 the text up to the same quote
// (back-reference \1) followed by whitespace or '>'.
// NOTE(review): the sample literal is empty and the pattern literal has an
// unescaped quote and single backslashes, so this does not lex as Java.
// Presumably the HTML tag and backslash escapes were stripped from this file;
// restore both literals from the original source.
@Test
public void test18(){
String source="";
Matcher matcher = Pattern.compile(
"(?<==)(["']?)(.*?)\1[\s>]",
Pattern.DOTALL
).matcher(source);
while(matcher.find()){
System.out.println(matcher.group(2));
}
}
//需求:匹配0.00-100.00的数值,可以有0-2位小数,不能以小数点结尾,不能以2个以上的0开头
//思路:(100|10-99|0-9) + 0-2小数位 + 排除小数点结尾、2个以上0开头的情况
//源串:
//0
//1
//0.0
//0.00
//9.00
//18.00
//27.0
//36.00
//45.00
//54.00
//63.00
//72.00
//81.00
//90.00
//99.99
//100.00
//0.
//001
//100.01
//100.001
//101
//预期:匹配0.00~100.00
@Test
public void test19(){
String source="0n" +
"1n" +
"0.0n" +
"0.00n" +
"9.00n" +
"18.00n" +
"27.0n" +
"36.00n" +
"35.n" +
"45.00n" +
"54.00n" +
"63.00n" +
"72.00n" +
"81.00n" +
"90.00n" +
"99.99n" +
"100.00n" +
"0.n" +
"001n" +
"100.01n" +
"100.001n" +
"101";
Matcher matcher = Pattern.compile(
"(?!.*^00)^(([0-9]|(?:[1-9]\d))(?:\.\d{1,2})?|100.00)(?!<\.$)$",
Pattern.MULTILINE
).matcher(source);
while(matcher.find()){
System.out.println(source.substring(matcher.start(),matcher.end()));
}
}
//匹配链接中的文件名
//需求:利用贪婪模式,分组1得到每行链接中的文件名
//源串:
//http://localhost.com/a/b/c/d/file1.txt
//https://localhost.com/a/b/file2long.jpg
//预期:分组0匹配行数据,分组1匹配文件名
//file1.txt
//file2long.jpg
@Test
public void test20(){
String source="http://localhost.com/a/b/c/d/file1.txtn" +
"https://localhost.com/a/b/file2long.jpg";
Matcher matcher = Pattern.compile(
"\/([\w\.]+)$",
Pattern.MULTILINE
).matcher(source);
while(matcher.find()){
System.out.println(matcher.group(1));
}
}
//限定字符贪婪优化匹配性能
//需求:匹配div id="author"的标签内容
//源串:
//Zjmainstay
//预期:利用贪婪模式去掉div中的噪点(无关数据),分组1匹配到Zjmainstay
//正则:
// Prints capture group 1 of every match, where group 1 is the text between a '>'
// and a closing div tag.
// NOTE(review): the sample literal contains no div markup and the pattern starts
// with a dangling "]+>", so the opening part of the expression is missing.
// Presumably the HTML tags and backslash escapes were stripped from this file;
// restore both literals from the original source.
@Test
public void test21(){
String source="Zjmainstay";
Matcher matcher = Pattern.compile(
"]+>(.*)<\/div>",
Pattern.DOTALL
).matcher(source);
while(matcher.find()){
System.out.println(matcher.group(1));
}
}
//需求:匹配p标签内容
//源串:
//内容1
//内容2
//预期:
//在分组1中匹配到内容1和内容2
// Prints capture group 1 of every match, where group 1 is the shortest run of
// text that ends at a closing p tag.
// NOTE(review): the sample literal is split across physical lines and contains no
// p tags, and the pattern has lost whatever preceded "(.*?)". Presumably the HTML
// tags and backslash escapes were stripped from this file; restore both literals
// from the original source.
@Test
public void test22(){
String source="内容1
内容2
";
Matcher matcher = Pattern.compile(
"(.*?)<\/p>",
Pattern.DOTALL
).matcher(source);
while(matcher.find()){
System.out.println(matcher.group(1));
}
}
//需求:在分组1中匹配css或script的链接
//源串:
//
//
//预期:
//main.min.js
//main.css
// Prints capture group 1 of every match, where group 1 is the shortest text that
// follows src=" or href=" and precedes the next double quote.
// NOTE(review): the sample literal holds no markup and the pattern literal has
// unescaped quotes, so this does not lex as Java. Presumably the script/link tags
// and backslash escapes were stripped from this file; restore both literals from
// the original source.
@Test
public void test23(){
String source="n" +
"";
Matcher matcher = Pattern.compile(
"(?:src="|href=")([\w\W]+?)(?=")",
Pattern.MULTILINE
).matcher(source);
while(matcher.find()){
System.out.println(matcher.group(1));
}
}
//需求:表达式格式固定,提取其中的数值
//源串:
//(20+170)-5*1/5=?
//预期:
//A:20
//B:170
//C:5
//D:1
//E:5
//F:?
@Test
public void test24(){
String source="(20+170)-5*1/5=?";
Matcher matcher = Pattern.compile(
"\((\d+)\+(\d+)\)\-(\d+)\*(\d+)\/(\d+)=(\?)",
Pattern.DOTALL
).matcher(source);
while(matcher.find()){
System.out.println("A:"+matcher.group(1));
System.out.println("B:"+matcher.group(2));
System.out.println("C:"+matcher.group(3));
System.out.println("D:"+matcher.group(4));
System.out.println("E:"+matcher.group(5));
System.out.println("F:"+matcher.group(6));
}
}
//需求:在不对/转义的情况下匹配p标签内容
//源串:
//
//内容1
//内容2
//预期:
//在分组1中匹配到内容1和内容2
// Prints capture group 1 of every match, where group 1 is the shortest run of
// text that is followed by a '<' (checked with a look-ahead, so '/' needs no
// escaping).
// NOTE(review): the sample literal is split across physical lines and contains no
// markup, and the pattern has lost whatever preceded "(.*?)". Presumably the HTML
// tags were stripped from this file; restore both literals from the original
// source.
@Test
public void test25(){
String source="内容1
内容2
";
Matcher matcher = Pattern.compile(
"(.*?)(?=<)",
Pattern.DOTALL
).matcher(source);
while(matcher.find()){
System.out.println(matcher.group(1));
}
}
//需求:匹配内容为数字的div
//源串:
//ABC123
//预期:
//123
//错误正则:/d+



