假设有如下一段HTML数据,我们想获取其中img标签的src属性值。
<p>桥边姑娘,我把你放心上</p><p><img src="https://n.sinaimg.cn/news/transform/20171113/puY7-fynship2141885.jpg" _src="https://n.sinaimg.cn/news/transform/20171113/puY7-fynship2141885.jpg" style="width: 550px; height: 317px;"/></p>
具体实现
这里我们分两步走,先匹配img标签,然后再获取src后面的内容。具体代码如下:
/**
 * Matches an opening {@code <img ...>} or {@code <IMG ...>} tag. Group 2 is the text between
 * the tag name and the closing {@code >}, i.e. the attribute list.
 */
private static final Pattern IMAGE_TAG_PATTERN = Pattern.compile("<(img|IMG)(.*?)>");

/**
 * Matches a double-quoted {@code src} attribute; group 2 is its value. The negative
 * lookbehind keeps this from matching the tail of {@code _src} or {@code data-src}.
 */
private static final Pattern IMAGE_SRC_PATTERN =
        Pattern.compile("(?<![\\w-])(src|SRC)=\"(.*?)\"");

/**
 * Matches a double-quoted {@code _src} attribute; group 2 is its value. The negative
 * lookbehind keeps this from matching the tail of a longer attribute name.
 */
private static final Pattern IMAGE__SRC_PATTERN =
        Pattern.compile("(?<![\\w-])(_src|_SRC)=\"(.*?)\"");

/**
 * Prints the {@code src} and {@code _src} attribute values of every {@code <img>} tag found
 * in the given HTML fragment.
 *
 * <p>All {@code src} values are printed first, one per line, prefixed with {@code "src:"};
 * then all {@code _src} values, prefixed with {@code "_src_:"}. Tags that lack the attribute,
 * or whose value is blank after trimming, are skipped.
 *
 * @param srcStr the HTML text to scan; {@code null} or empty input prints nothing
 */
public static void matchImgSrcTag(String srcStr) {
    if (srcStr == null || srcStr.isEmpty()) {
        return;
    }
    // Each pass scans the input with its own tag matcher. Reusing a single matcher for the
    // second pass would continue from where the first pass stopped and find no tags.
    for (String value : collectAttributeValues(srcStr, IMAGE_SRC_PATTERN)) {
        System.out.println("src:" + value);
    }
    for (String value : collectAttributeValues(srcStr, IMAGE__SRC_PATTERN)) {
        System.out.println("_src_:" + value);
    }
}

/**
 * Collects, in document order, the trimmed non-empty value captured by
 * {@code attributePattern} (group 2) from each {@code <img>} tag in {@code html}.
 * Only the first occurrence of the attribute inside each tag is considered.
 */
private static List<String> collectAttributeValues(String html, Pattern attributePattern) {
    List<String> values = new ArrayList<>();
    Matcher tagMatcher = IMAGE_TAG_PATTERN.matcher(html);
    while (tagMatcher.find()) {
        String attributes = tagMatcher.group(2).trim();
        Matcher attributeMatcher = attributePattern.matcher(attributes);
        if (!attributeMatcher.find()) {
            continue;
        }
        String value = attributeMatcher.group(2).trim();
        if (!value.isEmpty()) {
            values.add(value);
        }
    }
    return values;
}
测试验证
/**
 * Demo entry point: passes a sample HTML fragment containing a single {@code <img>} tag
 * (with both {@code src} and {@code _src} attributes) to {@code matchImgSrcTag}.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    final String sampleHtml = "<p>桥边姑娘,我把你放心上</p><p><img src=\"https://n.sinaimg.cn/news/transform/20171113/puY7-fynship2141885.jpg\" _src=\"https://n.sinaimg.cn/news/transform/20171113/puY7-fynship2141885.jpg\" style=\"width: 550px; height: 317px;\"/></p>";
    matchImgSrcTag(sampleHtml);
}
output:
src:https://n.sinaimg.cn/news/transform/20171113/puY7-fynship2141885.jpg
_src_:https://n.sinaimg.cn/news/transform/20171113/puY7-fynship2141885.jpg
网友评论