public static String delHTMLTag(String htmlStr) {
//定义script的正则表达式
String regExscript = "";
//定义style的正则表达式
String regExStyle = "]*?>[\s\S]*?";
//定义HTML标签的正则表达式
String regExHtml = "<[^>]+>";
Pattern pscript = Pattern.compile(regExscript, Pattern.CASE_INSENSITIVE);
Matcher mscript = pscript.matcher(htmlStr);
//过滤script标签
htmlStr = mscript.replaceAll("");
Pattern pStyle = Pattern.compile(regExStyle, Pattern.CASE_INSENSITIVE);
Matcher mStyle = pStyle.matcher(htmlStr);
//过滤style标签
htmlStr = mStyle.replaceAll("");
Pattern pHtml = Pattern.compile(regExHtml, Pattern.CASE_INSENSITIVE);
Matcher mHtml = pHtml.matcher(htmlStr);
//过滤html标签
htmlStr = mHtml.replaceAll("");
//返回文本字符串
return htmlStr.trim();
}
// Reference article: "Java去掉html标签的各种姿势" (various ways to strip HTML tags in Java)



