- 一、代码示例
- 1.引入库
提示:字典文件位于常量 filePath 所指定的路径,即 private static final String filePath = "/usr/share/digital/files/dictionary.txt";(每行一个敏感词)。
一、代码示例
1. 引入库
代码如下:
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Detects sensitive words in text using a character trie that is built once,
 * at class-initialization time, from a dictionary file (one word per line).
 *
 * <p>If the dictionary cannot be read, the error is logged and the trie is left
 * empty, so every query simply reports "no sensitive words" instead of failing.
 *
 * <p>Dictionary entries shorter than {@link #MIN_WORD_LENGTH} characters are
 * never reported as matches.
 */
public class SensitiveWordUtil {

    private static final Logger logger = LoggerFactory.getLogger(SensitiveWordUtil.class);

    /**
     * Location of the dictionary file. For local development a path such as
     * {@code src/main/resources/dictionary.txt} can be substituted.
     */
    private static final String filePath = "/usr/share/digital/files/dictionary.txt";

    /** Matches shorter than this many characters are ignored. */
    private static final int MIN_WORD_LENGTH = 2;

    /** Match type: stop at the shortest dictionary word found at a position. */
    public static final int MIN_MATCH_TYPE = 1;

    /**
     * Match type: take the longest dictionary word found at a position.
     * The misspelled name is kept for backward compatibility; prefer
     * {@link #MAX_MATCH_TYPE} in new code.
     */
    public static final int MAX_MaTCH_TYPE = 2;

    /** Correctly spelled alias of {@link #MAX_MaTCH_TYPE}. */
    public static final int MAX_MATCH_TYPE = MAX_MaTCH_TYPE;

    /** Root of the trie; never {@code null}, empty when no dictionary was loaded. */
    private static TrieNode root = new TrieNode();

    static {
        // Build the trie once, when the class is first used.
        initSensitiveWordMap(getSensitiveWordSet());
    }

    /** One trie node: its children keyed by the next character, plus an end-of-word flag. */
    private static final class TrieNode {
        // Small initial capacity, mirroring the per-node maps of the original implementation.
        private final Map<Character, TrieNode> children = new HashMap<>(2);
        private boolean end;
    }

    /**
     * Reads the dictionary file as UTF-8, one word per line.
     *
     * @return the set of non-empty lines, or an empty set if the file could not be read
     */
    private static Set<String> getSensitiveWordSet() {
        Set<String> words = new HashSet<>();
        // try-with-resources closes the whole reader chain, even when reading fails.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(filePath), StandardCharsets.UTF_8))) {
            boolean firstLine = true;
            String line;
            while ((line = reader.readLine()) != null) {
                if (firstLine) {
                    firstLine = false;
                    // A UTF-8 byte-order mark would otherwise become part of the first word.
                    if (line.startsWith("\uFEFF")) {
                        line = line.substring(1);
                    }
                }
                if (!line.isEmpty()) {
                    words.add(line);
                }
            }
            logger.info("敏感词的数量:{}", words.size());
            return words;
        } catch (IOException | RuntimeException e) {
            // Best effort: this runs inside the static initializer, so an escaping
            // exception would make the whole class unusable.
            logger.error("获取本地敏感词库出错", e);
            return Collections.emptySet();
        }
    }

    /**
     * Builds the trie from the given words and installs it as the active dictionary.
     *
     * @param sensitiveWordSet words to index; {@code null} or empty yields an empty trie
     */
    private static void initSensitiveWordMap(Set<String> sensitiveWordSet) {
        TrieNode newRoot = new TrieNode();
        if (sensitiveWordSet != null) {
            for (String word : sensitiveWordSet) {
                if (word == null || word.isEmpty()) {
                    continue;
                }
                TrieNode node = newRoot;
                for (int i = 0; i < word.length(); i++) {
                    Character ch = word.charAt(i);
                    TrieNode child = node.children.get(ch);
                    if (child == null) {
                        child = new TrieNode();
                        node.children.put(ch, child);
                    }
                    node = child;
                }
                // The node reached by the last character marks a complete word.
                node.end = true;
            }
        }
        root = newRoot;
    }

    /**
     * Determines the length of the dictionary word that starts at {@code beginIndex}.
     *
     * @param txt        text being scanned
     * @param beginIndex index of the first character to match
     * @param matchType  {@link #MIN_MATCH_TYPE} for the shortest word, otherwise the longest
     * @return length of the matched word, or 0 if no word of at least
     *         {@link #MIN_WORD_LENGTH} characters starts at {@code beginIndex}
     */
    private static int checkSensitiveWord(String txt, int beginIndex, int matchType) {
        TrieNode node = root;
        // Length at the most recent end-of-word node. Counting every traversed
        // character instead would report a non-word prefix when the text leaves
        // the trie part-way into a longer dictionary entry.
        int matchedLength = 0;
        for (int i = beginIndex; i < txt.length(); i++) {
            node = node.children.get(txt.charAt(i));
            if (node == null) {
                break;
            }
            int length = i - beginIndex + 1;
            if (node.end && length >= MIN_WORD_LENGTH) {
                matchedLength = length;
                if (matchType == MIN_MATCH_TYPE) {
                    break;
                }
            }
        }
        return matchedLength;
    }

    /**
     * Collects the distinct sensitive words occurring in {@code txt}. Scanning
     * resumes after each match, so matches never overlap.
     *
     * @param txt       text to scan; {@code null} yields an empty set
     * @param matchType {@link #MIN_MATCH_TYPE} or {@link #MAX_MATCH_TYPE}
     * @return a new mutable set of the words found, never {@code null}
     */
    private static Set<String> getSensitiveWord(String txt, int matchType) {
        Set<String> found = new HashSet<>();
        if (txt == null) {
            return found;
        }
        int i = 0;
        while (i < txt.length()) {
            int length = checkSensitiveWord(txt, i, matchType);
            if (length > 0) {
                found.add(txt.substring(i, i + length));
                i += length;
            } else {
                i++;
            }
        }
        return found;
    }

    /**
     * Tells whether the text contains at least one sensitive word.
     *
     * @param txt text to check; {@code null} is treated as containing none
     * @return {@code true} if any sensitive word occurs in {@code txt}
     */
    public static boolean contains(String txt) {
        if (txt == null) {
            return false;
        }
        // A shortest match exists at a position exactly when a longest match does,
        // so the cheaper minimum match is enough and the scan can stop at the first hit.
        for (int i = 0; i < txt.length(); i++) {
            if (checkSensitiveWord(txt, i, MIN_MATCH_TYPE) > 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the distinct sensitive words found in the text, using longest-match.
     *
     * @param txt text to check; {@code null} yields an empty set
     * @return the words found, empty if there are none; never {@code null}
     */
    public static Set<String> containsBySet(String txt) {
        return getSensitiveWord(txt, MAX_MATCH_TYPE);
    }

    /** Small manual demonstration entry point. */
    public static void main(String[] args) {
        String string = "我叫李太白,我是一个诗人,我生活在唐朝。";
        SensitiveWordUtil.contains(string);
    }
}



