主要为IM私聊、直播间弹幕、公屏、大厅广播消息实现一个敏感词过滤和脱敏处理
引个流,公众号:马士兵程序员
词库文件(默认为 blacklist.txt 和 whitelist.txt)每行一个敏感词,词库可从网上获取或者自定义。
java复制代码public class SensitiveWordUtil {
/**
* 词库上下文环境
*/
public static final WordContext CONTENT = new WordContext();
public static final WordFilter WORD_FILTER = new WordFilter(CONTENT);
}
public class WordContext {
/**
* 敏感词字典
*/
private final Map wordMap = new HashMap(1024);
/**
* 是否已初始化
*/
private boolean init;
/**
* 黑名单列表
*/
private final String blackList;
/**
* 白名单列表
*/
private final String whiteList;
/**
 * Creates a context that loads its words from {@code /blacklist.txt} and {@code /whitelist.txt}.
 */
public WordContext() {
    this("/blacklist.txt", "/whitelist.txt");
}

/**
 * Creates a context for the given word-list locations and loads both lists right away.
 *
 * @param blackList location of the blacklist, handed to {@code readWordFile}
 * @param whiteList location of the whitelist, handed to {@code readWordFile}
 */
public WordContext(String blackList, String whiteList) {
    this.whiteList = whiteList;
    this.blackList = blackList;
    initKeyWord();
}
/**
 * Returns the dictionary held by this context.
 *
 * <p>The returned object is the internal map itself, not a copy.
 *
 * @return the word dictionary
 */
public Map getWordMap() {
    return this.wordMap;
}
/**
 * Loads the blacklist and the whitelist into the dictionary the first time it is called.
 *
 * <p>Later calls return without reloading. Any exception raised while loading is rethrown
 * wrapped in a {@link RuntimeException}.
 */
private synchronized void initKeyWord() {
    if (init) {
        return;
    }
    try {
        // Blacklisted words are added to the dictionary first...
        addWord(readWordFile(blackList), WordType.BLACK);
        // ...then whitelisted words are merged into the same dictionary.
        addWord(readWordFile(whiteList), WordType.WHITE);
        init = true;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
/**
 * Adds words to the dictionary, which is a character trie built from nested maps.
 *
 * <p>Each node maps a {@code Character} to the child node for that character and carries an
 * {@code "isEnd"} entry. The node reached by the last character of a word additionally carries
 * an {@code "isWhiteWord"} entry holding the ordinal of {@code wordType} as a string.
 *
 * <p>Example after adding 中国, 中国人民, 中国男人 and 五星红旗 (assuming
 * {@code EndType.HAS_NEXT} has ordinal 0 and {@code EndType.IS_END} ordinal 1 — confirm against
 * {@code EndType}):
 *
 * <pre>
 * 中 = { isEnd = 0
 *   国 = { isEnd = 1
 *     人 = { isEnd = 0  民 = { isEnd = 1 } }
 *     男 = { isEnd = 0  人 = { isEnd = 1 } } } }
 * 五 = { isEnd = 0  星 = { isEnd = 0  红 = { isEnd = 0  旗 = { isEnd = 1 } } } }
 * </pre>
 *
 * @param wordList words to add; an empty string adds nothing
 * @param wordType list the words belong to
 */
@SuppressWarnings("unchecked") // the trie is stored in untyped nested maps
public void addWord(Iterable<String> wordList, WordType wordType) {
    for (String word : wordList) {
        // Every word is inserted starting from the root of the trie.
        Map<Object, Object> node = wordMap;
        final int lastIndex = word.length() - 1;
        for (int i = 0; i <= lastIndex; i++) {
            char ch = word.charAt(i);
            Object child = node.get(ch);
            if (child != null) {
                // The prefix already exists: just descend.
                node = (Map<Object, Object>) child;
            } else {
                // New branch: create the node and mark it as "not the end of a word" for now.
                Map<Object, Object> created = new HashMap<>(4);
                created.put("isEnd", String.valueOf(EndType.HAS_NEXT.ordinal()));
                node.put(ch, created);
                node = created;
            }
            if (i == lastIndex) {
                // Last character: flag the node as a word end and record which list it is on.
                node.put("isEnd", String.valueOf(EndType.IS_END.ordinal()));
                node.put("isWhiteWord", String.valueOf(wordType.ordinal()));
            }
        }
    }
}
/**
* 在线删除敏感词
*
* @param wordList 敏感词列表
* @param wordType 黑名单 BLACk,白名单WHITE
*/
public void removeWord(Iterable wordList, WordType wordType) {
Map nowMap;
for (String key : wordList) {
List
java复制代码public class WordFilter {
/**
* 敏感词表
*/
private final Map wordMap;
/**
* 构造函数
*/
public WordFilter(WordContext context) {
this.wordMap = context.getWordMap();
}
/**
* 替换敏感词
*
* @param text 输入文本
*/
public String replace(final String text) {
return replace(text, 0, '*');
}
/**
* 替换敏感词
*
* @param text 输入文本
* @param symbol 替换符号
*/
public String replace(final String text, final char symbol) {
return replace(text, 0, symbol);
}
/**
* 替换敏感词
*
* @param text 输入文本
* @param skip 文本距离
* @param symbol 替换符号
*/
public String replace(final String text, final int skip, final char symbol) {
char[] charset = text.toCharArray();
for (int i = 0; i < charset.length; i++) {
FlagIndex fi = getFlagIndex(charset, i, skip);
if (fi.isFlag()) {
if (!fi.isWhiteWord()) {
for (int j : fi.getIndex()) {
charset[j] = symbol;
}
} else {
i += fi.getIndex().size() - 1;
}
}
}
return new String(charset);
}
/**
* 是否包含敏感词
*
* @param text 输入文本
*/
public boolean include(final String text) {
return include(text, 0);
}
/**
* 是否包含敏感词
*
* @param text 输入文本
* @param skip 文本距离
*/
public boolean include(final String text, final int skip) {
boolean include = false;
char[] charset = text.toCharArray();
for (int i = 0; i < charset.length; i++) {
FlagIndex fi = getFlagIndex(charset, i, skip);
if(fi.isFlag()) {
if (fi.isWhiteWord()) {
i += fi.getIndex().size() - 1;
} else {
include = true;
break;
}
}
}
return include;
}
/**
* 获取敏感词数量
*
* @param text 输入文本
*/
public int wordCount(final String text) {
return wordCount(text, 0);
}
/**
* 获取敏感词数量
*
* @param text 输入文本
* @param skip 文本距离
*/
public int wordCount(final String text, final int skip) {
int count = 0;
char[] charset = text.toCharArray();
for (int i = 0; i < charset.length; i++) {
FlagIndex fi = getFlagIndex(charset, i, skip);
if (fi.isFlag()) {
if(fi.isWhiteWord()) {
i += fi.getIndex().size() - 1;
} else {
count++;
}
}
}
return count;
}
/**
* 获取敏感词列表
*
* @param text 输入文本
*/
public List wordList(final String text) {
return wordList(text, 0);
}
/**
* 获取敏感词列表
*
* @param text 输入文本
* @param skip 文本距离
*/
public List wordList(final String text, final int skip) {
List wordList = new ArrayList<>();
char[] charset = text.toCharArray();
for (int i = 0; i < charset.length; i++) {
FlagIndex fi = getFlagIndex(charset, i, skip);
if (fi.isFlag()) {
if(fi.isWhiteWord()) {
i += fi.getIndex().size() - 1;
} else {
StringBuilder builder = new StringBuilder();
for (int j : fi.getIndex()) {
char word = text.charAt(j);
builder.append(word);
}
wordList.add(builder.toString());
}
}
}
return wordList;
}
/**
* 获取标记索引
*
* @param charset 输入文本
* @param begin 检测起始
* @param skip 文本距离
*/
private FlagIndex getFlagIndex(final char[] charset, final int begin, final int skip) {
FlagIndex fi = new FlagIndex();
Map current = wordMap;
boolean flag = false;
int count = 0;
List index = new ArrayList<>();
for (int i = begin; i < charset.length; i++) {
char word = charset[i];
Map mapTree = (Map) current.get(word);
if (count > skip || (i == begin && Objects.isNull(mapTree))) {
break;
}
if (Objects.nonNull(mapTree)) {
current = mapTree;
count = 0;
index.add(i);
} else {
count++;
if (flag && count > skip) {
break;
}
}
if ("1".equals(current.get("isEnd"))) {
flag = true;
}
if ("1".equals(current.get("isWhiteWord"))) {
fi.setWhiteWord(true);
break;
}
}
fi.setFlag(flag);
fi.setIndex(index);
return fi;
}
}
注意:此源代码出自 gitee.com/humingzhang… 其余的代码我不在此贴出,大家有兴趣可以自己去看
java复制代码@Retention(value = RetentionPolicy.RUNTIME)
@Target(value = {ElementType.METHOD})
public @interface SensitiveWordFilter {
/**
* 内容
*
* @return
*/
String[] content();
/**
* 过滤类型
*
* @return
*/
SensitiveWordFilterType filterType() default SensitiveWordFilterType.FILTER;
}
/**
 * How text that contains a sensitive word is handled.
 *
 * <p>The enum has no fields, so it needs no generated constructor.
 */
public enum SensitiveWordFilterType {

    /** Filter: content that contains a sensitive word is rejected. */
    FILTER,

    /** Replace / desensitize: sensitive words in the content are masked. */
    REPLACE
}
java复制代码@Slf4j
public class SensitiveWordInterceptor implements MethodInterceptor {
private static final ParameterNameDiscoverer NAME_DISCOVERER = new DefaultParameterNameDiscoverer();
private static final ExpressionParser PARSER = new SpelExpressionParser();
private BeanResolver beanResolver;
public SensitiveWordInterceptor(BeanFactory beanFactory) {
this.beanResolver = new BeanFactoryResolver(beanFactory);
}
@Override
public Object invoke(MethodInvocation invocation) throws Throwable {
Class<?> cls = AopProxyUtils.ultimateTargetClass(invocation.getThis());
if (!cls.equals(invocation.getThis().getClass())) {
return invocation.proceed();
}
SensitiveWordFilter sensitiveWordFilter = invocation.getMethod().getAnnotation(SensitiveWordFilter.class);
StandardEvaluationContext context = new MethodBasedEvaluationContext(null, invocation.getMethod(),
invocation.getArguments(), NAME_DISCOVERER);
context.setBeanResolver(beanResolver);
String[] contentKeys = sensitiveWordFilter.content();
if (StringUtils.isEmpty(contentKeys)) {
log.warn("过滤内容为空.");
return invocation.proceed();
}
for (String key : contentKeys) {
String content = PARSER.parseExpression(key).getValue(context, String.class);
if (StringUtils.isBlank(content)) {
continue;
}
boolean include = SensitiveWordUtil.WORD_FILTER.include(StringUtils.deleteWhitespace(content));
if (sensitiveWordFilter.filterType().equals(SensitiveWordFilterType.FILTER)) {
if (include) {
log.error("内容包含敏感词,抛出异常 | key:{} | content:{}", key, content);
throw new SensitiveWordException(SensitiveWordCode.CONTAINS_SENSITIVE_WORD);
}
} else if (sensitiveWordFilter.filterType().equals(SensitiveWordFilterType.REPLACE)) {
if (include) {
PARSER.parseExpression(key).setValue(context, SensitiveWordUtil.WORD_FILTER.replace(StringUtils.deleteWhitespace(content)));
log.error("内容包含敏感词,已脱敏处理 | key:{} | content:{}", key, content);
}
}
}
return invocation.proceed();
}
}
java复制代码public class SensitiveWordAnnotationAdvisor extends AbstractPointcutAdvisor implements BeanFactoryAware {
private final Advice advice;
private final Pointcut pointcut = AnnotationMatchingPointcut.forMethodAnnotation(SensitiveWordFilter.class);
public SensitiveWordAnnotationAdvisor(@NonNull SensitiveWordInterceptor sensitiveWordInterceptor, int order) {
this.advice = sensitiveWordInterceptor;
setOrder(order);
}
@Override
public Pointcut getPointcut() {
return this.pointcut;
}
@Override
public Advice getAdvice() {
return this.advice;
}
@Override
public void setBeanFactory(BeanFactory beanFactory) throws BeansException {
if (this.advice instanceof BeanFactoryAware) {
((BeanFactoryAware) this.advice).setBeanFactory(beanFactory);
}
}
}
java复制代码@Configuration
public class SensitiveWordFilterAutoConfiguration {
@Bean
@ConditionalOnMissingBean
public SensitiveWordInterceptor sensitiveWordInterceptor(BeanFactory beanFactory) {
return new SensitiveWordInterceptor(beanFactory);
}
@Bean
@ConditionalOnMissingBean
public SensitiveWordAnnotationAdvisor sensitiveWordAnnotationAdvisor(SensitiveWordInterceptor sensitiveWordInterceptor) {
return new SensitiveWordAnnotationAdvisor(sensitiveWordInterceptor, Ordered.LOWEST_PRECEDENCE);
}
}
OK代码已完成,接下来我们看下实际使用够不够优雅和直观
过滤抛出异常
java复制代码@SensitiveWordFilter(content = {"#bo.name", "#bo.intro"}, filterType = SensitiveWordFilterType.FILTER)
public void update(LiveRoomUpdateBo bo) {
}
脱敏处理(方法执行前,入参中的敏感词会被替换为 *)
java复制代码@SensitiveWordFilter(content = {"#bo.name", "#bo.intro"}, filterType = SensitiveWordFilterType.REPLACE)
public void update(LiveRoomUpdateBo bo) {
}
由于这种方式非常优雅和无侵入式,基于这种方式,可以扩展很多的其他用法,比如权限判断、分布式锁等
但是这种 AOP 代理方法拦截的方式也不是用得越多越好,过多的反射操作势必会影响接口性能。
各位大佬点点,关注支持支持下
页面更新:2024-05-02
本站资料均由网友自行发布提供,仅用于学习交流。如有版权问题,请与我联系,QQ:4156828
© CopyRight 2020-2024 All Rights Reserved. Powered By 71396.com 闽ICP备11008920号-4
闽公网安备35020302034903号