敏感词更新策略问题调整,敏感词迁移至公共工具类

This commit is contained in:
Chopper 2021-11-23 16:53:53 +08:00
parent c276b1595c
commit f8408030a9
10 changed files with 158 additions and 137 deletions

View File

@ -1,12 +1,6 @@
package cn.lili.modules.system.utils;
import cn.lili.modules.system.entity.dos.SensitiveWords;
import cn.lili.modules.system.service.SensitiveWordsService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner;
import org.springframework.stereotype.Component;
import java.io.Serializable;
import java.util.List;
@ -21,9 +15,12 @@ import java.util.NavigableSet;
* 2020-02-25 14:10:16
*/
@Slf4j
@Component
public class SensitiveWordsFilter implements Serializable, ApplicationRunner {
public class SensitiveWordsFilter implements Serializable {
/**
* 字符*
*/
public final static char WILDCARD_STAR = '*';
/**
* 为2的n次方考虑到敏感词大概在10k左右
@ -39,96 +36,20 @@ public class SensitiveWordsFilter implements Serializable, ApplicationRunner {
*/
protected static SensitiveWordsNode[] nodes;
@Autowired
private SensitiveWordsService sensitiveWordsService;
/**
* 增加一个敏感词如果词的长度trim后小于2则丢弃<br/>
* 此方法构建并不是主要的性能优化点
* 过滤铭感次
*
* @param word 敏感词
* @return 操作结果
*/
public static boolean put(String word) {
//长度小于2的不加入
if (word == null || word.trim().length() < 2) {
return false;
}
//两个字符的不考虑
if (word.length() == 2 && word.matches("\\w\\w")) {
return false;
}
StringPointer sp = new StringPointer(word.trim());
//计算头两个字符的hash
int hash = sp.nextTwoCharHash(0);
//计算头两个字符的mix表示mix相同两个字符相同
int mix = sp.nextTwoCharMix(0);
//转为在hash桶中的位置
int index = hash & (nodes.length - 1);
//从桶里拿第一个节点
SensitiveWordsNode node = nodes[index];
if (node == null) {
//如果没有节点则放进去一个
node = new SensitiveWordsNode(mix);
//并添加词
node.words.add(sp);
//放入桶里
nodes[index] = node;
} else {
//如果已经有节点1个或多个找到正确的节点
for (; node != null; node = node.next) {
//匹配节点
if (node.headTwoCharMix == mix) {
node.words.add(sp);
return true;
}
//如果匹配到最后仍然不成功则追加一个节点
if (node.next == null) {
new SensitiveWordsNode(mix, node).words.add(sp);
return true;
}
}
}
return true;
}
/**
* 移除敏感词
*
* @param word
* @param sentence 过滤赐予
* @return
*/
public static void remove(String word) {
StringPointer sp = new StringPointer(word.trim());
//计算头两个字符的hash
int hash = sp.nextTwoCharHash(0);
//计算头两个字符的mix表示mix相同两个字符相同
int mix = sp.nextTwoCharMix(0);
//转为在hash桶中的位置
int index = hash & (nodes.length - 1);
SensitiveWordsNode node = nodes[index];
for (; node != null; node = node.next) {
//匹配节点
if (node.headTwoCharMix == mix) {
node.words.remove(sp);
}
}
public static String filter(String sentence) {
return filter(sentence, WILDCARD_STAR);
}
/**
* 对句子进行敏感词过滤<br/>
* 如果无敏感词返回输入的sentence对象即可以用下面的方式判断是否有敏感词<br/><code>
* String result = filter.filter(sentence, CharacterConstant.WILDCARD_STAR);<br/>
* if(result != sentence){<br/>
* &nbsp;&nbsp;//有敏感词<br/>
* }
* </code>
* 如果无敏感词返回输入的sentence对象即可以用下面的方式判断是否有敏感词<br/>
*
* @param sentence 句子
* @param replace 敏感词的替换字符
@ -224,25 +145,95 @@ public class SensitiveWordsFilter implements Serializable, ApplicationRunner {
}
}
/**
* 初始化敏感词
*
* @param args
* @throws Exception
*/
@Override
public void run(ApplicationArguments args) {
try {
nodes = new SensitiveWordsNode[DEFAULT_INITIAL_CAPACITY];
//加入平台添加的敏感词
List<SensitiveWords> list = sensitiveWordsService.list();
if (list != null && list.size() > 0) {
for (SensitiveWords sensitiveWords : list) {
put(sensitiveWords.getSensitiveWord());
}
}
} catch (Exception e) {
log.error("初始化敏感词错误", e);
public static void init(List<String> words) {
nodes = new SensitiveWordsNode[DEFAULT_INITIAL_CAPACITY];
for (String word : words) {
put(word);
}
}
/**
* 增加一个敏感词如果词的长度trim后小于2则丢弃<br/>
* 此方法构建并不是主要的性能优化点
*
* @param word 敏感词
* @return 操作结果
*/
public static boolean put(String word) {
//长度小于2的不加入
if (word == null || word.trim().length() < 2) {
return false;
}
//两个字符的不考虑
if (word.length() == 2 && word.matches("\\w\\w")) {
return false;
}
StringPointer sp = new StringPointer(word.trim());
//计算头两个字符的hash
int hash = sp.nextTwoCharHash(0);
//计算头两个字符的mix表示mix相同两个字符相同
int mix = sp.nextTwoCharMix(0);
//转为在hash桶中的位置
int index = hash & (nodes.length - 1);
//从桶里拿第一个节点
SensitiveWordsNode node = nodes[index];
if (node == null) {
//如果没有节点则放进去一个
node = new SensitiveWordsNode(mix);
//并添加词
node.words.add(sp);
//放入桶里
nodes[index] = node;
} else {
//如果已经有节点1个或多个找到正确的节点
for (; node != null; node = node.next) {
//匹配节点
if (node.headTwoCharMix == mix) {
node.words.add(sp);
return true;
}
//如果匹配到最后仍然不成功则追加一个节点
if (node.next == null) {
new SensitiveWordsNode(mix, node).words.add(sp);
return true;
}
}
}
return true;
}
/**
* 移除敏感词
*
* @param word
* @return
*/
public static void remove(String word) {
StringPointer sp = new StringPointer(word.trim());
//计算头两个字符的hash
int hash = sp.nextTwoCharHash(0);
//计算头两个字符的mix表示mix相同两个字符相同
int mix = sp.nextTwoCharMix(0);
//转为在hash桶中的位置
int index = hash & (nodes.length - 1);
SensitiveWordsNode node = nodes[index];
for (; node != null; node = node.next) {
//匹配节点
if (node.headTwoCharMix == mix) {
node.words.remove(sp);
}
}
}
}

View File

@ -0,0 +1,40 @@
package cn.lili.cache.impl;
import cn.lili.cache.Cache;
import cn.lili.cache.CachePrefix;
import cn.lili.common.sensitive.SensitiveWordsFilter;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner;
import org.springframework.stereotype.Component;
import java.util.List;
/**
* 初始化敏感词
*
* @author Chopper
* @version v1.0
* 2021-11-23 12:08
*/
@Component
public class SensitiveInit implements ApplicationRunner {
@Autowired
private Cache<List<String>> cache;
/**
* 程序启动时获取最新的需要过滤的敏感词
*
* @param args
*/
@Override
public void run(ApplicationArguments args) {
List<String> sensitives = cache.get(CachePrefix.SENSITIVE.getPrefix());
if (sensitives == null || sensitives.isEmpty()) {
return;
}
SensitiveWordsFilter.init(sensitives);
}
}

View File

@ -0,0 +1,11 @@
package cn.lili.common.sensitive.quartz;
/**
*
* QuartzConfig
* @author Chopper
* @version v1.0
* 2021-11-23 16:30
*
*/
public class QuartzConfig {
}

View File

@ -0,0 +1,11 @@
package cn.lili.common.sensitive.quartz;
/**
*
* SensitiveQuartz
* @author Chopper
* @version v1.0
* 2021-11-23 16:31
*
*/
public class SensitiveQuartz {
}

View File

@ -1,20 +0,0 @@
package cn.lili.modules.system.utils;
/**
*
* 字符常量
* @author Bulbasaur
* @version v1.0
* @since v1.0
* 2020-02-25 14:10:16
*/
public class CharacterConstant {
/**
* 字符*
*/
public final static char WILDCARD_STAR = '*';
}

View File

@ -1,6 +0,0 @@
共产党
习近平
毛泽东
胡锦涛
邓小平
强奸

View File

@ -1,6 +0,0 @@
共产党
习近平
毛泽东
胡锦涛
邓小平
强奸