敏感词更新策略问题调整,敏感词迁移至公共工具类
This commit is contained in:
parent
c276b1595c
commit
f8408030a9
@ -1,12 +1,6 @@
|
||||
package cn.lili.modules.system.utils;
|
||||
|
||||
import cn.lili.modules.system.entity.dos.SensitiveWords;
|
||||
import cn.lili.modules.system.service.SensitiveWordsService;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.ApplicationArguments;
|
||||
import org.springframework.boot.ApplicationRunner;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
@ -21,9 +15,12 @@ import java.util.NavigableSet;
|
||||
* 2020-02-25 14:10:16
|
||||
*/
|
||||
@Slf4j
|
||||
@Component
|
||||
public class SensitiveWordsFilter implements Serializable, ApplicationRunner {
|
||||
public class SensitiveWordsFilter implements Serializable {
|
||||
|
||||
/**
|
||||
* 字符*
|
||||
*/
|
||||
public final static char WILDCARD_STAR = '*';
|
||||
|
||||
/**
|
||||
* 为2的n次方,考虑到敏感词大概在10k左右,
|
||||
@ -39,96 +36,20 @@ public class SensitiveWordsFilter implements Serializable, ApplicationRunner {
|
||||
*/
|
||||
protected static SensitiveWordsNode[] nodes;
|
||||
|
||||
@Autowired
|
||||
private SensitiveWordsService sensitiveWordsService;
|
||||
|
||||
|
||||
/**
|
||||
* 增加一个敏感词,如果词的长度(trim后)小于2,则丢弃<br/>
|
||||
* 此方法(构建)并不是主要的性能优化点。
|
||||
* 过滤铭感次
|
||||
*
|
||||
* @param word 敏感词
|
||||
* @return 操作结果
|
||||
*/
|
||||
public static boolean put(String word) {
|
||||
|
||||
//长度小于2的不加入
|
||||
if (word == null || word.trim().length() < 2) {
|
||||
return false;
|
||||
}
|
||||
//两个字符的不考虑
|
||||
if (word.length() == 2 && word.matches("\\w\\w")) {
|
||||
return false;
|
||||
}
|
||||
StringPointer sp = new StringPointer(word.trim());
|
||||
//计算头两个字符的hash
|
||||
int hash = sp.nextTwoCharHash(0);
|
||||
//计算头两个字符的mix表示(mix相同,两个字符相同)
|
||||
int mix = sp.nextTwoCharMix(0);
|
||||
//转为在hash桶中的位置
|
||||
int index = hash & (nodes.length - 1);
|
||||
|
||||
//从桶里拿第一个节点
|
||||
SensitiveWordsNode node = nodes[index];
|
||||
if (node == null) {
|
||||
//如果没有节点,则放进去一个
|
||||
node = new SensitiveWordsNode(mix);
|
||||
//并添加词
|
||||
node.words.add(sp);
|
||||
//放入桶里
|
||||
nodes[index] = node;
|
||||
} else {
|
||||
//如果已经有节点(1个或多个),找到正确的节点
|
||||
for (; node != null; node = node.next) {
|
||||
//匹配节点
|
||||
if (node.headTwoCharMix == mix) {
|
||||
node.words.add(sp);
|
||||
return true;
|
||||
}
|
||||
//如果匹配到最后仍然不成功,则追加一个节点
|
||||
if (node.next == null) {
|
||||
new SensitiveWordsNode(mix, node).words.add(sp);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* 移除敏感词
|
||||
*
|
||||
* @param word
|
||||
* @param sentence 过滤赐予
|
||||
* @return
|
||||
*/
|
||||
public static void remove(String word) {
|
||||
|
||||
StringPointer sp = new StringPointer(word.trim());
|
||||
//计算头两个字符的hash
|
||||
int hash = sp.nextTwoCharHash(0);
|
||||
//计算头两个字符的mix表示(mix相同,两个字符相同)
|
||||
int mix = sp.nextTwoCharMix(0);
|
||||
//转为在hash桶中的位置
|
||||
int index = hash & (nodes.length - 1);
|
||||
SensitiveWordsNode node = nodes[index];
|
||||
|
||||
for (; node != null; node = node.next) {
|
||||
//匹配节点
|
||||
if (node.headTwoCharMix == mix) {
|
||||
node.words.remove(sp);
|
||||
}
|
||||
|
||||
}
|
||||
public static String filter(String sentence) {
|
||||
return filter(sentence, WILDCARD_STAR);
|
||||
}
|
||||
|
||||
/**
|
||||
* 对句子进行敏感词过滤<br/>
|
||||
* 如果无敏感词返回输入的sentence对象,即可以用下面的方式判断是否有敏感词:<br/><code>
|
||||
* String result = filter.filter(sentence, CharacterConstant.WILDCARD_STAR);<br/>
|
||||
* if(result != sentence){<br/>
|
||||
* //有敏感词<br/>
|
||||
* }
|
||||
* </code>
|
||||
* 如果无敏感词返回输入的sentence对象,即可以用下面的方式判断是否有敏感词:<br/>
|
||||
*
|
||||
* @param sentence 句子
|
||||
* @param replace 敏感词的替换字符
|
||||
@ -224,25 +145,95 @@ public class SensitiveWordsFilter implements Serializable, ApplicationRunner {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 初始化敏感词
|
||||
*
|
||||
* @param args
|
||||
* @throws Exception
|
||||
*/
|
||||
@Override
|
||||
public void run(ApplicationArguments args) {
|
||||
try {
|
||||
nodes = new SensitiveWordsNode[DEFAULT_INITIAL_CAPACITY];
|
||||
//加入平台添加的敏感词
|
||||
List<SensitiveWords> list = sensitiveWordsService.list();
|
||||
if (list != null && list.size() > 0) {
|
||||
for (SensitiveWords sensitiveWords : list) {
|
||||
put(sensitiveWords.getSensitiveWord());
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("初始化敏感词错误", e);
|
||||
public static void init(List<String> words) {
|
||||
nodes = new SensitiveWordsNode[DEFAULT_INITIAL_CAPACITY];
|
||||
for (String word : words) {
|
||||
put(word);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 增加一个敏感词,如果词的长度(trim后)小于2,则丢弃<br/>
|
||||
* 此方法(构建)并不是主要的性能优化点。
|
||||
*
|
||||
* @param word 敏感词
|
||||
* @return 操作结果
|
||||
*/
|
||||
public static boolean put(String word) {
|
||||
|
||||
//长度小于2的不加入
|
||||
if (word == null || word.trim().length() < 2) {
|
||||
return false;
|
||||
}
|
||||
//两个字符的不考虑
|
||||
if (word.length() == 2 && word.matches("\\w\\w")) {
|
||||
return false;
|
||||
}
|
||||
StringPointer sp = new StringPointer(word.trim());
|
||||
//计算头两个字符的hash
|
||||
int hash = sp.nextTwoCharHash(0);
|
||||
//计算头两个字符的mix表示(mix相同,两个字符相同)
|
||||
int mix = sp.nextTwoCharMix(0);
|
||||
//转为在hash桶中的位置
|
||||
int index = hash & (nodes.length - 1);
|
||||
|
||||
//从桶里拿第一个节点
|
||||
SensitiveWordsNode node = nodes[index];
|
||||
if (node == null) {
|
||||
//如果没有节点,则放进去一个
|
||||
node = new SensitiveWordsNode(mix);
|
||||
//并添加词
|
||||
node.words.add(sp);
|
||||
//放入桶里
|
||||
nodes[index] = node;
|
||||
} else {
|
||||
//如果已经有节点(1个或多个),找到正确的节点
|
||||
for (; node != null; node = node.next) {
|
||||
//匹配节点
|
||||
if (node.headTwoCharMix == mix) {
|
||||
node.words.add(sp);
|
||||
return true;
|
||||
}
|
||||
//如果匹配到最后仍然不成功,则追加一个节点
|
||||
if (node.next == null) {
|
||||
new SensitiveWordsNode(mix, node).words.add(sp);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* 移除敏感词
|
||||
*
|
||||
* @param word
|
||||
* @return
|
||||
*/
|
||||
public static void remove(String word) {
|
||||
|
||||
StringPointer sp = new StringPointer(word.trim());
|
||||
//计算头两个字符的hash
|
||||
int hash = sp.nextTwoCharHash(0);
|
||||
//计算头两个字符的mix表示(mix相同,两个字符相同)
|
||||
int mix = sp.nextTwoCharMix(0);
|
||||
//转为在hash桶中的位置
|
||||
int index = hash & (nodes.length - 1);
|
||||
SensitiveWordsNode node = nodes[index];
|
||||
|
||||
for (; node != null; node = node.next) {
|
||||
//匹配节点
|
||||
if (node.headTwoCharMix == mix) {
|
||||
node.words.remove(sp);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -0,0 +1,40 @@
|
||||
package cn.lili.cache.impl;
|
||||
|
||||
import cn.lili.cache.Cache;
|
||||
import cn.lili.cache.CachePrefix;
|
||||
import cn.lili.common.sensitive.SensitiveWordsFilter;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.ApplicationArguments;
|
||||
import org.springframework.boot.ApplicationRunner;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 初始化敏感词
|
||||
*
|
||||
* @author Chopper
|
||||
* @version v1.0
|
||||
* 2021-11-23 12:08
|
||||
*/
|
||||
@Component
|
||||
public class SensitiveInit implements ApplicationRunner {
|
||||
|
||||
@Autowired
|
||||
private Cache<List<String>> cache;
|
||||
|
||||
/**
|
||||
* 程序启动时,获取最新的需要过滤的敏感词
|
||||
*
|
||||
* @param args
|
||||
*/
|
||||
@Override
|
||||
public void run(ApplicationArguments args) {
|
||||
List<String> sensitives = cache.get(CachePrefix.SENSITIVE.getPrefix());
|
||||
if (sensitives == null || sensitives.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
SensitiveWordsFilter.init(sensitives);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,11 @@
|
||||
package cn.lili.common.sensitive.quartz;
|
||||
/**
|
||||
*
|
||||
* QuartzConfig
|
||||
* @author Chopper
|
||||
* @version v1.0
|
||||
* 2021-11-23 16:30
|
||||
*
|
||||
*/
|
||||
public class QuartzConfig {
|
||||
}
|
@ -0,0 +1,11 @@
|
||||
package cn.lili.common.sensitive.quartz;
|
||||
/**
|
||||
*
|
||||
* SensitiveQuartz
|
||||
* @author Chopper
|
||||
* @version v1.0
|
||||
* 2021-11-23 16:31
|
||||
*
|
||||
*/
|
||||
public class SensitiveQuartz {
|
||||
}
|
@ -1,20 +0,0 @@
|
||||
package cn.lili.modules.system.utils;
|
||||
|
||||
/**
|
||||
*
|
||||
* 字符常量
|
||||
* @author Bulbasaur
|
||||
* @version v1.0
|
||||
* @since v1.0
|
||||
* 2020-02-25 14:10:16
|
||||
*/
|
||||
public class CharacterConstant {
|
||||
|
||||
|
||||
/**
|
||||
* 字符*
|
||||
*/
|
||||
public final static char WILDCARD_STAR = '*';
|
||||
|
||||
|
||||
}
|
@ -1,6 +0,0 @@
|
||||
共产党
|
||||
习近平
|
||||
毛泽东
|
||||
胡锦涛
|
||||
邓小平
|
||||
强奸
|
@ -1,6 +0,0 @@
|
||||
共产党
|
||||
习近平
|
||||
毛泽东
|
||||
胡锦涛
|
||||
邓小平
|
||||
强奸
|
Loading…
x
Reference in New Issue
Block a user