From f8408030a9c66f43f3c8835a6475faeea03584aa Mon Sep 17 00:00:00 2001 From: Chopper Date: Tue, 23 Nov 2021 16:53:53 +0800 Subject: [PATCH] =?UTF-8?q?=E6=95=8F=E6=84=9F=E8=AF=8D=E6=9B=B4=E6=96=B0?= =?UTF-8?q?=E7=AD=96=E7=95=A5=E9=97=AE=E9=A2=98=E8=B0=83=E6=95=B4=EF=BC=8C?= =?UTF-8?q?=E6=95=8F=E6=84=9F=E8=AF=8D=E8=BF=81=E7=A7=BB=E8=87=B3=E5=85=AC?= =?UTF-8?q?=E5=85=B1=E5=B7=A5=E5=85=B7=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../sensitive}/SensitiveWordsFilter.java | 201 +++++++++--------- .../sensitive}/SensitiveWordsNode.java | 0 .../sensitive}/StringPointer.java | 0 .../common/sensitive/init/SensitiveInit.java | 40 ++++ .../common/sensitive/quartz/QuartzConfig.java | 11 + .../sensitive/quartz/SensitiveQuartz.java | 11 + .../util => common/utils}/UuidUtils.java | 0 .../system/utils/CharacterConstant.java | 20 -- .../resources/sensitive/sensitive_words.txt | 6 - .../src/main/resources/sensitive_words.txt | 6 - 10 files changed, 158 insertions(+), 137 deletions(-) rename framework/src/main/java/cn/lili/{modules/system/utils => common/sensitive}/SensitiveWordsFilter.java (85%) rename framework/src/main/java/cn/lili/{modules/system/utils => common/sensitive}/SensitiveWordsNode.java (100%) rename framework/src/main/java/cn/lili/{modules/system/utils => common/sensitive}/StringPointer.java (100%) create mode 100644 framework/src/main/java/cn/lili/common/sensitive/init/SensitiveInit.java create mode 100644 framework/src/main/java/cn/lili/common/sensitive/quartz/QuartzConfig.java create mode 100644 framework/src/main/java/cn/lili/common/sensitive/quartz/SensitiveQuartz.java rename framework/src/main/java/cn/lili/{modules/connect/util => common/utils}/UuidUtils.java (100%) delete mode 100644 framework/src/main/java/cn/lili/modules/system/utils/CharacterConstant.java delete mode 100644 framework/src/main/resources/sensitive/sensitive_words.txt delete mode 100644 framework/src/main/resources/sensitive_words.txt 
diff --git a/framework/src/main/java/cn/lili/modules/system/utils/SensitiveWordsFilter.java b/framework/src/main/java/cn/lili/common/sensitive/SensitiveWordsFilter.java similarity index 85% rename from framework/src/main/java/cn/lili/modules/system/utils/SensitiveWordsFilter.java rename to framework/src/main/java/cn/lili/common/sensitive/SensitiveWordsFilter.java index 63a04787..9191fc65 100644 --- a/framework/src/main/java/cn/lili/modules/system/utils/SensitiveWordsFilter.java +++ b/framework/src/main/java/cn/lili/common/sensitive/SensitiveWordsFilter.java @@ -1,12 +1,6 @@ package cn.lili.modules.system.utils; -import cn.lili.modules.system.entity.dos.SensitiveWords; -import cn.lili.modules.system.service.SensitiveWordsService; import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.boot.ApplicationArguments; -import org.springframework.boot.ApplicationRunner; -import org.springframework.stereotype.Component; import java.io.Serializable; import java.util.List; @@ -21,9 +15,12 @@ import java.util.NavigableSet; * 2020-02-25 14:10:16 */ @Slf4j -@Component -public class SensitiveWordsFilter implements Serializable, ApplicationRunner { +public class SensitiveWordsFilter implements Serializable { + /** + * 字符* + */ + public final static char WILDCARD_STAR = '*'; /** * 为2的n次方,考虑到敏感词大概在10k左右, @@ -39,96 +36,20 @@ public class SensitiveWordsFilter implements Serializable, ApplicationRunner { */ protected static SensitiveWordsNode[] nodes; - @Autowired - private SensitiveWordsService sensitiveWordsService; - /** - * 增加一个敏感词,如果词的长度(trim后)小于2,则丢弃
- * 此方法(构建)并不是主要的性能优化点。 + * 过滤敏感词 * - * @param word 敏感词 - * @return 操作结果 - */ - public static boolean put(String word) { - - //长度小于2的不加入 - if (word == null || word.trim().length() < 2) { - return false; - } - //两个字符的不考虑 - if (word.length() == 2 && word.matches("\\w\\w")) { - return false; - } - StringPointer sp = new StringPointer(word.trim()); - //计算头两个字符的hash - int hash = sp.nextTwoCharHash(0); - //计算头两个字符的mix表示(mix相同,两个字符相同) - int mix = sp.nextTwoCharMix(0); - //转为在hash桶中的位置 - int index = hash & (nodes.length - 1); - - //从桶里拿第一个节点 - SensitiveWordsNode node = nodes[index]; - if (node == null) { - //如果没有节点,则放进去一个 - node = new SensitiveWordsNode(mix); - //并添加词 - node.words.add(sp); - //放入桶里 - nodes[index] = node; - } else { - //如果已经有节点(1个或多个),找到正确的节点 - for (; node != null; node = node.next) { - //匹配节点 - if (node.headTwoCharMix == mix) { - node.words.add(sp); - return true; - } - //如果匹配到最后仍然不成功,则追加一个节点 - if (node.next == null) { - new SensitiveWordsNode(mix, node).words.add(sp); - return true; - } - } - } - return true; - } - - /** - * 移除敏感词 - * - * @param word + * @param sentence 待过滤的句子 * @return */ - public static void remove(String word) { - - StringPointer sp = new StringPointer(word.trim()); - //计算头两个字符的hash - int hash = sp.nextTwoCharHash(0); - //计算头两个字符的mix表示(mix相同,两个字符相同) - int mix = sp.nextTwoCharMix(0); - //转为在hash桶中的位置 - int index = hash & (nodes.length - 1); - SensitiveWordsNode node = nodes[index]; - - for (; node != null; node = node.next) { - //匹配节点 - if (node.headTwoCharMix == mix) { - node.words.remove(sp); - } - - } + public static String filter(String sentence) { + return filter(sentence, WILDCARD_STAR); } /** * 对句子进行敏感词过滤
- * 如果无敏感词返回输入的sentence对象,即可以用下面的方式判断是否有敏感词:
- * String result = filter.filter(sentence, CharacterConstant.WILDCARD_STAR);
- * if(result != sentence){
- *   //有敏感词
- * } - *
+ * 如果无敏感词,则返回输入的sentence对象本身,因此可用 result != sentence 判断句子中是否含有敏感词。
* * @param sentence 句子 * @param replace 敏感词的替换字符 @@ -224,25 +145,95 @@ public class SensitiveWordsFilter implements Serializable, ApplicationRunner { } } + /** * 初始化敏感词 - * - * @param args - * @throws Exception */ - @Override - public void run(ApplicationArguments args) { - try { - nodes = new SensitiveWordsNode[DEFAULT_INITIAL_CAPACITY]; - //加入平台添加的敏感词 - List list = sensitiveWordsService.list(); - if (list != null && list.size() > 0) { - for (SensitiveWords sensitiveWords : list) { - put(sensitiveWords.getSensitiveWord()); - } - } - } catch (Exception e) { - log.error("初始化敏感词错误", e); + public static void init(List words) { + nodes = new SensitiveWordsNode[DEFAULT_INITIAL_CAPACITY]; + for (String word : words) { + put(word); } } + + + /** + * 增加一个敏感词,如果词的长度(trim后)小于2,则丢弃
+ * 此方法(构建)并不是主要的性能优化点。 + * + * @param word 敏感词 + * @return 操作结果 + */ + public static boolean put(String word) { + + //长度小于2的不加入 + if (word == null || word.trim().length() < 2) { + return false; + } + //两个字符的不考虑 + if (word.length() == 2 && word.matches("\\w\\w")) { + return false; + } + StringPointer sp = new StringPointer(word.trim()); + //计算头两个字符的hash + int hash = sp.nextTwoCharHash(0); + //计算头两个字符的mix表示(mix相同,两个字符相同) + int mix = sp.nextTwoCharMix(0); + //转为在hash桶中的位置 + int index = hash & (nodes.length - 1); + + //从桶里拿第一个节点 + SensitiveWordsNode node = nodes[index]; + if (node == null) { + //如果没有节点,则放进去一个 + node = new SensitiveWordsNode(mix); + //并添加词 + node.words.add(sp); + //放入桶里 + nodes[index] = node; + } else { + //如果已经有节点(1个或多个),找到正确的节点 + for (; node != null; node = node.next) { + //匹配节点 + if (node.headTwoCharMix == mix) { + node.words.add(sp); + return true; + } + //如果匹配到最后仍然不成功,则追加一个节点 + if (node.next == null) { + new SensitiveWordsNode(mix, node).words.add(sp); + return true; + } + } + } + return true; + } + + /** + * 移除敏感词 + * + * @param word + * @return + */ + public static void remove(String word) { + + StringPointer sp = new StringPointer(word.trim()); + //计算头两个字符的hash + int hash = sp.nextTwoCharHash(0); + //计算头两个字符的mix表示(mix相同,两个字符相同) + int mix = sp.nextTwoCharMix(0); + //转为在hash桶中的位置 + int index = hash & (nodes.length - 1); + SensitiveWordsNode node = nodes[index]; + + for (; node != null; node = node.next) { + //匹配节点 + if (node.headTwoCharMix == mix) { + node.words.remove(sp); + } + + } + } + + } diff --git a/framework/src/main/java/cn/lili/modules/system/utils/SensitiveWordsNode.java b/framework/src/main/java/cn/lili/common/sensitive/SensitiveWordsNode.java similarity index 100% rename from framework/src/main/java/cn/lili/modules/system/utils/SensitiveWordsNode.java rename to framework/src/main/java/cn/lili/common/sensitive/SensitiveWordsNode.java diff --git a/framework/src/main/java/cn/lili/modules/system/utils/StringPointer.java 
b/framework/src/main/java/cn/lili/common/sensitive/StringPointer.java similarity index 100% rename from framework/src/main/java/cn/lili/modules/system/utils/StringPointer.java rename to framework/src/main/java/cn/lili/common/sensitive/StringPointer.java diff --git a/framework/src/main/java/cn/lili/common/sensitive/init/SensitiveInit.java b/framework/src/main/java/cn/lili/common/sensitive/init/SensitiveInit.java new file mode 100644 index 00000000..a6a38b9b --- /dev/null +++ b/framework/src/main/java/cn/lili/common/sensitive/init/SensitiveInit.java @@ -0,0 +1,40 @@ +package cn.lili.cache.impl; + +import cn.lili.cache.Cache; +import cn.lili.cache.CachePrefix; +import cn.lili.common.sensitive.SensitiveWordsFilter; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.ApplicationArguments; +import org.springframework.boot.ApplicationRunner; +import org.springframework.stereotype.Component; + +import java.util.List; + +/** + * 初始化敏感词 + * + * @author Chopper + * @version v1.0 + * 2021-11-23 12:08 + */ +@Component +public class SensitiveInit implements ApplicationRunner { + + @Autowired + private Cache> cache; + + /** + * 程序启动时,获取最新的需要过滤的敏感词 + * + * @param args + */ + @Override + public void run(ApplicationArguments args) { + List sensitives = cache.get(CachePrefix.SENSITIVE.getPrefix()); + if (sensitives == null || sensitives.isEmpty()) { + return; + } + SensitiveWordsFilter.init(sensitives); + } + +} diff --git a/framework/src/main/java/cn/lili/common/sensitive/quartz/QuartzConfig.java b/framework/src/main/java/cn/lili/common/sensitive/quartz/QuartzConfig.java new file mode 100644 index 00000000..034c9fd2 --- /dev/null +++ b/framework/src/main/java/cn/lili/common/sensitive/quartz/QuartzConfig.java @@ -0,0 +1,11 @@ +package cn.lili.common.sensitive.quartz; +/** + * + * QuartzConfig + * @author Chopper + * @version v1.0 + * 2021-11-23 16:30 + * + */ +public class QuartzConfig { +} diff --git 
a/framework/src/main/java/cn/lili/common/sensitive/quartz/SensitiveQuartz.java b/framework/src/main/java/cn/lili/common/sensitive/quartz/SensitiveQuartz.java new file mode 100644 index 00000000..3f237ff3 --- /dev/null +++ b/framework/src/main/java/cn/lili/common/sensitive/quartz/SensitiveQuartz.java @@ -0,0 +1,11 @@ +package cn.lili.common.sensitive.quartz; +/** + * + * SensitiveQuartz + * @author Chopper + * @version v1.0 + * 2021-11-23 16:31 + * + */ +public class SensitiveQuartz { +} diff --git a/framework/src/main/java/cn/lili/modules/connect/util/UuidUtils.java b/framework/src/main/java/cn/lili/common/utils/UuidUtils.java similarity index 100% rename from framework/src/main/java/cn/lili/modules/connect/util/UuidUtils.java rename to framework/src/main/java/cn/lili/common/utils/UuidUtils.java diff --git a/framework/src/main/java/cn/lili/modules/system/utils/CharacterConstant.java b/framework/src/main/java/cn/lili/modules/system/utils/CharacterConstant.java deleted file mode 100644 index 44749097..00000000 --- a/framework/src/main/java/cn/lili/modules/system/utils/CharacterConstant.java +++ /dev/null @@ -1,20 +0,0 @@ -package cn.lili.modules.system.utils; - -/** - * - * 字符常量 - * @author Bulbasaur - * @version v1.0 - * @since v1.0 - * 2020-02-25 14:10:16 - */ -public class CharacterConstant { - - - /** - * 字符* - */ - public final static char WILDCARD_STAR = '*'; - - -} diff --git a/framework/src/main/resources/sensitive/sensitive_words.txt b/framework/src/main/resources/sensitive/sensitive_words.txt deleted file mode 100644 index 04912c08..00000000 --- a/framework/src/main/resources/sensitive/sensitive_words.txt +++ /dev/null @@ -1,6 +0,0 @@ -共产党 -习近平 -毛泽东 -胡锦涛 -邓小平 -强奸 \ No newline at end of file diff --git a/framework/src/main/resources/sensitive_words.txt b/framework/src/main/resources/sensitive_words.txt deleted file mode 100644 index 04912c08..00000000 --- a/framework/src/main/resources/sensitive_words.txt +++ /dev/null @@ -1,6 +0,0 @@ -共产党 -习近平 -毛泽东 -胡锦涛 
-邓小平 -强奸 \ No newline at end of file