From fdf81d1b39baf715c0e015d83a266290eb8c0a1a Mon Sep 17 00:00:00 2001 From: Chopper Date: Tue, 2 Aug 2022 10:51:10 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=E5=88=86=E8=AF=8D=E9=97=AE=E9=A2=98?= =?UTF-8?q?=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../search/entity/dos/CustomWords.java | 5 ++ .../serviceimpl/EsGoodsIndexServiceImpl.java | 49 ++++++++++++------- 2 files changed, 36 insertions(+), 18 deletions(-) diff --git a/framework/src/main/java/cn/lili/modules/search/entity/dos/CustomWords.java b/framework/src/main/java/cn/lili/modules/search/entity/dos/CustomWords.java index 4b53e75f..00d7ab44 100644 --- a/framework/src/main/java/cn/lili/modules/search/entity/dos/CustomWords.java +++ b/framework/src/main/java/cn/lili/modules/search/entity/dos/CustomWords.java @@ -11,6 +11,7 @@ import lombok.NoArgsConstructor; import org.hibernate.validator.constraints.Length; import javax.validation.constraints.NotEmpty; +import java.util.Date; /** * 自定义分词 @@ -41,4 +42,8 @@ public class CustomWords extends BaseEntity { private Integer disabled; + public CustomWords(String name) { + this.name = name; + this.disabled = 0; + } } diff --git a/framework/src/main/java/cn/lili/modules/search/serviceimpl/EsGoodsIndexServiceImpl.java b/framework/src/main/java/cn/lili/modules/search/serviceimpl/EsGoodsIndexServiceImpl.java index 4695102d..a2054507 100644 --- a/framework/src/main/java/cn/lili/modules/search/serviceimpl/EsGoodsIndexServiceImpl.java +++ b/framework/src/main/java/cn/lili/modules/search/serviceimpl/EsGoodsIndexServiceImpl.java @@ -283,38 +283,51 @@ public class EsGoodsIndexServiceImpl extends BaseElasticsearchService implements */ private void analyzeAndSaveWords(EsGoodsIndex goods) { try { - //分词器分词 - AnalyzeRequest analyzeRequest = AnalyzeRequest.withIndexAnalyzer(getIndexName(), "ik_max_word", goods.getGoodsName()); - AnalyzeResponse analyze = client.indices().analyze(analyzeRequest, RequestOptions.DEFAULT); - List tokens = analyze.getTokens(); - List customWordsList = new ArrayList<>(); List keywordsList = new ArrayList<>(); + //根据商品参数分词 if (goods.getAttrList() != null && !goods.getAttrList().isEmpty()) { //保存分词 for (EsGoodsAttribute esGoodsAttribute : goods.getAttrList()) { if (keywordsList.stream().noneMatch(i -> i.toLowerCase(Locale.ROOT).equals(esGoodsAttribute.getValue().toLowerCase(Locale.ROOT)))) { keywordsList.add(esGoodsAttribute.getValue()); - customWordsList.add(new CustomWords(esGoodsAttribute.getValue(), 0)); } } } - //分析词条 - for (AnalyzeResponse.AnalyzeToken token : tokens) { - if (keywordsList.stream().noneMatch(i -> i.toLowerCase(Locale.ROOT).equals(token.getTerm().toLowerCase(Locale.ROOT)))) { - keywordsList.add(token.getTerm()); - customWordsList.add(new CustomWords(token.getTerm(), 0)); + //根据商品名称生成分词 + keywordsList.add(goods.getGoodsName().substring(0, Math.min(goods.getGoodsName().length(), 10))); + + //如果有分词 + if (!keywordsList.isEmpty()) { + //去除重复词 + removeDuplicate(keywordsList); + //入库自定义分词 + List customWordsArrayList = new ArrayList<>(); + keywordsList.stream().forEach(item -> { + customWordsArrayList.add(new CustomWords(item)); + }); + //这里采用先批量删除再插入的方法,故意这么做。否则需要挨个匹配是否存在,性能消耗更大 + if (CollUtil.isNotEmpty(customWordsArrayList)) { + customWordsService.deleteBathByName(keywordsList); + customWordsService.insertBatchCustomWords(customWordsArrayList); } - //保存词条进入数据库 } - if (CollUtil.isNotEmpty(customWordsList)) { - customWordsService.deleteBathByName(keywordsList); - customWordsService.insertBatchCustomWords(customWordsList); - } - } catch (IOException e) { - log.info(goods + "分词错误", e); + } catch (Exception e) { + log.info(goods + "自定义分词错误", e); } } + /** + * 去除 重复元素 + * + * @param list + * @return + */ + public static void removeDuplicate(List list) { + HashSet h = new HashSet(list); + list.clear(); + list.addAll(h); + } + /** * 更新商品索引的的部分属性(只填写更新的字段,不需要更新的字段不要填写) * From b860c2eed0f97de04ef5495b3abf752cd8164b0b Mon Sep 17 00:00:00 2001 From: Chopper Date: Tue, 2 Aug 2022 10:52:47 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=E4=BC=98=E5=8C=96=E7=BB=93=E6=9E=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../serviceimpl/EsGoodsIndexServiceImpl.java | 23 ++++++++----------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/framework/src/main/java/cn/lili/modules/search/serviceimpl/EsGoodsIndexServiceImpl.java b/framework/src/main/java/cn/lili/modules/search/serviceimpl/EsGoodsIndexServiceImpl.java index a2054507..1d3ed8ca 100644 --- a/framework/src/main/java/cn/lili/modules/search/serviceimpl/EsGoodsIndexServiceImpl.java +++ b/framework/src/main/java/cn/lili/modules/search/serviceimpl/EsGoodsIndexServiceImpl.java @@ -296,20 +296,15 @@ public class EsGoodsIndexServiceImpl extends BaseElasticsearchService implements //根据商品名称生成分词 keywordsList.add(goods.getGoodsName().substring(0, Math.min(goods.getGoodsName().length(), 10))); - //如果有分词 - if (!keywordsList.isEmpty()) { - //去除重复词 - removeDuplicate(keywordsList); - //入库自定义分词 - List customWordsArrayList = new ArrayList<>(); - keywordsList.stream().forEach(item -> { - customWordsArrayList.add(new CustomWords(item)); - }); - //这里采用先批量删除再插入的方法,故意这么做。否则需要挨个匹配是否存在,性能消耗更大 - if (CollUtil.isNotEmpty(customWordsArrayList)) { - customWordsService.deleteBathByName(keywordsList); - customWordsService.insertBatchCustomWords(customWordsArrayList); - } + //去除重复词 + removeDuplicate(keywordsList); + //入库自定义分词 + List customWordsArrayList = new ArrayList<>(); + keywordsList.forEach(item -> customWordsArrayList.add(new CustomWords(item))); + //这里采用先批量删除再插入的方法,故意这么做。否则需要挨个匹配是否存在,性能消耗更大 + if (CollUtil.isNotEmpty(customWordsArrayList)) { + customWordsService.deleteBathByName(keywordsList); + customWordsService.insertBatchCustomWords(customWordsArrayList); } } catch (Exception e) { log.info(goods + "自定义分词错误", e);