Commit 8878b19d by xupeng

调整过滤逻辑

parent b1006535
...@@ -89,6 +89,8 @@ ...@@ -89,6 +89,8 @@
<groupId>com.secoo.search.third-patry</groupId> <groupId>com.secoo.search.third-patry</groupId>
<artifactId>third-patry-jpinyin</artifactId> <artifactId>third-patry-jpinyin</artifactId>
<version>1.1.8</version> <version>1.1.8</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/third-patry-jpinyin-1.1.8.jar</systemPath>
</dependency> </dependency>
<!-- es --> <!-- es -->
......
...@@ -69,7 +69,19 @@ public class SuggestTask { ...@@ -69,7 +69,19 @@ public class SuggestTask {
putIfKeyNotBlank(brandMap, cleanKeyword(brandInfo.getEnName()), brandInfo.getId()); putIfKeyNotBlank(brandMap, cleanKeyword(brandInfo.getEnName()), brandInfo.getId());
putIfKeyNotBlank(brandMap, cleanKeyword(brandInfo.getChName()), brandInfo.getId()); putIfKeyNotBlank(brandMap, cleanKeyword(brandInfo.getChName()), brandInfo.getId());
putIfKeyNotBlank(brandMap, cleanKeyword(brandInfo.getShortName()), brandInfo.getId()); putIfKeyNotBlank(brandMap, cleanKeyword(brandInfo.getShortName()), brandInfo.getId());
putIfKeyNotBlank(brandMap, cleanKeyword(brandInfo.getNickName()), brandInfo.getId());
if (StringUtils.isNotBlank(brandInfo.getNickName()) && brandInfo.getNickName().contains(",")) {
List<String> nickNameList = StringUtils.splitToList(brandInfo.getNickName(), ",");
if (CollectionUtils.isNotEmpty(nickNameList)) {
nickNameList.forEach(nickName -> {
if (StringUtils.isNotBlank(nickName)) {
putIfKeyNotBlank(brandMap, cleanKeyword(nickName), brandInfo.getId());
}
});
}
} else {
putIfKeyNotBlank(brandMap, cleanKeyword(brandInfo.getNickName()), brandInfo.getId());
}
} }
} }
return brandMap; return brandMap;
...@@ -423,17 +435,6 @@ public class SuggestTask { ...@@ -423,17 +435,6 @@ public class SuggestTask {
*/ */
private static boolean isFilterSuggestKeyword(EsSuggestKeywordInfo suggestKeywordInfo) { private static boolean isFilterSuggestKeyword(EsSuggestKeywordInfo suggestKeywordInfo) {
// 品牌词 类目词 人工干预词 不做过滤
if (suggestKeywordInfo.getIsBrand() || suggestKeywordInfo.getIsCategory() || suggestKeywordInfo.getIsManual()) {
suggestKeywordInfo.setIsSensitive(false);
return false;
}
// 敏感词过滤
if (suggestKeywordInfo.getIsSensitive()) {
return true;
}
// 过滤掉太短、太长的词 // 过滤掉太短、太长的词
if (StringUtils.isBlank(suggestKeywordInfo.getKeyword()) if (StringUtils.isBlank(suggestKeywordInfo.getKeyword())
|| suggestKeywordInfo.getKeyword().length() <= 1 || suggestKeywordInfo.getKeyword().length() <= 1
...@@ -441,8 +442,8 @@ public class SuggestTask { ...@@ -441,8 +442,8 @@ public class SuggestTask {
return true; return true;
} }
// 过滤掉纯数字的搜索词,原:过滤掉商品id,商品id是有7位数字组成 // 敏感词过滤
if (suggestKeywordInfo.getKeyword().length() > 6 && StringUtils.isNumber(suggestKeywordInfo.getKeyword())) { if (suggestKeywordInfo.getIsSensitive()) {
return true; return true;
} }
...@@ -453,6 +454,18 @@ public class SuggestTask { ...@@ -453,6 +454,18 @@ public class SuggestTask {
} }
} }
// 品牌词 类目词 人工干预词 不做过滤
if (suggestKeywordInfo.getIsBrand() || suggestKeywordInfo.getIsCategory() || suggestKeywordInfo.getIsManual()) {
suggestKeywordInfo.setIsSensitive(false);
return false;
}
// 过滤掉纯数字的搜索词,原:过滤掉商品id,商品id是有7位数字组成
if (suggestKeywordInfo.getKeyword().length() > 6 && StringUtils.isNumber(suggestKeywordInfo.getKeyword())) {
return true;
}
// 年数据过滤 // 年数据过滤
if (suggestKeywordInfo.getYearCount() < 2 || suggestKeywordInfo.getYearClickCount() < 2) { if (suggestKeywordInfo.getYearCount() < 2 || suggestKeywordInfo.getYearClickCount() < 2) {
return true; return true;
......
# suggestTask # suggestTask
suggestTask.prefixFilterList=["https://", "http://", "dg", "d & g", "dolce&gabbana","dolce & gabbana", "\u675C\u5609\u73ED\u7EB3", "\u907F\u5B55", "\u60C5\u8DA3", "cucci", "\u4E52\u4E53\u7403", "cuccl", "gucii","tod's","iwc7"] suggestTask.prefixFilterList=["https://", "http://", "dg", "d & g", "dolce&gabbana","dolce & gabbana", "杜嘉班纳", "避孕", "情趣", "cucci", "乒乓球", "cuccl", "gucii","tod's","iwc7"]
suggestTask.ManualFolder=/data/pssmaster/corpus_set/suggest_corpus/manual suggestTask.ManualFolder=/data/pssmaster/corpus_set/suggest_corpus/manual
suggestTask.SensitiveFolder=/data/pssmaster/corpus_set/suggest_corpus/sensitive suggestTask.SensitiveFolder=/data/pssmaster/corpus_set/suggest_corpus/sensitive
suggestTask.EuropeWordFolder=/data/pssmaster/corpus_set/suggest_corpus/europe_word suggestTask.EuropeWordFolder=/data/pssmaster/corpus_set/suggest_corpus/europe_word
......
# suggestTask # suggestTask
suggestTask.prefixFilterList=["https://", "http://", "dg", "d & g", "dolce&gabbana","dolce & gabbana", "\u675C\u5609\u73ED\u7EB3", "\u907F\u5B55", "\u60C5\u8DA3", "cucci", "\u4E52\u4E53\u7403", "cuccl", "gucii","tod's","iwc7"] suggestTask.prefixFilterList=["https://", "http://", "dg", "d & g", "dolce&gabbana","dolce & gabbana", "杜嘉班纳", "避孕", "情趣", "cucci", "乒乓球", "cuccl", "gucii","tod's","iwc7"]
suggestTask.ManualFolder=/data/pssmaster/corpus_set/suggest_corpus/manual suggestTask.ManualFolder=/data/pssmaster/corpus_set/suggest_corpus/manual
suggestTask.SensitiveFolder=/data/pssmaster/corpus_set/suggest_corpus/sensitive suggestTask.SensitiveFolder=/data/pssmaster/corpus_set/suggest_corpus/sensitive
suggestTask.EuropeWordFolder=/data/pssmaster/corpus_set/suggest_corpus/europe_word suggestTask.EuropeWordFolder=/data/pssmaster/corpus_set/suggest_corpus/europe_word
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment