Commit cc2602ef by 王明范

增加从配置文件中读部分词典

parent 200bfd5b
...@@ -7,8 +7,10 @@ import com.secoo.search.sqp4j.QueryPlan; ...@@ -7,8 +7,10 @@ import com.secoo.search.sqp4j.QueryPlan;
import com.secoo.search.sqp4j.QueryWord; import com.secoo.search.sqp4j.QueryWord;
import com.secoo.search.sqp4j.client.QueryPlanClient; import com.secoo.search.sqp4j.client.QueryPlanClient;
import com.secoo.so.suggest.client.SqpDubboClient; import com.secoo.so.suggest.client.SqpDubboClient;
import com.secoo.so.suggest.config.ConfigUtil;
import com.secoo.so.suggest.util.FileUtils; import com.secoo.so.suggest.util.FileUtils;
import com.secoo.so.suggest.util.StringUtils; import com.secoo.so.suggest.util.StringUtils;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -30,11 +32,13 @@ public class QueryPlanHelper { ...@@ -30,11 +32,13 @@ public class QueryPlanHelper {
List<String> wordList = new ArrayList<>(); List<String> wordList = new ArrayList<>();
Map<String, Integer> keywordMap = new HashMap<>(); Map<String, Integer> keywordMap = new HashMap<>();
private static String queryPlanFile = "/data/crontab/test/tmp/queryplan.txt"; private static String queryPlanFile = ""; ///data/crontab/test/tmp/queryplan.txt
private static List<String> newWordLines = new ArrayList<>(); private static List<String> newWordLines = new ArrayList<>();
private static long minTimeStamp = Long.MAX_VALUE; private static long minTimeStamp = Long.MAX_VALUE;
private QueryPlanHelper() { private QueryPlanHelper() {
queryPlanFile = ConfigUtil.getString("queryPlan.cachePath", "");
client = SqpDubboClient.getProdImpl(); client = SqpDubboClient.getProdImpl();
loadQueryPlanFromFile(); loadQueryPlanFromFile();
...@@ -78,27 +82,29 @@ public class QueryPlanHelper { ...@@ -78,27 +82,29 @@ public class QueryPlanHelper {
} }
private void loadQueryPlanFromFile() { private void loadQueryPlanFromFile() {
List<String> lines = FileUtils.readLines(queryPlanFile); if (StringUtils.isNotBlank(queryPlanFile)) {
if (lines != null && lines.size() > 0) { List<String> lines = FileUtils.readLines(queryPlanFile);
for (String line : lines) { if (lines != null && lines.size() > 0) {
if (StringUtils.isBlank(line)) { for (String line : lines) {
continue; if (StringUtils.isBlank(line)) {
} continue;
String[] arr = line.split(","); }
if(arr.length == 3) { String[] arr = line.split(",");
String keyword = arr[0]; if (arr.length == 3) {
String strWordCount = arr[1]; String keyword = arr[0];
String ts = arr[2]; String strWordCount = arr[1];
if (StringUtils.isNotBlank(keyword) && StringUtils.isNumber(strWordCount) && StringUtils.isNumber(ts)) { String ts = arr[2];
try { if (StringUtils.isNotBlank(keyword) && StringUtils.isNumber(strWordCount) && StringUtils.isNumber(ts)) {
long timeStamp = Long.valueOf(ts); try {
if (timeStamp < minTimeStamp) { long timeStamp = Long.valueOf(ts);
minTimeStamp = timeStamp; if (timeStamp < minTimeStamp) {
minTimeStamp = timeStamp;
}
int wordCount = Integer.valueOf(strWordCount);
keywordMap.put(keyword, wordCount);
} catch (Exception e) {
LOG.info("string to integer exception,", e);
} }
int wordCount = Integer.valueOf(strWordCount);
keywordMap.put(keyword, wordCount);
} catch (Exception e) {
LOG.info("string to integer exception,", e);
} }
} }
} }
...@@ -106,6 +112,9 @@ public class QueryPlanHelper { ...@@ -106,6 +112,9 @@ public class QueryPlanHelper {
} }
} }
public void writeQueryPlanToFile() { public void writeQueryPlanToFile() {
if (StringUtils.isBlank(queryPlanFile)) {
return;
}
long nowSecond = System.currentTimeMillis()/1000; long nowSecond = System.currentTimeMillis()/1000;
long sevenDays = 3600 * 24 * 7; long sevenDays = 3600 * 24 * 7;
if (nowSecond - minTimeStamp > sevenDays) { if (nowSecond - minTimeStamp > sevenDays) {
......
package com.secoo.so.suggest.helper;
import com.secoo.so.suggest.config.ConfigUtil;
import com.secoo.so.suggest.util.FileUtils;
import com.secoo.so.suggest.util.StringUtils;
import com.sun.xml.internal.ws.binding.FeatureListUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Loads word dictionaries from files whose locations are read from configuration.
 *
 * <p>Both loaders are best-effort: when the configuration key is blank or the
 * file yields no lines, an empty (never {@code null}) set is returned.
 *
 * @author wangmingfan
 * @date 2022/8/8
 */
public class WordHelper {
    private static final Logger LOG = LoggerFactory.getLogger(WordHelper.class);

    /** Configuration key whose value is the path of the special-word file. */
    private static final String SPECIAL_WORD_PATH_KEY = "specialWordPath";

    /** Configuration key whose value is the path of the synonym-tag file. */
    private static final String SYNONYM_TAG_PATH_KEY = "synonymTagPath";

    /**
     * Loads the special words that must not be split, one word per line of the file.
     *
     * <p>Every line of the file is added as-is (no trimming or filtering).
     *
     * @return a mutable set of the words read; empty when the path is not
     *         configured or no lines could be read
     * @date 2022/8/8
     */
    public static Set<String> loadSpecialWords() {
        Set<String> words = new HashSet<>();
        String specialWordPath = ConfigUtil.getString(SPECIAL_WORD_PATH_KEY);
        if (StringUtils.isBlank(specialWordPath)) {
            return words;
        }
        List<String> lines = FileUtils.readLines(specialWordPath);
        if (lines == null) {
            LOG.warn("no lines read from special word file: {}", specialWordPath);
            return words;
        }
        words.addAll(lines);
        return words;
    }

    /**
     * Loads synonym tag groups from the configured file.
     *
     * <p>Each non-blank line is one group of synonyms separated by a vertical
     * bar, for example:
     * <pre>
     * 男款|男式|男士
     * 女款|女式|女士
     * </pre>
     *
     * @return a mutable set of synonym groups; empty when the path is not
     *         configured or no lines could be read
     * @date 2022/8/8
     */
    public static Set<Set<String>> loadSynonymTags() {
        Set<Set<String>> synonyms = new HashSet<>();
        String synonymTagPath = ConfigUtil.getString(SYNONYM_TAG_PATH_KEY);
        if (StringUtils.isBlank(synonymTagPath)) {
            return synonyms;
        }
        List<String> lines = FileUtils.readLines(synonymTagPath);
        // Other callers of FileUtils.readLines in this project guard against a
        // null result; without this guard a missing file would throw an NPE here.
        if (lines == null) {
            LOG.warn("no lines read from synonym tag file: {}", synonymTagPath);
            return synonyms;
        }
        for (String line : lines) {
            if (StringUtils.isBlank(line)) {
                continue;
            }
            String[] arrWords = line.split("\\|");
            // A line made only of separators splits into an empty array; skip it.
            if (arrWords.length > 0) {
                synonyms.add(new HashSet<>(Arrays.asList(arrWords)));
            }
        }
        return synonyms;
    }
}
...@@ -15,6 +15,7 @@ import com.secoo.so.suggest.entity.SearchKeywordInfo; ...@@ -15,6 +15,7 @@ import com.secoo.so.suggest.entity.SearchKeywordInfo;
import com.secoo.so.suggest.es.EsClient; import com.secoo.so.suggest.es.EsClient;
import com.secoo.so.suggest.es.EsObject; import com.secoo.so.suggest.es.EsObject;
import com.secoo.so.suggest.helper.QueryPlanHelper; import com.secoo.so.suggest.helper.QueryPlanHelper;
import com.secoo.so.suggest.helper.WordHelper;
import com.secoo.so.suggest.util.*; import com.secoo.so.suggest.util.*;
import lombok.Data; import lombok.Data;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
...@@ -42,11 +43,8 @@ public class SuggestTask { ...@@ -42,11 +43,8 @@ public class SuggestTask {
private static int maxTagSize = 5; private static int maxTagSize = 5;
private static long startTime = System.currentTimeMillis(); private static long startTime = System.currentTimeMillis();
private static Set<String> spWordSet = new HashSet<>(Arrays.asList( private static Set<String> spWordSet = new HashSet<>();
"靴子","鞋子","裤子","袜子","裙子","帽子","杯子","箱子","包包","包袋","包带","表带", private static Set<Set<String>> synonymList = new HashSet<>();
"大号","中号","小号","衣服","t恤","衣服","男款","男士","男式","男性","男童","女款",
"女士","女式","女性","女童","大象","男包","女包","男鞋","女鞋"));
private static List<Set<String>> synonymList = new ArrayList<>();
public static void main(String[] args) { public static void main(String[] args) {
startTime = System.currentTimeMillis(); startTime = System.currentTimeMillis();
...@@ -67,7 +65,10 @@ public class SuggestTask { ...@@ -67,7 +65,10 @@ public class SuggestTask {
sensitiveMap = loadSensitiveMap(); sensitiveMap = loadSensitiveMap();
europeWordMap = loadEuropeWordMap(); europeWordMap = loadEuropeWordMap();
// 加载表填同义词 // 加载部分确定不能分割的特殊词
spWordSet = loadSpecialWords();
// 加载标签同义词
synonymList = loadTagSynonym(); synonymList = loadTagSynonym();
QueryPlanHelper sqp = QueryPlanHelper.getInstance(); QueryPlanHelper sqp = QueryPlanHelper.getInstance();
...@@ -197,13 +198,29 @@ public class SuggestTask { ...@@ -197,13 +198,29 @@ public class SuggestTask {
return prefixFilterList; return prefixFilterList;
} }
private static List<Set<String>> loadTagSynonym(){ private static Set<Set<String>> loadTagSynonym(){
List<Set<String>> synList = new ArrayList<>(); Set<Set<String>> synSet = new HashSet<>();
Set<String> maleWords = new HashSet<>(Arrays.asList("男性","男式","男士","男款","男")); Set<String> maleWords = new HashSet<>(Arrays.asList("男性","男式","男士","男款","男"));
Set<String> femaleWords = new HashSet<>(Arrays.asList("女性","女式","女士","女款","女")); Set<String> femaleWords = new HashSet<>(Arrays.asList("女性","女式","女士","女款","女"));
synList.add(maleWords); synSet.add(maleWords);
synList.add(femaleWords); synSet.add(femaleWords);
return synList; Set<Set<String>> fileSynonyms = WordHelper.loadSynonymTags();
if (fileSynonyms.size() > 0) {
synSet.addAll(fileSynonyms);
}
return synSet;
}
private static Set<String> loadSpecialWords(){
Set<String> baseSet = new HashSet<>(Arrays.asList(
"靴子","鞋子","裤子","袜子","裙子","帽子","杯子","箱子","包包","包袋","包带","表带",
"大号","中号","小号","衣服","t恤","衣服","男款","男士","男式","男性","男童","女款",
"女士","女式","女性","女童","大象","男包","女包","男鞋","女鞋"));
Set<String> fileWords = WordHelper.loadSpecialWords();
if (fileWords.size() > 0) {
baseSet.addAll(fileWords);
}
return baseSet;
} }
private static String cleanKeyword(String keyword) { private static String cleanKeyword(String keyword) {
......
...@@ -14,3 +14,7 @@ suggestTask.es.password=search5z0NvEn1D ...@@ -14,3 +14,7 @@ suggestTask.es.password=search5z0NvEn1D
suggestTask.es.index=search_suggest_index suggestTask.es.index=search_suggest_index
suggestTask.es.type=search_suggest_type suggestTask.es.type=search_suggest_type
suggestTask.es.batchSize=2000 suggestTask.es.batchSize=2000
queryPlan.cachePath=/data/crontab/suggest/tmp/queryplan.txt
specialWordPath=/data/crontab/suggest/dict/specialWord.txt
synonymTagPath=/data/crontab/suggest/dict/synonymTag.txt
...@@ -13,4 +13,8 @@ suggestTask.es.user=search ...@@ -13,4 +13,8 @@ suggestTask.es.user=search
suggestTask.es.password=search5z0NvEn1D suggestTask.es.password=search5z0NvEn1D
suggestTask.es.index=search_suggest_index_huidu suggestTask.es.index=search_suggest_index_huidu
suggestTask.es.type=search_suggest_type suggestTask.es.type=search_suggest_type
suggestTask.es.batchSize=2000 suggestTask.es.batchSize=2000
\ No newline at end of file
queryPlan.cachePath=/data/crontab/test/tmp/queryplan.txt
specialWordPath=/data/crontab/test/dict/specialWord.txt
synonymTagPath=/data/crontab/test/dict/synonymTag.txt
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment