Commit 200bfd5b by 王明范

sqp cache

parent ccce4f93
...@@ -7,7 +7,8 @@ import com.secoo.search.sqp4j.QueryPlan; ...@@ -7,7 +7,8 @@ import com.secoo.search.sqp4j.QueryPlan;
import com.secoo.search.sqp4j.QueryWord; import com.secoo.search.sqp4j.QueryWord;
import com.secoo.search.sqp4j.client.QueryPlanClient; import com.secoo.search.sqp4j.client.QueryPlanClient;
import com.secoo.so.suggest.client.SqpDubboClient; import com.secoo.so.suggest.client.SqpDubboClient;
import org.apache.commons.lang3.StringUtils; import com.secoo.so.suggest.util.FileUtils;
import com.secoo.so.suggest.util.StringUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -28,8 +29,14 @@ public class QueryPlanHelper { ...@@ -28,8 +29,14 @@ public class QueryPlanHelper {
Map<String, Explanation> sqpCache = new HashMap<>(); Map<String, Explanation> sqpCache = new HashMap<>();
List<String> wordList = new ArrayList<>(); List<String> wordList = new ArrayList<>();
// In-memory cache: keyword -> number of query words reported for it.
// Filled from the cache file at construction time and on every successful remote lookup.
Map<String, Integer> keywordMap = new HashMap<>();
// Path of the cache file; each record is "keyword,wordCount,epochSeconds".
// NOTE(review): hard-coded path under a "test" directory - confirm this is intended for production runs.
private static String queryPlanFile = "/data/crontab/test/tmp/queryplan.txt";
// Records produced since the last write, waiting to be saved by writeQueryPlanToFile().
private static List<String> newWordLines = new ArrayList<>();
// Smallest timestamp (epoch seconds) seen while loading the cache file;
// stays at Long.MAX_VALUE when no valid record was loaded.
private static long minTimeStamp = Long.MAX_VALUE;
private QueryPlanHelper() { private QueryPlanHelper() {
client = SqpDubboClient.getProdImpl(); client = SqpDubboClient.getProdImpl();
loadQueryPlanFromFile();
} }
...@@ -44,6 +51,80 @@ public class QueryPlanHelper { ...@@ -44,6 +51,80 @@ public class QueryPlanHelper {
return instance; return instance;
} }
/**
 * Returns how many query words the query-plan service produces for {@code keyword}.
 *
 * <p>A hit in {@code keywordMap} is returned directly. On a miss the remote client is
 * asked to explain the keyword; the size of the first explanation's query-word list is
 * stored in {@code keywordMap} and queued in {@code newWordLines} as
 * {@code keyword,wordCount,epochSeconds} so it can be persisted later.
 *
 * @param keyword the search keyword to analyse; may be {@code null} or blank
 * @return the cached or freshly computed query-word count, or {@code 0} when the keyword
 *         is blank or the service returns no usable explanation (such misses are not cached)
 */
public int explainQueryWordCount(String keyword) {
    if (StringUtils.isBlank(keyword)) {
        return 0;
    }

    // Single lookup instead of containsKey + get.
    Integer cachedCount = keywordMap.get(keyword);
    if (cachedCount != null) {
        return cachedCount;
    }

    // Positional arguments of client.explain, named here for readability.
    String traceId = UUID.randomUUID().toString();
    Map<String, String> bucketInfo = new HashMap<>();
    Buckets bucket = new Buckets(bucketInfo);
    String cityCode = "";
    long currDate = 0L;
    int needSpell = 0;
    Explanations explanations = client.explain(traceId, bucket, cityCode, currDate, needSpell, keyword, null);

    // Guard against a response that carries no item list at all, not only an empty one.
    if (explanations == null || explanations.getItems() == null || explanations.getItems().isEmpty()) {
        return 0;
    }
    Explanation explanation = explanations.getItems().get(0);
    if (explanation == null || explanation.getQueryWords() == null) {
        return 0;
    }

    int wordCount = explanation.getQueryWords().size();
    keywordMap.put(keyword, wordCount);

    // The cache file is comma-separated and its loader only accepts lines with exactly
    // three fields, so a keyword containing a comma (or a line break) would produce a
    // record that is rejected on every load. Keep such keywords in memory only.
    boolean persistable = keyword.indexOf(',') < 0
            && keyword.indexOf('\n') < 0
            && keyword.indexOf('\r') < 0;
    if (persistable) {
        newWordLines.add(keyword + "," + wordCount + "," + (System.currentTimeMillis() / 1000));
    }
    return wordCount;
}
/**
 * Warms {@code keywordMap} from the cache file at {@code queryPlanFile}.
 *
 * <p>Each usable line has the form {@code keyword,wordCount,epochSeconds}. Blank lines,
 * lines that do not split into exactly three fields, and lines whose numeric fields fail
 * validation or parsing are skipped. {@code minTimeStamp} is lowered to the oldest
 * timestamp among the records that were actually loaded.
 */
private void loadQueryPlanFromFile() {
    List<String> lines = FileUtils.readLines(queryPlanFile);
    if (lines == null || lines.isEmpty()) {
        return;
    }

    for (String line : lines) {
        if (StringUtils.isBlank(line)) {
            continue;
        }
        String[] fields = line.split(",");
        if (fields.length != 3) {
            continue;
        }

        String keyword = fields[0];
        String strWordCount = fields[1];
        String ts = fields[2];
        if (!StringUtils.isNotBlank(keyword) || !StringUtils.isNumber(strWordCount) || !StringUtils.isNumber(ts)) {
            continue;
        }

        try {
            // Parse both numbers before touching any state, so a record that is rejected
            // (e.g. a count that overflows int) cannot influence minTimeStamp.
            long timeStamp = Long.parseLong(ts);
            int wordCount = Integer.parseInt(strWordCount);

            keywordMap.put(keyword, wordCount);
            if (timeStamp < minTimeStamp) {
                minTimeStamp = timeStamp;
            }
        } catch (NumberFormatException e) {
            // A malformed cache line is skipped; the remaining lines are still loaded.
            LOG.warn("string to integer exception,", e);
        }
    }
}
/**
 * Persists the keyword/word-count cache to {@code queryPlanFile}.
 *
 * <p>When the oldest timestamp loaded from the file is more than seven days old, the whole
 * of {@code keywordMap} is written out with the current timestamp, replacing the file.
 * Otherwise only the records queued in {@code newWordLines} since the last write are
 * appended. Nothing is written when there are no records to save.
 *
 * <p>NOTE(review): the third argument of {@code FileUtils.saveToFile} is assumed to be an
 * "append" flag (the incremental path only makes sense that way) - confirm against
 * {@code FileUtils}. Previously the full dump was also appended, which left the stale
 * records in the file and made every later run dump the whole map again.
 *
 * <p>NOTE(review): a full rewrite re-stamps existing counts rather than re-querying them,
 * so cached values are never refreshed - confirm this is intended.
 */
public void writeQueryPlanToFile() {
    long nowSecond = System.currentTimeMillis() / 1000;
    long sevenDays = 7L * 24 * 3600;

    // Oldest record in the file is older than 7 days: rewrite everything; otherwise append only new records.
    boolean fullRewrite = nowSecond - minTimeStamp > sevenDays && !keywordMap.isEmpty();
    if (fullRewrite) {
        List<String> allLines = new ArrayList<>(keywordMap.size());
        for (Map.Entry<String, Integer> entry : keywordMap.entrySet()) {
            allLines.add(entry.getKey() + "," + entry.getValue() + "," + nowSecond);
        }
        newWordLines = allLines;
    }

    if (newWordLines == null || newWordLines.isEmpty()) {
        return;
    }

    // Append for incremental saves, overwrite for a full rewrite so stale records are dropped.
    FileUtils.saveToFile(newWordLines, queryPlanFile, !fullRewrite);
    if (fullRewrite) {
        // Every record in the file now carries nowSecond; avoid another full rewrite in this process.
        minTimeStamp = nowSecond;
    }
    newWordLines = new ArrayList<>();
}
public Explanation explain(String keyword) { public Explanation explain(String keyword) {
if (StringUtils.isNotBlank(keyword)) { if (StringUtils.isNotBlank(keyword)) {
if (sqpCache.containsKey(keyword)) { if (sqpCache.containsKey(keyword)) {
......
...@@ -45,7 +45,7 @@ public class SuggestTask { ...@@ -45,7 +45,7 @@ public class SuggestTask {
private static Set<String> spWordSet = new HashSet<>(Arrays.asList( private static Set<String> spWordSet = new HashSet<>(Arrays.asList(
"靴子","鞋子","裤子","袜子","裙子","帽子","杯子","箱子","包包","包袋","包带","表带", "靴子","鞋子","裤子","袜子","裙子","帽子","杯子","箱子","包包","包袋","包带","表带",
"大号","中号","小号","衣服","t恤","衣服","男款","男士","男式","男性","男童","女款", "大号","中号","小号","衣服","t恤","衣服","男款","男士","男式","男性","男童","女款",
"女士","女式","女性","女童","大象")); "女士","女式","女性","女童","大象","男包","女包","男鞋","女鞋"));
private static List<Set<String>> synonymList = new ArrayList<>(); private static List<Set<String>> synonymList = new ArrayList<>();
public static void main(String[] args) { public static void main(String[] args) {
...@@ -70,10 +70,13 @@ public class SuggestTask { ...@@ -70,10 +70,13 @@ public class SuggestTask {
// 加载表填同义词 // 加载表填同义词
synonymList = loadTagSynonym(); synonymList = loadTagSynonym();
QueryPlanHelper sqp = QueryPlanHelper.getInstance();
// 加载搜索词并处理 // 加载搜索词并处理
processSuggestTask(startTime); processSuggestTask(startTime);
log.info("<<<<<<<<<<<< end run SuggestTask, startTime: {} , cost: {}ms", startTime, (System.currentTimeMillis() - startTime) ); log.info("<<<<<<<<<<<< end run SuggestTask, startTime: {} , cost: {}ms", startTime, (System.currentTimeMillis() - startTime) );
System.exit(0);
} }
private static Map<String, Long> loadBrandMap() { private static Map<String, Long> loadBrandMap() {
...@@ -296,6 +299,8 @@ public class SuggestTask { ...@@ -296,6 +299,8 @@ public class SuggestTask {
} }
} }
mergeKeywordTag(tmpSuggestKeywordMap); // 处理部分keyword,合并为其他词的tag mergeKeywordTag(tmpSuggestKeywordMap); // 处理部分keyword,合并为其他词的tag
QueryPlanHelper.getInstance().writeQueryPlanToFile();
if ("true".equalsIgnoreCase(System.getProperty("suggest.saveToFile"))) { if ("true".equalsIgnoreCase(System.getProperty("suggest.saveToFile"))) {
// save to file // save to file
saveSuggestKeywordToFile(suggestKeywordInfoList); saveSuggestKeywordToFile(suggestKeywordInfoList);
...@@ -448,23 +453,10 @@ public class SuggestTask { ...@@ -448,23 +453,10 @@ public class SuggestTask {
if (StringUtils.isNotBlank(rightWord)) { if (StringUtils.isNotBlank(rightWord)) {
log.info("check word:" + word + " and " + fullWord); log.info("check word:" + word + " and " + fullWord);
QueryPlanHelper sqp = QueryPlanHelper.getInstance(); QueryPlanHelper sqp = QueryPlanHelper.getInstance();
Explanation explan1 = sqp.explain(word); int wordCount1 = sqp.explainQueryWordCount(word);
Explanation explan2 = sqp.explain(rightWord); int wordCount2 = sqp.explainQueryWordCount(rightWord);
Explanation explan3 = sqp.explain(fullWord); int wordCount3 = sqp.explainQueryWordCount(fullWord);
if (explan1 != null && explan2 != null && explan3 != null) { if (wordCount1 + wordCount2 > wordCount3) {
List<QueryWord> queryWords1 = explan1.getQueryWords();
List<QueryWord> queryWords2 = explan2.getQueryWords();
List<QueryWord> queryWords3 = explan3.getQueryWords();
log.info("queryWords1 size:"+queryWords1.size()+"; queryWords2 size:"+queryWords2.size()+"; queryWords3 size:" + queryWords3.size());
if (queryWords1 != null && queryWords2 != null && queryWords3 != null) {
if (queryWords1.size() + queryWords2.size() > queryWords3.size()) {
return true;
}
} else {
return true;
}
} else {
return true; return true;
} }
} else { } else {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment