Commit e8646980 by 王明范

pass word with tag

parent 83adf4b7
package com.secoo.so.suggest.task; package com.secoo.so.suggest.task;
import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.secoo.so.suggest.config.ConfigUtil; import com.secoo.so.suggest.config.ConfigUtil;
import com.secoo.so.suggest.db.DwDataSource; import com.secoo.so.suggest.db.DwDataSource;
import com.secoo.so.suggest.db.ErpDataSource; import com.secoo.so.suggest.db.ErpDataSource;
...@@ -279,7 +280,7 @@ public class SuggestTask { ...@@ -279,7 +280,7 @@ public class SuggestTask {
} }
private static void mergeKeywordTag(ConcurrentHashMap<String, EsSuggestKeywordInfo> esSuggestKeywordMap) { private static void mergeKeywordTag(ConcurrentHashMap<String, EsSuggestKeywordInfo> esSuggestKeywordMap) {
int maxCount = 100; int maxCount = 10000;
List<String> keyList = new ArrayList<>(esSuggestKeywordMap.keySet()); List<String> keyList = new ArrayList<>(esSuggestKeywordMap.keySet());
Collections.sort(keyList); // 按照字符排序,确保扩展词都在本词后面 Collections.sort(keyList); // 按照字符排序,确保扩展词都在本词后面
for(int i = 0;i< keyList.size(); i++) { for(int i = 0;i< keyList.size(); i++) {
...@@ -300,14 +301,30 @@ public class SuggestTask { ...@@ -300,14 +301,30 @@ public class SuggestTask {
break; break;
} }
String fulWord = keyList.get(j); String fulWord = keyList.get(j);
EsSuggestKeywordInfo tmpSuggest = esSuggestKeywordMap.get(fulWord);
if (StringUtils.isNotBlank(tmpSuggest.getSuggestTags())) {
continue;
}
int rightLen = StringUtils.getByteLength(fulWord) - wordLen; int rightLen = StringUtils.getByteLength(fulWord) - wordLen;
if (fulWord.startsWith(word) && rightLen > 3 && rightLen <= 12) { if (fulWord.startsWith(word)) {
if (rightLen > 3 && rightLen <= 14) {
String subWord = fulWord.substring(length, length + 1); String subWord = fulWord.substring(length, length + 1);
if (isEN && StringUtils.isEnStr(subWord)){ boolean isTShirt = false;
String rightWord = fulWord.substring(length, fulWord.length()).trim();
int realLen = StringUtils.getByteLength(rightWord);
if (rightWord.length() >= 2 && rightWord.toLowerCase().startsWith("t恤")) {
isTShirt = true;
}
if (isEN && (StringUtils.isEnStr(subWord) && !isTShirt)) {
continue;
}
if (realLen <= 3 || realLen > 14) {
continue; continue;
} }
suggestList.add(esSuggestKeywordMap.get(fulWord)); suggestList.add(esSuggestKeywordMap.get(fulWord));
keyCount++; keyCount++;
}
} else { } else {
break; break;
} }
...@@ -339,6 +356,9 @@ public class SuggestTask { ...@@ -339,6 +356,9 @@ public class SuggestTask {
sb.append(subWord); sb.append(subWord);
} }
if (sb.length() > 0) { if (sb.length() > 0) {
if ("lv女包".equals(word)) {
log.info("debugLog keyword tag:" + sb.toString());
}
suggestInfo.setSuggestTags(sb.toString()); suggestInfo.setSuggestTags(sb.toString());
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment