Commit 93b80089 by xupeng

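Aggregate suggest keywords across batches in a shared map and save the merged result once after all tasks finish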

parent d2e9cd84
...@@ -14,10 +14,8 @@ import lombok.extern.slf4j.Slf4j; ...@@ -14,10 +14,8 @@ import lombok.extern.slf4j.Slf4j;
import java.io.File; import java.io.File;
import java.io.Serializable; import java.io.Serializable;
import java.util.ArrayList; import java.util.*;
import java.util.HashMap; import java.util.concurrent.ConcurrentHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors; import java.util.concurrent.Executors;
...@@ -201,12 +199,13 @@ public class SuggestTask { ...@@ -201,12 +199,13 @@ public class SuggestTask {
return; return;
} }
ConcurrentHashMap<String, EsSuggestKeywordInfo> esSuggestKeywordMap = new ConcurrentHashMap<>();
// 通过线程池分批次并发处理搜索词 // 通过线程池分批次并发处理搜索词
long batchSize = ConfigUtil.getLong("suggestTask.batchSize", 10000); long batchSize = ConfigUtil.getLong("suggestTask.batchSize", 10000);
int threadPoolSize = ConfigUtil.getInt("suggestTask.threadPoolSize", 10); int threadPoolSize = ConfigUtil.getInt("suggestTask.threadPoolSize", 10);
ExecutorService execThreadPool = Executors.newFixedThreadPool(threadPoolSize); ExecutorService execThreadPool = Executors.newFixedThreadPool(threadPoolSize);
for (long startId = minId; startId <= maxId; startId = startId + batchSize) { for (long startId = minId; startId <= maxId; startId = startId + batchSize) {
execThreadPool.submit(new SearchKeywordProcessTask(startId, startId + batchSize, startTime)); execThreadPool.submit(new SearchKeywordProcessTask(esSuggestKeywordMap, startId, startId + batchSize, startTime));
} }
execThreadPool.shutdown(); execThreadPool.shutdown();
while (true) { while (true) {
...@@ -216,55 +215,73 @@ public class SuggestTask { ...@@ -216,55 +215,73 @@ public class SuggestTask {
} }
ObjectUtils.safeSleep(5000); ObjectUtils.safeSleep(5000);
} }
// 保存到es
// saveSuggestKeywordToEs(new ArrayList<>(esSuggestKeywordMap.values()));
// for test
saveSuggestKeywordToFile(new ArrayList<>(esSuggestKeywordMap.values()));
} }
/** /**
* 处理搜索词 * 处理搜索词
*/ */
private static void processSearchKeyword(List<SearchKeywordInfo> searchKeywordInfoList, long startTime) { private static void processSearchKeyword(ConcurrentHashMap<String, EsSuggestKeywordInfo> esSuggestKeywordMap, List<SearchKeywordInfo> searchKeywordInfoList, long startTime) {
if (CollectionUtils.isNotEmpty(searchKeywordInfoList)) { if (CollectionUtils.isNotEmpty(searchKeywordInfoList)) {
List<EsSuggestKeywordInfo> suggestKeywordInfoList = new ArrayList<>();
for (SearchKeywordInfo searchKeywordInfo : searchKeywordInfoList) { for (SearchKeywordInfo searchKeywordInfo : searchKeywordInfoList) {
if (StringUtils.isNotBlank(searchKeywordInfo.getKeyword())) { if (StringUtils.isNotBlank(searchKeywordInfo.getKeyword())) {
EsSuggestKeywordInfo suggestKeywordInfo = new EsSuggestKeywordInfo();
suggestKeywordInfo.setKeyword(cleanKeyword(searchKeywordInfo.getKeyword())); String keyword = cleanKeyword(searchKeywordInfo.getKeyword());
suggestKeywordInfo.setKeywordPinYin(PinYinUtils.changeToWithoutTonePinYin(suggestKeywordInfo.getKeyword(), "")); synchronized (keyword) {
suggestKeywordInfo.setYearCount(searchKeywordInfo.getYearPv()); EsSuggestKeywordInfo suggestKeywordInfo = esSuggestKeywordMap.get(keyword);
suggestKeywordInfo.setYearClickCount(searchKeywordInfo.getYearProductClickCount()); if (suggestKeywordInfo == null) {
suggestKeywordInfo.setYearCartCount(searchKeywordInfo.getYearAddCartCount()); suggestKeywordInfo = new EsSuggestKeywordInfo();
suggestKeywordInfo.setWeekCount(searchKeywordInfo.getWeekPv()); suggestKeywordInfo.setKeyword(keyword);
suggestKeywordInfo.setWeekClickCount(searchKeywordInfo.getWeekProductClickCount()); suggestKeywordInfo.setKeywordPinYin(PinYinUtils.changeToWithoutTonePinYin(suggestKeywordInfo.getKeyword(), ""));
suggestKeywordInfo.setWeekCartCount(searchKeywordInfo.getWeekAddCartCount());
suggestKeywordInfo.setSuggestTags(searchKeywordInfo.getPrepareTags()); suggestKeywordInfo.setYearCount(searchKeywordInfo.getYearPv());
suggestKeywordInfo.setKeywordVersion(searchKeywordInfo.getPDay()); suggestKeywordInfo.setYearClickCount(searchKeywordInfo.getYearProductClickCount());
suggestKeywordInfo.setUpdateTime(startTime); suggestKeywordInfo.setYearCartCount(searchKeywordInfo.getYearAddCartCount());
suggestKeywordInfo.setWeekCount(searchKeywordInfo.getWeekPv());
suggestKeywordInfo.setIsBrand(brandMap.containsKey(suggestKeywordInfo.getKeyword())); suggestKeywordInfo.setWeekClickCount(searchKeywordInfo.getWeekProductClickCount());
suggestKeywordInfo.setIsCategory(categoryMap.containsKey(suggestKeywordInfo.getKeyword())); suggestKeywordInfo.setWeekCartCount(searchKeywordInfo.getWeekAddCartCount());
suggestKeywordInfo.setIsSensitive(sensitiveMap.containsKey(suggestKeywordInfo.getKeyword()));
suggestKeywordInfo.setIsEuropeWord(europeWordMap.containsKey(suggestKeywordInfo.getKeyword())); suggestKeywordInfo.setSuggestTags(searchKeywordInfo.getPrepareTags());
suggestKeywordInfo.setIsManual(manualMap.containsKey(suggestKeywordInfo.getKeyword())); suggestKeywordInfo.setKeywordVersion(searchKeywordInfo.getPDay());
suggestKeywordInfo.setManualValue(suggestKeywordInfo.getIsManual() ? manualMap.get(suggestKeywordInfo.getKeyword()) : 0); suggestKeywordInfo.setUpdateTime(startTime);
// 计算suggestKeyword权重等属性 suggestKeywordInfo.setIsBrand(brandMap.containsKey(keyword));
processEsSuggestKeywordInfo(suggestKeywordInfo, searchKeywordInfo); suggestKeywordInfo.setIsCategory(categoryMap.containsKey(keyword));
suggestKeywordInfo.setIsSensitive(sensitiveMap.containsKey(keyword));
// 不过滤的suggest词,计算分值写es suggestKeywordInfo.setIsEuropeWord(europeWordMap.containsKey(keyword));
if (!isFilterSuggestKeyword(suggestKeywordInfo)) { suggestKeywordInfo.setIsManual(manualMap.containsKey(keyword));
suggestKeywordInfo.setManualValue(suggestKeywordInfo.getIsManual() ? manualMap.get(keyword) : 0);
// 保存es前执行标签清洗
cleanBeforeSaveToEs(suggestKeywordInfo); esSuggestKeywordMap.put(keyword, suggestKeywordInfo);
} else {
suggestKeywordInfoList.add(suggestKeywordInfo); suggestKeywordInfo.setYearCount(suggestKeywordInfo.getYearCount() + searchKeywordInfo.getYearPv());
suggestKeywordInfo.setYearClickCount(suggestKeywordInfo.getYearClickCount() + searchKeywordInfo.getYearProductClickCount());
suggestKeywordInfo.setYearCartCount(suggestKeywordInfo.getYearCartCount() + searchKeywordInfo.getYearAddCartCount());
suggestKeywordInfo.setWeekCount(suggestKeywordInfo.getWeekCount() + searchKeywordInfo.getWeekPv());
suggestKeywordInfo.setWeekClickCount(suggestKeywordInfo.getWeekClickCount() + searchKeywordInfo.getWeekProductClickCount());
suggestKeywordInfo.setWeekCartCount(suggestKeywordInfo.getWeekCartCount() + searchKeywordInfo.getWeekAddCartCount());
}
// 计算suggestKeyword权重等属性
processEsSuggestKeywordInfo(suggestKeywordInfo, searchKeywordInfo);
// 不过滤的suggest词,计算分值写es
if (!isFilterSuggestKeyword(suggestKeywordInfo)) {
// 保存es前执行标签清洗
cleanBeforeSaveToEs(suggestKeywordInfo);
esSuggestKeywordMap.put(suggestKeywordInfo.getKeyword(), suggestKeywordInfo);
}
} }
} }
} }
// 保存到es
// saveSuggestKeywordToEs(suggestKeywordInfoList);
// for test
saveSuggestKeywordToFile(suggestKeywordInfoList);
} }
} }
...@@ -279,13 +296,26 @@ public class SuggestTask { ...@@ -279,13 +296,26 @@ public class SuggestTask {
private static void saveSuggestKeywordToFile(List<EsSuggestKeywordInfo> suggestKeywordInfoList) { private static void saveSuggestKeywordToFile(List<EsSuggestKeywordInfo> suggestKeywordInfoList) {
if (CollectionUtils.isNotEmpty(suggestKeywordInfoList)) { if (CollectionUtils.isNotEmpty(suggestKeywordInfoList)) {
int batch = 2000;
String fileName = "/tmp/suggest-task/suggest-index-" + DateUtils.formatDate(startTime, "yyyyMMddHHmmss") + ".json";
List<String> lines = new ArrayList<>(); List<String> lines = new ArrayList<>();
suggestKeywordInfoList.forEach(suggestKeywordInfo -> { int count = 0;
for (count = 0; count < suggestKeywordInfoList.size(); count++) {
EsSuggestKeywordInfo suggestKeywordInfo = suggestKeywordInfoList.get(count);
lines.add(JSON.toJSONString(suggestKeywordInfo)); lines.add(JSON.toJSONString(suggestKeywordInfo));
}); if (count > 0 && lines.size() % batch == 0) {
String fileName = "/tmp/suggest-task/suggest_index-" + DateUtils.formatDate(startTime, "yyyyMMddHHmmss") + "-" + Thread.currentThread().getId() + ".json"; log.info("save {}/{} result to file: {}", lines.size(), count, fileName);
log.info("save result to file: " + fileName); FileUtils.saveToFile(lines, fileName, true);
FileUtils.saveToFile(lines, fileName, true); lines = new ArrayList<>();
}
}
if (CollectionUtils.isNotEmpty(lines)) {
log.info("save {}/{} result to file: {}", lines.size(), count, fileName);
FileUtils.saveToFile(lines, fileName, true);
lines.clear();
}
} }
} }
...@@ -306,12 +336,12 @@ public class SuggestTask { ...@@ -306,12 +336,12 @@ public class SuggestTask {
} }
// 过滤掉太长的词 // 过滤掉太长的词
if (suggestKeywordInfo.getKeyword().length() > 30) { if (suggestKeywordInfo.getKeyword().length() <= 1 || suggestKeywordInfo.getKeyword().length() > 50) {
return true; return true;
} }
// 过滤掉纯数字的搜索词,原:过滤掉商品id,商品id是有7位数字组成 // 过滤掉纯数字的搜索词,原:过滤掉商品id,商品id是有7位数字组成
if (StringUtils.isNumber(suggestKeywordInfo.getKeyword())) { if (suggestKeywordInfo.getKeyword().length() > 6 && StringUtils.isNumber(suggestKeywordInfo.getKeyword())) {
return true; return true;
} }
...@@ -392,7 +422,6 @@ public class SuggestTask { ...@@ -392,7 +422,6 @@ public class SuggestTask {
suggestKeywordInfo.setWeekClickRatio(suggestKeywordInfo.getWeekClickRatio() * 2); suggestKeywordInfo.setWeekClickRatio(suggestKeywordInfo.getWeekClickRatio() * 2);
} }
calculateWordRank(suggestKeywordInfo); calculateWordRank(suggestKeywordInfo);
calculateWordABRank(suggestKeywordInfo, searchKeywordInfo); calculateWordABRank(suggestKeywordInfo, searchKeywordInfo);
addNewScoreIfNewHotWord(suggestKeywordInfo); addNewScoreIfNewHotWord(suggestKeywordInfo);
...@@ -527,12 +556,14 @@ public class SuggestTask { ...@@ -527,12 +556,14 @@ public class SuggestTask {
private static final long serialVersionUID = -2853856815712590673L; private static final long serialVersionUID = -2853856815712590673L;
/**
 * Creates a task that processes one id range of search keywords.
 *
 * @param esSuggestKeywordMap map the task passes on to {@code processSearchKeyword}
 * @param startId             first bound of the id range passed to the keyword query
 * @param endId               second bound of the id range passed to the keyword query
 * @param startTime           time value forwarded to {@code processSearchKeyword}
 */
public SearchKeywordProcessTask(ConcurrentHashMap<String, EsSuggestKeywordInfo> esSuggestKeywordMap, Long startId, Long endId, Long startTime) {
    // Id range first, then the time value, then the shared result map.
    this.startId = startId;
    this.endId = endId;
    this.startTime = startTime;
    this.esSuggestKeywordMap = esSuggestKeywordMap;
}
private ConcurrentHashMap<String, EsSuggestKeywordInfo> esSuggestKeywordMap;
private Long startId; private Long startId;
private Long endId; private Long endId;
private Long startTime; private Long startTime;
...@@ -541,7 +572,7 @@ public class SuggestTask { ...@@ -541,7 +572,7 @@ public class SuggestTask {
public void run() { public void run() {
List<SearchKeywordInfo> searchKeywordInfoList = DwDataSource.querySearchKeywordInfoList(startId, endId); List<SearchKeywordInfo> searchKeywordInfoList = DwDataSource.querySearchKeywordInfoList(startId, endId);
if (CollectionUtils.isNotEmpty(searchKeywordInfoList)) { if (CollectionUtils.isNotEmpty(searchKeywordInfoList)) {
processSearchKeyword(searchKeywordInfoList, startTime); processSearchKeyword(this.esSuggestKeywordMap, searchKeywordInfoList, startTime);
} }
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment