Commit 93b80089 by xupeng

fix code

parent d2e9cd84
......@@ -14,10 +14,8 @@ import lombok.extern.slf4j.Slf4j;
import java.io.File;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
......@@ -201,12 +199,13 @@ public class SuggestTask {
return;
}
ConcurrentHashMap<String, EsSuggestKeywordInfo> esSuggestKeywordMap = new ConcurrentHashMap<>();
// 通过线程池分批次并发处理搜索词
long batchSize = ConfigUtil.getLong("suggestTask.batchSize", 10000);
int threadPoolSize = ConfigUtil.getInt("suggestTask.threadPoolSize", 10);
ExecutorService execThreadPool = Executors.newFixedThreadPool(threadPoolSize);
for (long startId = minId; startId <= maxId; startId = startId + batchSize) {
execThreadPool.submit(new SearchKeywordProcessTask(startId, startId + batchSize, startTime));
execThreadPool.submit(new SearchKeywordProcessTask(esSuggestKeywordMap, startId, startId + batchSize, startTime));
}
execThreadPool.shutdown();
while (true) {
......@@ -216,35 +215,58 @@ public class SuggestTask {
}
ObjectUtils.safeSleep(5000);
}
// 保存到es
// saveSuggestKeywordToEs(new ArrayList<>(esSuggestKeywordMap.values()));
// for test
saveSuggestKeywordToFile(new ArrayList<>(esSuggestKeywordMap.values()));
}
/**
* 处理搜索词
*/
private static void processSearchKeyword(List<SearchKeywordInfo> searchKeywordInfoList, long startTime) {
private static void processSearchKeyword(ConcurrentHashMap<String, EsSuggestKeywordInfo> esSuggestKeywordMap, List<SearchKeywordInfo> searchKeywordInfoList, long startTime) {
if (CollectionUtils.isNotEmpty(searchKeywordInfoList)) {
List<EsSuggestKeywordInfo> suggestKeywordInfoList = new ArrayList<>();
for (SearchKeywordInfo searchKeywordInfo : searchKeywordInfoList) {
if (StringUtils.isNotBlank(searchKeywordInfo.getKeyword())) {
EsSuggestKeywordInfo suggestKeywordInfo = new EsSuggestKeywordInfo();
suggestKeywordInfo.setKeyword(cleanKeyword(searchKeywordInfo.getKeyword()));
String keyword = cleanKeyword(searchKeywordInfo.getKeyword());
synchronized (keyword) {
EsSuggestKeywordInfo suggestKeywordInfo = esSuggestKeywordMap.get(keyword);
if (suggestKeywordInfo == null) {
suggestKeywordInfo = new EsSuggestKeywordInfo();
suggestKeywordInfo.setKeyword(keyword);
suggestKeywordInfo.setKeywordPinYin(PinYinUtils.changeToWithoutTonePinYin(suggestKeywordInfo.getKeyword(), ""));
suggestKeywordInfo.setYearCount(searchKeywordInfo.getYearPv());
suggestKeywordInfo.setYearClickCount(searchKeywordInfo.getYearProductClickCount());
suggestKeywordInfo.setYearCartCount(searchKeywordInfo.getYearAddCartCount());
suggestKeywordInfo.setWeekCount(searchKeywordInfo.getWeekPv());
suggestKeywordInfo.setWeekClickCount(searchKeywordInfo.getWeekProductClickCount());
suggestKeywordInfo.setWeekCartCount(searchKeywordInfo.getWeekAddCartCount());
suggestKeywordInfo.setSuggestTags(searchKeywordInfo.getPrepareTags());
suggestKeywordInfo.setKeywordVersion(searchKeywordInfo.getPDay());
suggestKeywordInfo.setUpdateTime(startTime);
suggestKeywordInfo.setIsBrand(brandMap.containsKey(suggestKeywordInfo.getKeyword()));
suggestKeywordInfo.setIsCategory(categoryMap.containsKey(suggestKeywordInfo.getKeyword()));
suggestKeywordInfo.setIsSensitive(sensitiveMap.containsKey(suggestKeywordInfo.getKeyword()));
suggestKeywordInfo.setIsEuropeWord(europeWordMap.containsKey(suggestKeywordInfo.getKeyword()));
suggestKeywordInfo.setIsManual(manualMap.containsKey(suggestKeywordInfo.getKeyword()));
suggestKeywordInfo.setManualValue(suggestKeywordInfo.getIsManual() ? manualMap.get(suggestKeywordInfo.getKeyword()) : 0);
suggestKeywordInfo.setIsBrand(brandMap.containsKey(keyword));
suggestKeywordInfo.setIsCategory(categoryMap.containsKey(keyword));
suggestKeywordInfo.setIsSensitive(sensitiveMap.containsKey(keyword));
suggestKeywordInfo.setIsEuropeWord(europeWordMap.containsKey(keyword));
suggestKeywordInfo.setIsManual(manualMap.containsKey(keyword));
suggestKeywordInfo.setManualValue(suggestKeywordInfo.getIsManual() ? manualMap.get(keyword) : 0);
esSuggestKeywordMap.put(keyword, suggestKeywordInfo);
} else {
suggestKeywordInfo.setYearCount(suggestKeywordInfo.getYearCount() + searchKeywordInfo.getYearPv());
suggestKeywordInfo.setYearClickCount(suggestKeywordInfo.getYearClickCount() + searchKeywordInfo.getYearProductClickCount());
suggestKeywordInfo.setYearCartCount(suggestKeywordInfo.getYearCartCount() + searchKeywordInfo.getYearAddCartCount());
suggestKeywordInfo.setWeekCount(suggestKeywordInfo.getWeekCount() + searchKeywordInfo.getWeekPv());
suggestKeywordInfo.setWeekClickCount(suggestKeywordInfo.getWeekClickCount() + searchKeywordInfo.getWeekProductClickCount());
suggestKeywordInfo.setWeekCartCount(suggestKeywordInfo.getWeekCartCount() + searchKeywordInfo.getWeekAddCartCount());
}
// 计算suggestKeyword权重等属性
processEsSuggestKeywordInfo(suggestKeywordInfo, searchKeywordInfo);
......@@ -255,16 +277,11 @@ public class SuggestTask {
// 保存es前执行标签清洗
cleanBeforeSaveToEs(suggestKeywordInfo);
suggestKeywordInfoList.add(suggestKeywordInfo);
esSuggestKeywordMap.put(suggestKeywordInfo.getKeyword(), suggestKeywordInfo);
}
}
}
}
// 保存到es
// saveSuggestKeywordToEs(suggestKeywordInfoList);
// for test
saveSuggestKeywordToFile(suggestKeywordInfoList);
}
}
......@@ -279,13 +296,26 @@ public class SuggestTask {
private static void saveSuggestKeywordToFile(List<EsSuggestKeywordInfo> suggestKeywordInfoList) {
if (CollectionUtils.isNotEmpty(suggestKeywordInfoList)) {
int batch = 2000;
String fileName = "/tmp/suggest-task/suggest-index-" + DateUtils.formatDate(startTime, "yyyyMMddHHmmss") + ".json";
List<String> lines = new ArrayList<>();
suggestKeywordInfoList.forEach(suggestKeywordInfo -> {
int count = 0;
for (count = 0; count < suggestKeywordInfoList.size(); count++) {
EsSuggestKeywordInfo suggestKeywordInfo = suggestKeywordInfoList.get(count);
lines.add(JSON.toJSONString(suggestKeywordInfo));
});
String fileName = "/tmp/suggest-task/suggest_index-" + DateUtils.formatDate(startTime, "yyyyMMddHHmmss") + "-" + Thread.currentThread().getId() + ".json";
log.info("save result to file: " + fileName);
if (count > 0 && lines.size() % batch == 0) {
log.info("save {}/{} result to file: {}", lines.size(), count, fileName);
FileUtils.saveToFile(lines, fileName, true);
lines = new ArrayList<>();
}
}
if (CollectionUtils.isNotEmpty(lines)) {
log.info("save {}/{} result to file: {}", lines.size(), count, fileName);
FileUtils.saveToFile(lines, fileName, true);
lines.clear();
}
}
}
......@@ -306,12 +336,12 @@ public class SuggestTask {
}
// 过滤掉太长的词
if (suggestKeywordInfo.getKeyword().length() > 30) {
if (suggestKeywordInfo.getKeyword().length() <= 1 || suggestKeywordInfo.getKeyword().length() > 50) {
return true;
}
// 过滤掉纯数字的搜索词,原:过滤掉商品id,商品id是有7位数字组成
if (StringUtils.isNumber(suggestKeywordInfo.getKeyword())) {
if (suggestKeywordInfo.getKeyword().length() > 6 && StringUtils.isNumber(suggestKeywordInfo.getKeyword())) {
return true;
}
......@@ -392,7 +422,6 @@ public class SuggestTask {
suggestKeywordInfo.setWeekClickRatio(suggestKeywordInfo.getWeekClickRatio() * 2);
}
calculateWordRank(suggestKeywordInfo);
calculateWordABRank(suggestKeywordInfo, searchKeywordInfo);
addNewScoreIfNewHotWord(suggestKeywordInfo);
......@@ -527,12 +556,14 @@ public class SuggestTask {
private static final long serialVersionUID = -2853856815712590673L;
public SearchKeywordProcessTask(Long startId, Long endId, Long startTime) {
public SearchKeywordProcessTask(ConcurrentHashMap<String, EsSuggestKeywordInfo> esSuggestKeywordMap, Long startId, Long endId, Long startTime) {
this.esSuggestKeywordMap = esSuggestKeywordMap;
this.startId = startId;
this.endId = endId;
this.startTime = startTime;
}
private ConcurrentHashMap<String, EsSuggestKeywordInfo> esSuggestKeywordMap;
private Long startId;
private Long endId;
private Long startTime;
......@@ -541,7 +572,7 @@ public class SuggestTask {
public void run() {
List<SearchKeywordInfo> searchKeywordInfoList = DwDataSource.querySearchKeywordInfoList(startId, endId);
if (CollectionUtils.isNotEmpty(searchKeywordInfoList)) {
processSearchKeyword(searchKeywordInfoList, startTime);
processSearchKeyword(this.esSuggestKeywordMap, searchKeywordInfoList, startTime);
}
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment