Commit 9ba0f533 by xupeng

修正代码,增加保存到es支持

parent e6f56eb1
......@@ -91,6 +91,23 @@
<version>1.1.8</version>
</dependency>
<!-- es -->
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
<version>6.4.2</version>
</dependency>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>transport</artifactId>
<version>6.4.2</version>
</dependency>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>6.4.2</version>
</dependency>
</dependencies>
<build>
......
......@@ -18,8 +18,8 @@ public class EsSuggestKeywordInfo implements Serializable {
private Long weekClickCount;
private Long weekCartCount;
private Double yearClickRatio;
private Float yearCartRatio;
private Float weekClickRatio;
private Double yearCartRatio;
private Double weekClickRatio;
private Double weekCartRatio;
private Boolean isBrand;
private Boolean isCategory;
......@@ -27,7 +27,7 @@ public class EsSuggestKeywordInfo implements Serializable {
private Boolean isSensitive;
private Integer manualValue;
private Double wordRank;
private Float wordABRank;
private Double wordABRank;
private String keywordVersion;
private Boolean isEuropeWord;
private String suggestTags;
......
package com.secoo.so.suggest.es;
public class ESException extends Exception {
private static final long serialVersionUID = -4947060289056203488L;
private String msg;
private int code = 500;
public ESException(String msg) {
super(msg);
this.msg = msg;
}
public ESException(String msg, Throwable e) {
super(msg, e);
this.msg = msg;
}
public ESException(String msg, int code) {
super(msg);
this.msg = msg;
this.code = code;
}
public ESException(String msg, int code, Throwable e) {
super(msg, e);
this.msg = msg;
this.code = code;
}
public String getMsg() {
return msg;
}
public void setMsg(String msg) {
this.msg = msg;
}
public int getCode() {
return code;
}
public void setCode(int code) {
this.code = code;
}
}
package com.secoo.so.suggest.es;
import org.elasticsearch.action.DocWriteRequest;
import java.io.Serializable;
/**
 * Pairs a document id with its payload object and the
 * {@link DocWriteRequest.OpType} that should be applied to it.
 */
public class EsObject implements Serializable {

    private static final long serialVersionUID = -3593470306368703625L;

    /** Shorthand aliases for the Elasticsearch operation types. */
    public static final DocWriteRequest.OpType INDEX = DocWriteRequest.OpType.INDEX;
    public static final DocWriteRequest.OpType CREATE = DocWriteRequest.OpType.CREATE;
    public static final DocWriteRequest.OpType UPDATE = DocWriteRequest.OpType.UPDATE;
    public static final DocWriteRequest.OpType DELETE = DocWriteRequest.OpType.DELETE;

    /** Document id. */
    private String id;

    /** Document payload. */
    private Object object;

    /** Operation to perform; INDEX unless set otherwise. */
    private DocWriteRequest.OpType opType = INDEX;

    /** Creates an empty instance whose operation type is INDEX. */
    public EsObject() {
    }

    /**
     * Creates an instance with the default INDEX operation type.
     *
     * @param id     document id
     * @param object document payload
     */
    public EsObject(String id, Object object) {
        this(id, object, INDEX);
    }

    /**
     * Creates an instance with an explicit operation type.
     *
     * <p>DocWriteRequest.OpType.INDEX : overwrite the whole document<br>
     * DocWriteRequest.OpType.UPDATE : update only the fields that are sent
     *
     * @param id     document id
     * @param object document payload
     * @param opType operation type to apply
     * @author xupeng
     * @date: 2019-01-23
     */
    public EsObject(String id, Object object, DocWriteRequest.OpType opType) {
        this.id = id;
        this.object = object;
        this.opType = opType;
    }

    /**
     * @return the document id
     */
    public String getId() {
        return this.id;
    }

    /**
     * @param id the document id
     */
    public void setId(String id) {
        this.id = id;
    }

    /**
     * @return the document payload
     */
    public Object getObject() {
        return this.object;
    }

    /**
     * @param object the document payload
     */
    public void setObject(Object object) {
        this.object = object;
    }

    /**
     * @return the operation type to apply
     */
    public DocWriteRequest.OpType getOpType() {
        return this.opType;
    }

    /**
     * @param opType the operation type to apply
     */
    public void setOpType(DocWriteRequest.OpType opType) {
        this.opType = opType;
    }
}
package com.secoo.so.suggest.es;
import java.util.List;
import java.util.Map;
/**
 * Callback interface that can be used with paged queries.
 */
@FunctionalInterface
public interface EsSearchCallback {

    /**
     * Callback for a paged query. According to the original author's note, when
     * an implementation is supplied the search interface ultimately returns
     * empty data.
     *
     * @param results the result rows handed to the callback, one map per row
     */
    void callback(List<Map<String, Object>> results);
}
......@@ -8,6 +8,8 @@ import com.secoo.so.suggest.entity.BrandInfo;
import com.secoo.so.suggest.entity.CategoryInfo;
import com.secoo.so.suggest.entity.EsSuggestKeywordInfo;
import com.secoo.so.suggest.entity.SearchKeywordInfo;
import com.secoo.so.suggest.es.EsClient;
import com.secoo.so.suggest.es.EsObject;
import com.secoo.so.suggest.util.*;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
......@@ -194,8 +196,7 @@ public class SuggestTask {
Long warningCount = ConfigUtil.getLong("suggestTask.searchWordWarningCount", 1000000);
if (count < warningCount) {
log.warn("search word count is too little: count={}, warningCount={}, send warning", count, warningCount);
List<String> phones = StringUtils.splitToList(ConfigUtil.getString("suggestTask.warningPhones"), ",");
FeiShuUtil.sendMessage("suggest-task异常", "搜索词数量过低,不执行索引", phones);
FeiShuUtil.sendMessage("suggest-task异常", "搜索词数量过低,不执行索引");
return;
}
......@@ -221,8 +222,9 @@ public class SuggestTask {
// 过滤词
List<EsSuggestKeywordInfo> suggestKeywordInfoList = new ArrayList<>();
int processCount = 0;
int totalCount = esSuggestKeywordMap.values().size();
int totalCount = esSuggestKeywordMap.values().size();
for (EsSuggestKeywordInfo suggestKeywordInfo : esSuggestKeywordMap.values()) {
// 不过滤的suggest词,计算分值写es
if (!isFilterSuggestKeyword(suggestKeywordInfo)) {
......@@ -242,10 +244,12 @@ public class SuggestTask {
}
// 保存到es
// saveSuggestKeywordToEs(suggestKeywordInfoList);
saveSuggestKeywordToEs(suggestKeywordInfoList);
// for test
saveSuggestKeywordToFile(suggestKeywordInfoList);
if ("true".equalsIgnoreCase(System.getProperty("suggest.saveToFile"))) {
// for test
saveSuggestKeywordToFile(suggestKeywordInfoList);
}
}
/**
......@@ -305,6 +309,28 @@ public class SuggestTask {
/**
 * Writes the given suggest keywords to Elasticsearch in batches.
 *
 * <p>Connection settings, index, type and batch size are read from the
 * {@code suggestTask.es.*} configuration keys (batch size defaults to 2000).
 * Each document id is the MD5 of the keyword. A batch that fails is logged and
 * reported through FeiShuUtil; the remaining batches are still attempted.
 *
 * @param suggestKeywordInfoList keywords to save; nothing happens when
 *                               CollectionUtils.isNotEmpty reports false
 */
private static void saveSuggestKeywordToEs(List<EsSuggestKeywordInfo> suggestKeywordInfoList) {
    if (!CollectionUtils.isNotEmpty(suggestKeywordInfoList)) {
        return;
    }

    final String url = ConfigUtil.getString("suggestTask.es.url");
    final String user = ConfigUtil.getString("suggestTask.es.user");
    final String password = ConfigUtil.getString("suggestTask.es.password");
    final String index = ConfigUtil.getString("suggestTask.es.index");
    final String type = ConfigUtil.getString("suggestTask.es.type");
    final int batchSize = ConfigUtil.getInt("suggestTask.es.batchSize", 2000);

    final EsClient client = EsClient.buildEsClient(url, user, password);

    for (List<EsSuggestKeywordInfo> chunk : CollectionUtils.splitList(suggestKeywordInfoList, batchSize)) {
        // Wrap every keyword of this chunk as a document keyed by the keyword's MD5.
        List<EsObject> documents = new ArrayList<>();
        for (EsSuggestKeywordInfo info : chunk) {
            String documentId = StringUtils.md5(info.getKeyword());
            documents.add(new EsObject(documentId, info));
        }

        try {
            client.batch(index, type, documents);
        } catch (Exception e) {
            // One failed batch must not stop the others: log, alert, continue.
            log.error("saveSuggestKeywordToEs error", e);
            FeiShuUtil.sendMessage("suggest-task save to es 异常", "suggest-task save to es 异常");
        }
    }
}
......@@ -349,8 +375,10 @@ public class SuggestTask {
return true;
}
// 过滤掉太长的词
if (suggestKeywordInfo.getKeyword().length() <= 1 || suggestKeywordInfo.getKeyword().length() > 50) {
// 过滤掉太短、太长的词
if (StringUtils.isBlank(suggestKeywordInfo.getKeyword())
|| suggestKeywordInfo.getKeyword().length() <= 1
|| StringUtils.getByteLength(suggestKeywordInfo.getKeyword()) > 50) {
return true;
}
......@@ -414,10 +442,10 @@ public class SuggestTask {
private static void processEsSuggestKeywordInfo(EsSuggestKeywordInfo suggestKeywordInfo, SearchKeywordInfo searchKeywordInfo) {
// 年点击加购率
suggestKeywordInfo.setYearClickRatio(CalculateUtils.calculateRatio(suggestKeywordInfo.getYearClickCount(), suggestKeywordInfo.getYearCount()));
suggestKeywordInfo.setYearCartRatio(CalculateUtils.calculateRatio(suggestKeywordInfo.getYearCartCount(), suggestKeywordInfo.getYearCount()).floatValue());
suggestKeywordInfo.setYearCartRatio(CalculateUtils.calculateRatio(suggestKeywordInfo.getYearCartCount(), suggestKeywordInfo.getYearCount()));
// 周点击加购率
suggestKeywordInfo.setWeekClickRatio(CalculateUtils.calculateRatio(suggestKeywordInfo.getWeekClickCount().intValue(), suggestKeywordInfo.getWeekCount().intValue()).floatValue());
suggestKeywordInfo.setWeekClickRatio(CalculateUtils.calculateRatio(suggestKeywordInfo.getWeekClickCount().intValue(), suggestKeywordInfo.getWeekCount().intValue()));
suggestKeywordInfo.setWeekCartRatio(CalculateUtils.calculateRatio(suggestKeywordInfo.getWeekCartCount().intValue(), suggestKeywordInfo.getWeekCount().intValue()));
......@@ -443,11 +471,11 @@ public class SuggestTask {
}
private static void calculateWordRank(EsSuggestKeywordInfo suggestKeywordInfo) {
public static Double calculateWordRank(EsSuggestKeywordInfo suggestKeywordInfo) {
Double wordRank = 10000.0;
// 长度因子
wordRank += 3000 * CalculateUtils.calculateLengthFactor(suggestKeywordInfo.getKeyword().length());
wordRank += 3000 * CalculateUtils.calculateLengthFactor(StringUtils.getByteLength(suggestKeywordInfo.getKeyword()));
// 年数量因子
wordRank += 2000 * CalculateUtils.calculateCountFactor(suggestKeywordInfo.getYearCount(), 1);
// 周数量因子
......@@ -475,9 +503,10 @@ public class SuggestTask {
}
suggestKeywordInfo.setWordRank(wordRank);
return wordRank;
}
private static void calculateWordABRank(EsSuggestKeywordInfo suggestKeywordInfo, SearchKeywordInfo searchKeywordInfo) {
public static Double calculateWordABRank(EsSuggestKeywordInfo suggestKeywordInfo, SearchKeywordInfo searchKeywordInfo) {
// 月点击加购率
Double monthClickRatio = CalculateUtils.calculateRatio(searchKeywordInfo.getMonthProductClickUv().intValue(), searchKeywordInfo.getMonthUv().intValue());
......@@ -510,7 +539,7 @@ public class SuggestTask {
Double wordABRank = 10000.0;
// 长度因子
wordABRank += 3000 * CalculateUtils.calculateLengthFactor(suggestKeywordInfo.getKeyword().length());
wordABRank += 3000 * CalculateUtils.calculateLengthFactor(StringUtils.getByteLength(suggestKeywordInfo.getKeyword()));
// 月数量因子
wordABRank += 2000 * CalculateUtils.calculateCountFactor(searchKeywordInfo.getMonthUv().intValue(), 4);
......@@ -542,7 +571,8 @@ public class SuggestTask {
if (suggestKeywordInfo.getIsManual() && suggestKeywordInfo.getManualValue() > 0) {
wordABRank *= Math.sqrt(suggestKeywordInfo.getManualValue() * 1.0);
}
suggestKeywordInfo.setWordABRank(wordABRank.floatValue());
suggestKeywordInfo.setWordABRank(wordABRank);
return wordABRank;
}
......@@ -560,7 +590,7 @@ public class SuggestTask {
return;
}
// 新词加分大小 类似于 人工干预值
suggestKeywordInfo.setWordABRank(new Double(suggestKeywordInfo.getWordABRank() * Math.sqrt(5.0)).floatValue());
suggestKeywordInfo.setWordABRank(new Double(suggestKeywordInfo.getWordABRank() * Math.sqrt(5.0)));
}
......@@ -585,6 +615,7 @@ public class SuggestTask {
@Override
public void run() {
List<SearchKeywordInfo> searchKeywordInfoList = DwDataSource.querySearchKeywordInfoList(startId, endId);
log.info("start process startId:{}, endId:{}, count:{}", startId, endId, searchKeywordInfoList.size());
if (CollectionUtils.isNotEmpty(searchKeywordInfoList)) {
processSearchKeyword(this.esSuggestKeywordMap, searchKeywordInfoList, startTime);
}
......
package com.secoo.so.suggest.util;
import com.alibaba.fastjson.JSON;
import com.secoo.so.suggest.config.ConfigUtil;
import org.apache.http.HttpStatus;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
......@@ -31,6 +32,9 @@ public class FeiShuUtil {
private static ExecutorService executor = new ThreadPoolExecutor(1, 1, 0L, TimeUnit.MILLISECONDS,
new LinkedBlockingQueue<>(1024), Executors.defaultThreadFactory(), new ThreadPoolExecutor.DiscardPolicy());
/**
 * Sends a message without an explicit recipient list by delegating to
 * {@code sendMessage(String, String, List)} with {@code null} phones.
 *
 * @param title   message title
 * @param message message body
 */
public static void sendMessage(String title, String message) {
    // Typed null keeps the call bound to the List<String> overload.
    sendMessage(title, message, (List<String>) null);
}
public static void sendMessage(String title, String message, List<String> phones) {
if (StringUtils.isBlank(message)) {
......@@ -44,6 +48,10 @@ public class FeiShuUtil {
params.put("body", Collections.singletonList(message));
params.put("phones", phones);
if(CollectionUtils.isEmpty(phones)){
phones = StringUtils.splitToList(ConfigUtil.getString("suggestTask.warningPhones"), ",");
}
final String fTitle = title;
final String fMessage = message;
final String fPhones = StringUtils.join(phones, ",");
......
......@@ -1466,6 +1466,41 @@ public final class ObjectUtils {
return true;
}
/**
 * Converts a list of maps into a list of typed objects by serializing the
 * list to JSON and parsing it back as {@code clazz} elements.
 *
 * @param sourceList maps to convert
 * @param clazz      target element class
 * @param <T>        target element type
 * @return the parsed list; a new empty list when {@code CollectionUtils.isEmpty}
 *         reports the source as empty or when the conversion throws (the
 *         exception is logged)
 */
public static <T> List<T> listToObjects(List<Map<String, Object>> sourceList, Class<T> clazz) {
    List<T> converted = new ArrayList<>();
    if (!CollectionUtils.isEmpty(sourceList)) {
        try {
            converted = JSON.parseArray(JSON.toJSONString(sourceList), clazz);
        } catch (Exception e) {
            // Conversion failed: keep the empty fallback list.
            logger.error(e.getMessage(), e);
        }
    }
    return converted;
}
/**
 * Converts a map into a typed object by serializing the map to JSON and
 * parsing it back as an instance of the given class.
 *
 * @param sourceMap map to convert
 * @param calzz     target class
 * @param <T>       target type
 * @return the parsed object; {@code null} when {@code CollectionUtils.isEmpty}
 *         reports the map as empty or when the conversion throws (the
 *         exception is logged)
 */
public static <T> T mapToObjectWithJSON(Map<String, Object> sourceMap, Class<T> calzz) {
    T converted = null;
    if (!CollectionUtils.isEmpty(sourceMap)) {
        try {
            final String serialized = JSON.toJSONString(sourceMap);
            converted = JSON.parseObject(serialized, calzz);
        } catch (Exception e) {
            // Conversion failed: fall through and return null.
            logger.error(e.getMessage(), e);
        }
    }
    return converted;
}
public static void main(String[] args) {
/*String json = "{\"name\":\"TMev344\",\"cluster_name\":\"elasticsearch\",\"cluster_uuid\":\"cRwLaMCuRkmZvpnYrcQ6mQ\",\"version\":{\"number\":\"6.5.0\",\"build_flavor\":\"default\",\"build_type\":\"zip\",\"build_hash\":\"816e6f6\",\"build_date\":\"2018-11-09T18:58:36.352602Z\",\"build_snapshot\":false,\"lucene_version\":\"7.5.0\",\"minimum_wire_compatibility_version\":\"5.6.0\",\"minimum_index_compatibility_version\":\"5.0.0\"},\"tagline\":\"You Know, for Search\"}";
Map<String, String> paramMap = parseJsonStrToAbsolutePathKeyMap(json);
......
......@@ -8,6 +8,8 @@ import java.io.UnsupportedEncodingException;
import java.math.BigDecimal;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.regex.Matcher;
......@@ -2035,4 +2037,68 @@ public abstract class StringUtils {
return false;
}
/**
 * Returns the number of bytes the string occupies when encoded as UTF-8.
 * <br/>
 * In UTF-8 an ASCII character takes 1 byte and a common Chinese character
 * takes 3 bytes.
 *
 * @param str the string to measure; may be {@code null}
 * @return the UTF-8 byte length, or 0 when {@code str} is {@code null} or empty
 */
public static int getByteLength(String str) {
    if (str == null || str.isEmpty()) {
        return 0;
    }
    // Use the charset constant rather than a name lookup: no checked
    // exception is involved and nothing can be silently swallowed.
    return str.getBytes(StandardCharsets.UTF_8).length;
}
/**
 * Returns the number of bytes the string occupies in the named charset.
 * <br/>
 * The byte count per character depends on the charset that is passed in.
 *
 * @param str     the string to measure; may be {@code null}
 * @param charset name of the charset used for encoding
 * @return the encoded byte length; 0 when {@code str} is {@code null} or empty,
 *         and also 0 when the charset name is not supported (a message and the
 *         stack trace are printed in that case)
 */
public static int getByteLength(String str, String charset) {
    final boolean nothingToMeasure = (str == null) || str.isEmpty();
    if (nothingToMeasure) {
        return 0;
    }

    int byteCount = 0;
    try {
        byte[] encoded = str.getBytes(charset);
        byteCount = encoded.length;
    } catch (UnsupportedEncodingException e) {
        // Unknown charset name: report it and keep the 0 fallback.
        System.out.println("计算中英文字符串的字节长度失败,");
        e.printStackTrace();
    }
    return byteCount;
}
/**
 * Computes the MD5 digest of the string's UTF-8 bytes and returns it as a
 * 32-character lowercase hexadecimal string.
 *
 * <p>MD5 is a hash, not encryption, and is not suitable for security purposes.
 *
 * @param str the text to hash; may be {@code null}
 * @return the hex digest; {@code null} when {@code str} is {@code null}; an
 *         empty string if the MD5 algorithm is unavailable in the runtime
 */
public static String md5(String str) {
    if (str == null) {
        return null;
    }
    try {
        MessageDigest md5 = MessageDigest.getInstance("MD5");
        byte[] digest = md5.digest(str.getBytes(StandardCharsets.UTF_8));

        // Two hex digits per byte; masking each nibble avoids sign fix-ups
        // and keeps the leading zero of small values.
        StringBuilder hex = new StringBuilder(digest.length * 2);
        for (byte b : digest) {
            hex.append(Character.forDigit((b >> 4) & 0x0F, 16));
            hex.append(Character.forDigit(b & 0x0F, 16));
        }
        return hex.toString();
    } catch (NoSuchAlgorithmException e) {
        // Kept from the original contract: report and return an empty string.
        System.out.println("encode md5 error");
        e.printStackTrace();
        return "";
    }
}
}
......@@ -4,7 +4,13 @@ suggestTask.ManualFolder=/data/pssmaster/corpus_set/suggest_corpus/manual
suggestTask.SensitiveFolder=/data/pssmaster/corpus_set/suggest_corpus/sensitive
suggestTask.EuropeWordFolder=/data/pssmaster/corpus_set/suggest_corpus/europe_word
suggestTask.batchSize=10000
suggestTask.threadPoolSize=20
suggestTask.threadPoolSize=10
suggestTask.searchWordWarningCount=1000000
suggestTask.suggestTagMaxSize=5
suggestTask.warningPhones=13426233960
suggestTask.es.url=http://bigdataescluster.secoolocal.com:9200
suggestTask.es.user=search
suggestTask.es.password=search5z0NvEn1D
suggestTask.es.index=search_suggest_index
suggestTask.es.type=search_suggest_type
suggestTask.es.batchSize=2000
......@@ -6,4 +6,10 @@ suggestTask.EuropeWordFolder=/data/pssmaster/corpus_set/suggest_corpus/europe_wo
suggestTask.batchSize=10000
suggestTask.threadPoolSize=10
suggestTask.suggestTagMaxSize=5
suggestTask.searchWordWarningCount=1000000
\ No newline at end of file
suggestTask.searchWordWarningCount=1000000
suggestTask.es.url=http://10.0.254.139:9200
suggestTask.es.user=suggest
suggestTask.es.password=suggest456
suggestTask.es.index=search_suggest_index
suggestTask.es.type=search_suggest_type
suggestTask.es.batchSize=2000
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment