Commit ccce4f93 by 王明范

use sqp

parent d835274b
...@@ -36,7 +36,41 @@ ...@@ -36,7 +36,41 @@
</profiles> </profiles>
<dependencies> <dependencies>
<!--
  SQP client library (com.secoo:sqp4j-client, 2.9.6.RELEASE).
  Presumably supplies the com.secoo.search.sqp4j.* types (QueryPlan, Explanation, ...)
  used by SqpDubboClient and QueryPlanHelper - confirm against the artifact.
  Its transitive logging artifacts (slf4j-log4j12, log4j, secoo-log) are excluded below.
  NOTE(review): SqpDubboClient imports org.apache.log4j.Logger while log4j is excluded
  here - verify that another dependency still provides that class at runtime.
-->
<dependency>
<groupId>com.secoo</groupId>
<artifactId>sqp4j-client</artifactId>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<artifactId>secoo-log</artifactId>
<groupId>com.secoo.mall</groupId>
</exclusion>
</exclusions>
<version>2.9.6.RELEASE</version>
</dependency>
<!--
  Alibaba Dubbo 2.6.0: provides the com.alibaba.dubbo.config.* classes
  (ApplicationConfig, ReferenceConfig, RegistryConfig) imported by SqpDubboClient.
  Dubbo's transitive spring-context is excluded; presumably the project manages its
  own Spring version - confirm.
-->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>dubbo</artifactId>
<version>2.6.0</version>
<exclusions>
<exclusion>
<artifactId>spring-context</artifactId>
<groupId>org.springframework</groupId>
</exclusion>
</exclusions>
</dependency>
<!--
  ZooKeeper client library (com.github.sgroschupf:zkclient 0.1).
  SqpDubboClient configures its registry with setClient("zkclient"); presumably this
  artifact is the implementation Dubbo loads for that setting - confirm.
-->
<dependency>
<groupId>com.github.sgroschupf</groupId>
<artifactId>zkclient</artifactId>
<version>0.1</version>
</dependency>
<dependency> <dependency>
<groupId>mysql</groupId> <groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId> <artifactId>mysql-connector-java</artifactId>
......
package com.secoo.so.suggest.client;
import com.alibaba.dubbo.config.ApplicationConfig;
import com.alibaba.dubbo.config.ReferenceConfig;
import com.alibaba.dubbo.config.RegistryConfig;
import com.secoo.search.sqp4j.QueryPlan;
import org.apache.log4j.Logger;
import java.util.Map;
/**
* @author wangmingfan
* @date 2020/8/17
* @description sqp dubbo client
*/
public class SqpDubboClient {
private static final Logger loger = Logger.getLogger(SqpDubboClient.class);
private static ReferenceConfig<QueryPlan> dubboSqpReferenceConfigProd = null;
private static ReferenceConfig<QueryPlan> dubboSqpReferenceConfigUat = null;
private static ReferenceConfig<QueryPlan> dubboSqpReferenceConfigTest = null;
public static QueryPlan getDirectImpl(String url, Map<String, String> map){
ReferenceConfig<QueryPlan> impl = directDubboSqpReferenceConfig(url);
QueryPlan dubboSqp = impl.get();
map.put("Client", impl.getClient());
map.put("Interface", impl.getInterface());
map.put("Protocol", impl.getProtocol());
map.put("Url", impl.getUrl());
map.put("Cluster", impl.getCluster());
return dubboSqp;
}
public static QueryPlan getTestImpl(){
ReferenceConfig<QueryPlan> impl = getTestDubboSqpReferenceConfig();
//impl.setGroup(""); //测试test环境未设置group
QueryPlan dubboSqp = impl.get();
return dubboSqp;
}
public static QueryPlan getUatImpl() {
ReferenceConfig<QueryPlan> impl = getDubboSqpReferenceConfigUat();
QueryPlan dubboSqp = impl.get();
return dubboSqp;
}
public static QueryPlan getProdImpl() {
ReferenceConfig<QueryPlan> impl = getDubboSqpReferenceConfigProd();
QueryPlan dubboSqp = impl.get();
return dubboSqp;
}
private static ReferenceConfig<QueryPlan> getDubboSqpReferenceConfigProd() {
if (dubboSqpReferenceConfigProd == null) {
dubboSqpReferenceConfigProd = buildDubboSqpReferenceConfig();
}
return dubboSqpReferenceConfigProd;
}
private static ReferenceConfig<QueryPlan> getDubboSqpReferenceConfigUat() {
if (dubboSqpReferenceConfigUat == null) {
dubboSqpReferenceConfigUat = buildDubboSqpReferenceConfig();
dubboSqpReferenceConfigUat.setGroup("grey");
}
return dubboSqpReferenceConfigUat;
}
private static ReferenceConfig<QueryPlan> buildDubboSqpReferenceConfig() {
ReferenceConfig<QueryPlan> impl = new ReferenceConfig<QueryPlan>();
impl.setProtocol("dubbo");
impl.setApplication(new ApplicationConfig("sem-test-tool"));
RegistryConfig registryConfig = new RegistryConfig("zookeeper://zk-mall1.secoolocal.com:5181?backup=zk-mall2.secoolocal.com:5181,zk-mall3.secoolocal.com:5181");
registryConfig.setProtocol("zookeeper");
registryConfig.setClient("zkclient");
impl.setRegistry(registryConfig);
// impl.setMonitor(new MonitorConfig() { { setProtocol("registry"); } });
impl.setVersion("1.0.0");
impl.setInterface("com.secoo.search.sqp4j.QueryPlan");
return impl;
}
/**
* 连接测试环境dubbo
* @return com.alibaba.dubbo.config.ReferenceConfig<com.secoo.search.sqp4j.QueryPlan>
* @author wangmingfan
* @date 2020/8/17
*/
private static ReferenceConfig<QueryPlan> getTestDubboSqpReferenceConfig() {
if (dubboSqpReferenceConfigTest == null) {
ReferenceConfig<QueryPlan> impl = new ReferenceConfig<QueryPlan>();
impl.setProtocol("dubbo");
impl.setApplication(new ApplicationConfig("sem-test-tool"));
RegistryConfig registryConfig = new RegistryConfig("zookeeper://10.185.240.81:2181?backup=10.185.240.82:2181,10.185.240.83:2181");
registryConfig.setProtocol("zookeeper");
registryConfig.setClient("zkclient");
impl.setRegistry(registryConfig);
// impl.setMonitor(new MonitorConfig() { { setProtocol("registry"); } });
impl.setVersion("1.0.0");
impl.setInterface("com.secoo.search.sqp4j.QueryPlan");
dubboSqpReferenceConfigTest = impl;
}
return dubboSqpReferenceConfigTest;
}
/**
* 直连dubbo服务
* @param Url url格式:ip:端口号
* @return com.alibaba.dubbo.config.ReferenceConfig<com.secoo.search.sqp4j.QueryPlan>
* @author wangmingfan
* @date 2020/4/6
*/
private static ReferenceConfig<QueryPlan> directDubboSqpReferenceConfig(String Url) {
ReferenceConfig<QueryPlan> impl = new ReferenceConfig<QueryPlan>();
impl.setProtocol("dubbo");
impl.setApplication(new ApplicationConfig("sem-test-tool"));
impl.setUrl(Url); //"10.185.240.158:20062"
//impl.setMonitor(new MonitorConfig() { { setProtocol("registry"); } });
impl.setVersion("1.0.0");
impl.setInterface("com.secoo.search.sqp4j.QueryPlan");
return impl;
}
}
package com.secoo.so.suggest.helper;
import com.secoo.abtest.common.Buckets;
import com.secoo.search.sqp4j.Explanation;
import com.secoo.search.sqp4j.Explanations;
import com.secoo.search.sqp4j.QueryPlan;
import com.secoo.search.sqp4j.QueryWord;
import com.secoo.search.sqp4j.client.QueryPlanClient;
import com.secoo.so.suggest.client.SqpDubboClient;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
/**
 * Singleton helper that asks the SQP service to explain a keyword and keeps the
 * answers in a bounded in-memory cache.
 *
 * <p>The cache holds at most {@value #MAX_CACHE_SIZE} explanations; when that limit
 * is exceeded the entry that was inserted first is evicted. All cache access is
 * guarded by a private lock so the shared instance can be used from several threads;
 * the remote call itself is made outside the lock.
 *
 * @author wangmingfan
 * @date 2022/8/4
 */
public class QueryPlanHelper {

    private static final Logger LOG = LoggerFactory.getLogger(QueryPlanHelper.class);

    /** Maximum number of cached explanations before the oldest one is evicted. */
    private static final int MAX_CACHE_SIZE = 100000;

    private static volatile QueryPlanHelper instance;

    /** Proxy of the production SQP service, obtained once when the helper is created. */
    private final QueryPlan client;

    /** Guards {@link #sqpCache} and {@link #wordList}, which must change together. */
    private final Object cacheLock = new Object();

    /** Cached explanations by keyword. */
    Map<String, Explanation> sqpCache = new HashMap<>();

    /**
     * Cached keywords in insertion order, oldest first. A linked list is used because
     * eviction removes the head, which an array-backed list can only do by shifting
     * every remaining element.
     */
    List<String> wordList = new LinkedList<>();

    private QueryPlanHelper() {
        client = SqpDubboClient.getProdImpl();
    }

    /**
     * Returns the shared helper, creating it on first use.
     *
     * @return the single helper instance
     */
    public static QueryPlanHelper getInstance() {
        if (instance == null) {
            synchronized (QueryPlanHelper.class) {
                if (instance == null) {
                    instance = new QueryPlanHelper();
                }
            }
        }
        return instance;
    }

    /**
     * Returns the first explanation the SQP service produces for a keyword.
     *
     * <p>Successful answers are cached; blank keywords and keywords for which the
     * service returns no usable explanation are not, so they are asked again next time.
     *
     * @param keyword the search keyword; blank or {@code null} yields {@code null}
     * @return the first explanation, or {@code null} when there is none
     */
    public Explanation explain(String keyword) {
        if (StringUtils.isBlank(keyword)) {
            return null;
        }

        synchronized (cacheLock) {
            // Only non-null explanations are ever cached, so null means "not cached".
            Explanation cached = sqpCache.get(keyword);
            if (cached != null) {
                return cached;
            }
        }

        // The remote call is made without holding the lock so that one slow request
        // does not block cache hits on other threads.
        String traceId = UUID.randomUUID().toString();
        Map<String, String> bucketInfo = new HashMap<>();
        Buckets bucket = new Buckets(bucketInfo);
        String cityCode = "";
        long currDate = 0L;
        int needSpell = 0;
        Explanations explanations = client.explain(traceId, bucket, cityCode, currDate, needSpell, keyword, null);

        if (explanations == null || explanations.getItems() == null || explanations.getItems().size() == 0) {
            return null;
        }
        Explanation explanation = explanations.getItems().get(0);
        if (explanation == null) {
            return null;
        }
        cacheKeyword(keyword, explanation);
        return explanation;
    }

    /**
     * Stores an explanation and evicts the oldest entry once the cache exceeds
     * {@link #MAX_CACHE_SIZE}.
     */
    private void cacheKeyword(String keyword, Explanation explanation) {
        synchronized (cacheLock) {
            // Two threads may miss on the same keyword at once; record the keyword only
            // the first time so the order list never holds more entries than the map.
            if (sqpCache.put(keyword, explanation) == null) {
                wordList.add(keyword);
            }
            if (sqpCache.size() > MAX_CACHE_SIZE) {
                String removeWord = wordList.remove(0);
                sqpCache.remove(removeWord);
            }
        }
    }

    /**
     * Small stand-alone demonstration of the first-in-first-out eviction used by the
     * cache, with a limit of 5 entries; prints the list and map sizes after each insert.
     *
     * @param arg ignored
     */
    public static void main(String[] arg) {
        Map<String, String> cache = new HashMap<>();
        List<String> list = new ArrayList<>();
        for (int i = 0; i < 7; i++) {
            cache.put("key_" + i, String.valueOf(i));
            list.add("key_" + i);
            if (cache.size() > 5) {
                String rk = list.get(0);
                list.remove(0);
                cache.remove(rk);
            }
            System.out.println("list size:"+list.size()+";map size:" + cache.size());
        }
    }
}
...@@ -2,6 +2,9 @@ package com.secoo.so.suggest.task; ...@@ -2,6 +2,9 @@ package com.secoo.so.suggest.task;
import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.secoo.search.sqp4j.Explanation;
import com.secoo.search.sqp4j.QueryPlan;
import com.secoo.search.sqp4j.QueryWord;
import com.secoo.so.suggest.config.ConfigUtil; import com.secoo.so.suggest.config.ConfigUtil;
import com.secoo.so.suggest.db.DwDataSource; import com.secoo.so.suggest.db.DwDataSource;
import com.secoo.so.suggest.db.ErpDataSource; import com.secoo.so.suggest.db.ErpDataSource;
...@@ -11,10 +14,12 @@ import com.secoo.so.suggest.entity.EsSuggestKeywordInfo; ...@@ -11,10 +14,12 @@ import com.secoo.so.suggest.entity.EsSuggestKeywordInfo;
import com.secoo.so.suggest.entity.SearchKeywordInfo; import com.secoo.so.suggest.entity.SearchKeywordInfo;
import com.secoo.so.suggest.es.EsClient; import com.secoo.so.suggest.es.EsClient;
import com.secoo.so.suggest.es.EsObject; import com.secoo.so.suggest.es.EsObject;
import com.secoo.so.suggest.helper.QueryPlanHelper;
import com.secoo.so.suggest.util.*; import com.secoo.so.suggest.util.*;
import lombok.Data; import lombok.Data;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import javax.management.Query;
import java.io.File; import java.io.File;
import java.io.Serializable; import java.io.Serializable;
import java.util.*; import java.util.*;
...@@ -63,7 +68,7 @@ public class SuggestTask { ...@@ -63,7 +68,7 @@ public class SuggestTask {
europeWordMap = loadEuropeWordMap(); europeWordMap = loadEuropeWordMap();
// 加载表填同义词 // 加载表填同义词
loadTagSynonym(); synonymList = loadTagSynonym();
// 加载搜索词并处理 // 加载搜索词并处理
processSuggestTask(startTime); processSuggestTask(startTime);
...@@ -189,14 +194,13 @@ public class SuggestTask { ...@@ -189,14 +194,13 @@ public class SuggestTask {
return prefixFilterList; return prefixFilterList;
} }
private static void loadTagSynonym(){ private static List<Set<String>> loadTagSynonym(){
List<Set<String>> synList = new ArrayList<>();
Set<String> maleWords = new HashSet<>(Arrays.asList("男性","男式","男士","男款","男")); Set<String> maleWords = new HashSet<>(Arrays.asList("男性","男式","男士","男款","男"));
Set<String> femaleWords = new HashSet<>(Arrays.asList("女性","女式","女士","女款","女")); Set<String> femaleWords = new HashSet<>(Arrays.asList("女性","女式","女士","女款","女"));
if (synonymList == null) { synList.add(maleWords);
synonymList = new ArrayList<>(); synList.add(femaleWords);
} return synList;
synonymList.add(maleWords);
synonymList.add(femaleWords);
} }
private static String cleanKeyword(String keyword) { private static String cleanKeyword(String keyword) {
...@@ -441,6 +445,32 @@ public class SuggestTask { ...@@ -441,6 +445,32 @@ public class SuggestTask {
return true; return true;
} }
if (StringUtils.isNotBlank(rightWord)) {
log.info("check word:" + word + " and " + fullWord);
QueryPlanHelper sqp = QueryPlanHelper.getInstance();
Explanation explan1 = sqp.explain(word);
Explanation explan2 = sqp.explain(rightWord);
Explanation explan3 = sqp.explain(fullWord);
if (explan1 != null && explan2 != null && explan3 != null) {
List<QueryWord> queryWords1 = explan1.getQueryWords();
List<QueryWord> queryWords2 = explan2.getQueryWords();
List<QueryWord> queryWords3 = explan3.getQueryWords();
log.info("queryWords1 size:"+queryWords1.size()+"; queryWords2 size:"+queryWords2.size()+"; queryWords3 size:" + queryWords3.size());
if (queryWords1 != null && queryWords2 != null && queryWords3 != null) {
if (queryWords1.size() + queryWords2.size() > queryWords3.size()) {
return true;
}
} else {
return true;
}
} else {
return true;
}
} else {
return true;
}
return false; return false;
} }
......
...@@ -5,11 +5,12 @@ suggestTask.SensitiveFolder=/data/pssmaster/corpus_set/suggest_corpus/sensitive ...@@ -5,11 +5,12 @@ suggestTask.SensitiveFolder=/data/pssmaster/corpus_set/suggest_corpus/sensitive
suggestTask.EuropeWordFolder=/data/pssmaster/corpus_set/suggest_corpus/europe_word suggestTask.EuropeWordFolder=/data/pssmaster/corpus_set/suggest_corpus/europe_word
suggestTask.batchSize=10000 suggestTask.batchSize=10000
suggestTask.threadPoolSize=10 suggestTask.threadPoolSize=10
suggestTask.suggestTagMaxSize=5
suggestTask.searchWordWarningCount=1000000 suggestTask.searchWordWarningCount=1000000
suggestTask.es.url=http://10.0.254.139:9200 suggestTask.suggestTagMaxSize=5
suggestTask.es.user=suggest suggestTask.warningPhones=13426233960
suggestTask.es.password=suggest456 suggestTask.es.url=http://bigdataescluster.secoolocal.com:9200
suggestTask.es.index=search_suggest_index suggestTask.es.user=search
suggestTask.es.password=search5z0NvEn1D
suggestTask.es.index=search_suggest_index_huidu
suggestTask.es.type=search_suggest_type suggestTask.es.type=search_suggest_type
suggestTask.es.batchSize=2000 suggestTask.es.batchSize=2000
\ No newline at end of file
erp.read.url=jdbc:mysql://10.4.3.223:3306/secooErpDB?useUnicode=true&amp;characterEncoding=utf8&amp;noAccessToProcedureBodies=true&amp;zeroDateTimeBehavior=convertToNull&amp;allowMultiQueries=true erp.read.url=jdbc:mysql://192.168.50.40:3306/secooErpDB?useUnicode=true&amp;characterEncoding=utf8&amp;noAccessToProcedureBodies=true&amp;zeroDateTimeBehavior=convertToNull&amp;allowMultiQueries=true
erp.read.user=3306_test erp.read.user=so_Erp_R
erp.read.password=iS6CXpYqgZ8Mhjui erp.read.password=5RgzudyyFlApTmve
seo.read.url=jdbc:mysql://10.4.3.223:3306/secooSeoDB?useUnicode=true&amp;characterEncoding=utf8&amp;zeroDateTimeBehavior=convertToNull seo.read.url=jdbc:mysql://secooSeoDB.master.com:3307/secooSeoDB?useUnicode=true&amp;characterEncoding=utf8&amp;zeroDateTimeBehavior=convertToNull
seo.read.user=SeoDB_test seo.read.user=sem_Seo_W
seo.read.password=Cxkfq57huej0fTpK seo.read.password=C2IiHfNKYpT1onsR
\ No newline at end of file
dw.read.url=jdbc:mysql://secooDataWarehouse.slave.com:3306/secooDataWarehouse?useUnicode=true&amp;characterEncoding=utf8&amp;zeroDateTimeBehavior=convertToNull
dw.read.user=Search_DataWar_R
dw.read.password=pY1P9zUj9x1M65ot5szo
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment