Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
suggest-task
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
田川
suggest-task
Commits
ccce4f93
Commit
ccce4f93
authored
Aug 04, 2022
by
王明范
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
use sqp
parent
d835274b
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
302 additions
and
20 deletions
+302
-20
pom.xml
suggest-task/pom.xml
+35
-1
SqpDubboClient.java
suggest-task/src/main/java/com/secoo/so/suggest/client/SqpDubboClient.java
+122
-0
QueryPlanHelper.java
suggest-task/src/main/java/com/secoo/so/suggest/helper/QueryPlanHelper.java
+90
-0
SuggestTask.java
suggest-task/src/main/java/com/secoo/so/suggest/task/SuggestTask.java
+37
-7
config.properties
suggest-task/src/main/profiles/test/config.properties
+7
-5
db.properties
suggest-task/src/main/profiles/test/db.properties
+11
-7
No files found.
suggest-task/pom.xml
View file @
ccce4f93
...
@@ -36,7 +36,41 @@
...
@@ -36,7 +36,41 @@
</profiles>
</profiles>
<dependencies>
<dependencies>
<dependency>
<groupId>
com.secoo
</groupId>
<artifactId>
sqp4j-client
</artifactId>
<exclusions>
<exclusion>
<groupId>
org.slf4j
</groupId>
<artifactId>
slf4j-log4j12
</artifactId>
</exclusion>
<exclusion>
<groupId>
log4j
</groupId>
<artifactId>
log4j
</artifactId>
</exclusion>
<exclusion>
<artifactId>
secoo-log
</artifactId>
<groupId>
com.secoo.mall
</groupId>
</exclusion>
</exclusions>
<version>
2.9.6.RELEASE
</version>
</dependency>
<dependency>
<groupId>
com.alibaba
</groupId>
<artifactId>
dubbo
</artifactId>
<version>
2.6.0
</version>
<exclusions>
<exclusion>
<artifactId>
spring-context
</artifactId>
<groupId>
org.springframework
</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>
com.github.sgroschupf
</groupId>
<artifactId>
zkclient
</artifactId>
<version>
0.1
</version>
</dependency>
<dependency>
<dependency>
<groupId>
mysql
</groupId>
<groupId>
mysql
</groupId>
<artifactId>
mysql-connector-java
</artifactId>
<artifactId>
mysql-connector-java
</artifactId>
...
...
suggest-task/src/main/java/com/secoo/so/suggest/client/SqpDubboClient.java
0 → 100644
View file @
ccce4f93
package
com
.
secoo
.
so
.
suggest
.
client
;
import
com.alibaba.dubbo.config.ApplicationConfig
;
import
com.alibaba.dubbo.config.ReferenceConfig
;
import
com.alibaba.dubbo.config.RegistryConfig
;
import
com.secoo.search.sqp4j.QueryPlan
;
import
org.apache.log4j.Logger
;
import
java.util.Map
;
/**
 * Factory for Dubbo references to the SQP {@link QueryPlan} service.
 *
 * <p>Every {@link ReferenceConfig} built here is created lazily and then cached for the lifetime
 * of the JVM. Dubbo's documentation describes {@code ReferenceConfig} as a heavyweight object
 * (it holds the registry and provider connections) that should be cached rather than re-created.
 * All lazy initializers are {@code synchronized} so that concurrent first calls cannot build the
 * same reference twice.
 *
 * @author wangmingfan
 * @date 2020/8/17
 */
public class SqpDubboClient {

    private static final Logger LOGGER = Logger.getLogger(SqpDubboClient.class);

    /** Application name reported to Dubbo by every reference built in this class. */
    private static final String APPLICATION_NAME = "sem-test-tool";
    private static final String DUBBO_PROTOCOL = "dubbo";
    private static final String SERVICE_VERSION = "1.0.0";
    private static final String SERVICE_INTERFACE = "com.secoo.search.sqp4j.QueryPlan";

    /** Dubbo group that distinguishes the UAT reference from the production one. */
    private static final String UAT_GROUP = "grey";

    /** Registry used by both the production and the UAT references. */
    private static final String PROD_REGISTRY_ADDRESS =
            "zookeeper://zk-mall1.secoolocal.com:5181?backup=zk-mall2.secoolocal.com:5181,zk-mall3.secoolocal.com:5181";

    /** Registry used by the test-environment reference. */
    private static final String TEST_REGISTRY_ADDRESS =
            "zookeeper://10.185.240.81:2181?backup=10.185.240.82:2181,10.185.240.83:2181";

    private static ReferenceConfig<QueryPlan> dubboSqpReferenceConfigProd = null;
    private static ReferenceConfig<QueryPlan> dubboSqpReferenceConfigUat = null;
    private static ReferenceConfig<QueryPlan> dubboSqpReferenceConfigTest = null;

    /** Direct (registry-less) references, keyed by the provider URL they point at. */
    private static final Map<String, ReferenceConfig<QueryPlan>> DIRECT_REFERENCE_CONFIGS =
            new java.util.HashMap<String, ReferenceConfig<QueryPlan>>();

    /**
     * Returns a {@link QueryPlan} proxy that talks directly to the given provider, bypassing the
     * registry.
     *
     * @param url provider address in {@code ip:port} form, e.g. {@code 10.185.240.158:20062}
     * @param map optional output map; when non-null it receives the reference's "Client",
     *            "Interface", "Protocol", "Url" and "Cluster" settings
     * @return the Dubbo proxy for the provider at {@code url}
     */
    public static QueryPlan getDirectImpl(String url, Map<String, String> map) {
        ReferenceConfig<QueryPlan> impl = directDubboSqpReferenceConfig(url);
        QueryPlan dubboSqp = impl.get();
        // The map is purely diagnostic output, so a caller that does not need it may pass null.
        if (map != null) {
            map.put("Client", impl.getClient());
            map.put("Interface", impl.getInterface());
            map.put("Protocol", impl.getProtocol());
            map.put("Url", impl.getUrl());
            map.put("Cluster", impl.getCluster());
        }
        return dubboSqp;
    }

    /**
     * Returns the {@link QueryPlan} proxy for the test environment.
     * No Dubbo group is set on this reference.
     *
     * @return the Dubbo proxy resolved through the test registry
     */
    public static QueryPlan getTestImpl() {
        return getTestDubboSqpReferenceConfig().get();
    }

    /**
     * Returns the {@link QueryPlan} proxy for the UAT environment (group {@code "grey"}).
     *
     * @return the Dubbo proxy resolved through the production registry within the UAT group
     */
    public static QueryPlan getUatImpl() {
        return getDubboSqpReferenceConfigUat().get();
    }

    /**
     * Returns the {@link QueryPlan} proxy for the production environment.
     *
     * @return the Dubbo proxy resolved through the production registry
     */
    public static QueryPlan getProdImpl() {
        return getDubboSqpReferenceConfigProd().get();
    }

    /** Lazily builds and caches the production reference. */
    private static synchronized ReferenceConfig<QueryPlan> getDubboSqpReferenceConfigProd() {
        if (dubboSqpReferenceConfigProd == null) {
            dubboSqpReferenceConfigProd = buildRegistryReferenceConfig(PROD_REGISTRY_ADDRESS);
        }
        return dubboSqpReferenceConfigProd;
    }

    /** Lazily builds and caches the UAT reference: production registry plus the UAT group. */
    private static synchronized ReferenceConfig<QueryPlan> getDubboSqpReferenceConfigUat() {
        if (dubboSqpReferenceConfigUat == null) {
            ReferenceConfig<QueryPlan> impl = buildRegistryReferenceConfig(PROD_REGISTRY_ADDRESS);
            impl.setGroup(UAT_GROUP);
            // Publish only after the group is set so no caller can observe a half-configured reference.
            dubboSqpReferenceConfigUat = impl;
        }
        return dubboSqpReferenceConfigUat;
    }

    /** Lazily builds and caches the test-environment reference. */
    private static synchronized ReferenceConfig<QueryPlan> getTestDubboSqpReferenceConfig() {
        if (dubboSqpReferenceConfigTest == null) {
            dubboSqpReferenceConfigTest = buildRegistryReferenceConfig(TEST_REGISTRY_ADDRESS);
        }
        return dubboSqpReferenceConfigTest;
    }

    /**
     * Returns the cached direct-connection reference for {@code url}, creating it on first use.
     *
     * @param url provider address in {@code ip:port} form
     * @return the reference configured to connect straight to {@code url}
     */
    private static synchronized ReferenceConfig<QueryPlan> directDubboSqpReferenceConfig(String url) {
        ReferenceConfig<QueryPlan> impl = DIRECT_REFERENCE_CONFIGS.get(url);
        if (impl == null) {
            LOGGER.info("Creating direct SQP dubbo reference for url " + url);
            impl = newBaseReferenceConfig();
            impl.setUrl(url);
            DIRECT_REFERENCE_CONFIGS.put(url, impl);
        }
        return impl;
    }

    /**
     * Builds a reference that discovers providers through a ZooKeeper registry.
     *
     * @param registryAddress full registry URL, including any {@code backup} hosts
     * @return a new, not yet initialized reference
     */
    private static ReferenceConfig<QueryPlan> buildRegistryReferenceConfig(String registryAddress) {
        LOGGER.info("Creating SQP dubbo reference via registry " + registryAddress);
        ReferenceConfig<QueryPlan> impl = newBaseReferenceConfig();
        RegistryConfig registryConfig = new RegistryConfig(registryAddress);
        registryConfig.setProtocol("zookeeper");
        registryConfig.setClient("zkclient");
        impl.setRegistry(registryConfig);
        return impl;
    }

    /** Creates a reference carrying the settings shared by every environment. */
    private static ReferenceConfig<QueryPlan> newBaseReferenceConfig() {
        ReferenceConfig<QueryPlan> impl = new ReferenceConfig<QueryPlan>();
        impl.setProtocol(DUBBO_PROTOCOL);
        impl.setApplication(new ApplicationConfig(APPLICATION_NAME));
        impl.setVersion(SERVICE_VERSION);
        impl.setInterface(SERVICE_INTERFACE);
        return impl;
    }
}
suggest-task/src/main/java/com/secoo/so/suggest/helper/QueryPlanHelper.java
0 → 100644
View file @
ccce4f93
package
com
.
secoo
.
so
.
suggest
.
helper
;
import
com.secoo.abtest.common.Buckets
;
import
com.secoo.search.sqp4j.Explanation
;
import
com.secoo.search.sqp4j.Explanations
;
import
com.secoo.search.sqp4j.QueryPlan
;
import
com.secoo.search.sqp4j.QueryWord
;
import
com.secoo.search.sqp4j.client.QueryPlanClient
;
import
com.secoo.so.suggest.client.SqpDubboClient
;
import
org.apache.commons.lang3.StringUtils
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
java.util.*
;
/**
 * Process-wide helper that asks the SQP {@link QueryPlan} service to explain a keyword and keeps
 * the results in a bounded in-memory cache.
 *
 * <p>The cache evicts in insertion (FIFO) order once it holds more than {@link #MAX_CACHE_SIZE}
 * entries. It is wrapped in a synchronized map so the shared singleton can be called from
 * several threads; the remote call itself is made outside any lock, so two threads asking for the
 * same uncached keyword at the same time may both hit the service.
 *
 * @author wangmingfan
 * @date 2022/8/4
 */
public class QueryPlanHelper {

    private static final Logger LOG = LoggerFactory.getLogger(QueryPlanHelper.class);

    /** Upper bound on cached explanations; the oldest entry is dropped once this is exceeded. */
    private static final int MAX_CACHE_SIZE = 100000;

    private static volatile QueryPlanHelper instance;

    /** Remote SQP service proxy, resolved once when the singleton is created. */
    private final QueryPlan client;

    /** keyword -> first explanation returned by the service, bounded and FIFO-evicted. */
    final Map<String, Explanation> sqpCache = newFifoCache(MAX_CACHE_SIZE);

    private QueryPlanHelper() {
        this.client = SqpDubboClient.getProdImpl();
    }

    /**
     * Returns the shared instance, creating it (and its production Dubbo reference) on first use.
     *
     * @return the singleton helper
     */
    public static QueryPlanHelper getInstance() {
        if (instance == null) {
            synchronized (QueryPlanHelper.class) {
                if (instance == null) {
                    instance = new QueryPlanHelper();
                }
            }
        }
        return instance;
    }

    /**
     * Explains a keyword, serving the answer from the cache when it was explained before.
     *
     * @param keyword the search keyword; blank or {@code null} keywords are not sent to the service
     * @return the first explanation returned by the service, or {@code null} when the keyword is
     *         blank or the service returned no usable explanation
     */
    public Explanation explain(String keyword) {
        if (StringUtils.isBlank(keyword)) {
            return null;
        }

        // Only non-null explanations are ever cached, so a null lookup means "not cached".
        Explanation cached = sqpCache.get(keyword);
        if (cached != null) {
            return cached;
        }

        String traceId = UUID.randomUUID().toString();
        Map<String, String> bucketInfo = new HashMap<>();
        Buckets bucket = new Buckets(bucketInfo);
        String cityCode = "";
        long currDate = 0L;
        int needSpell = 0;

        Explanations explanations =
                client.explain(traceId, bucket, cityCode, currDate, needSpell, keyword, null);
        // Guard against a response object whose item list was never populated.
        if (explanations == null
                || explanations.getItems() == null
                || explanations.getItems().size() == 0) {
            return null;
        }

        Explanation explanation = explanations.getItems().get(0);
        if (explanation == null) {
            return null;
        }
        cacheKeyword(keyword, explanation);
        return explanation;
    }

    /** Stores the explanation; the cache itself evicts its oldest entry when it grows too large. */
    private void cacheKeyword(String keyword, Explanation explanation) {
        sqpCache.put(keyword, explanation);
    }

    /**
     * Creates a thread-safe map that drops its oldest inserted entry whenever a {@code put} makes
     * it larger than {@code maxEntries}.
     *
     * @param maxEntries maximum number of entries the map retains
     * @return an empty bounded map
     */
    private static <K, V> Map<K, V> newFifoCache(final int maxEntries) {
        return Collections.synchronizedMap(new LinkedHashMap<K, V>() {
            @Override
            protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
                return size() > maxEntries;
            }
        });
    }

    /**
     * Manual check of the eviction policy: inserts seven keys into a five-entry cache and prints
     * the sizes after each insert (they must never exceed five).
     */
    public static void main(String[] arg) {
        Map<String, String> cache = newFifoCache(5);
        for (int i = 0; i < 7; i++) {
            cache.put("key_" + i, String.valueOf(i));
            System.out.println("list size:" + cache.keySet().size() + ";map size:" + cache.size());
        }
    }
}
suggest-task/src/main/java/com/secoo/so/suggest/task/SuggestTask.java
View file @
ccce4f93
...
@@ -2,6 +2,9 @@ package com.secoo.so.suggest.task;
...
@@ -2,6 +2,9 @@ package com.secoo.so.suggest.task;
import
com.alibaba.fastjson.JSON
;
import
com.alibaba.fastjson.JSON
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.secoo.search.sqp4j.Explanation
;
import
com.secoo.search.sqp4j.QueryPlan
;
import
com.secoo.search.sqp4j.QueryWord
;
import
com.secoo.so.suggest.config.ConfigUtil
;
import
com.secoo.so.suggest.config.ConfigUtil
;
import
com.secoo.so.suggest.db.DwDataSource
;
import
com.secoo.so.suggest.db.DwDataSource
;
import
com.secoo.so.suggest.db.ErpDataSource
;
import
com.secoo.so.suggest.db.ErpDataSource
;
...
@@ -11,10 +14,12 @@ import com.secoo.so.suggest.entity.EsSuggestKeywordInfo;
...
@@ -11,10 +14,12 @@ import com.secoo.so.suggest.entity.EsSuggestKeywordInfo;
import
com.secoo.so.suggest.entity.SearchKeywordInfo
;
import
com.secoo.so.suggest.entity.SearchKeywordInfo
;
import
com.secoo.so.suggest.es.EsClient
;
import
com.secoo.so.suggest.es.EsClient
;
import
com.secoo.so.suggest.es.EsObject
;
import
com.secoo.so.suggest.es.EsObject
;
import
com.secoo.so.suggest.helper.QueryPlanHelper
;
import
com.secoo.so.suggest.util.*
;
import
com.secoo.so.suggest.util.*
;
import
lombok.Data
;
import
lombok.Data
;
import
lombok.extern.slf4j.Slf4j
;
import
lombok.extern.slf4j.Slf4j
;
import
javax.management.Query
;
import
java.io.File
;
import
java.io.File
;
import
java.io.Serializable
;
import
java.io.Serializable
;
import
java.util.*
;
import
java.util.*
;
...
@@ -63,7 +68,7 @@ public class SuggestTask {
...
@@ -63,7 +68,7 @@ public class SuggestTask {
europeWordMap
=
loadEuropeWordMap
();
europeWordMap
=
loadEuropeWordMap
();
// 加载表填同义词
// 加载表填同义词
loadTagSynonym
();
synonymList
=
loadTagSynonym
();
// 加载搜索词并处理
// 加载搜索词并处理
processSuggestTask
(
startTime
);
processSuggestTask
(
startTime
);
...
@@ -189,14 +194,13 @@ public class SuggestTask {
...
@@ -189,14 +194,13 @@ public class SuggestTask {
return
prefixFilterList
;
return
prefixFilterList
;
}
}
private
static
void
loadTagSynonym
(){
private
static
List
<
Set
<
String
>>
loadTagSynonym
(){
List
<
Set
<
String
>>
synList
=
new
ArrayList
<>();
Set
<
String
>
maleWords
=
new
HashSet
<>(
Arrays
.
asList
(
"男性"
,
"男式"
,
"男士"
,
"男款"
,
"男"
));
Set
<
String
>
maleWords
=
new
HashSet
<>(
Arrays
.
asList
(
"男性"
,
"男式"
,
"男士"
,
"男款"
,
"男"
));
Set
<
String
>
femaleWords
=
new
HashSet
<>(
Arrays
.
asList
(
"女性"
,
"女式"
,
"女士"
,
"女款"
,
"女"
));
Set
<
String
>
femaleWords
=
new
HashSet
<>(
Arrays
.
asList
(
"女性"
,
"女式"
,
"女士"
,
"女款"
,
"女"
));
if
(
synonymList
==
null
)
{
synList
.
add
(
maleWords
);
synonymList
=
new
ArrayList
<>();
synList
.
add
(
femaleWords
);
}
return
synList
;
synonymList
.
add
(
maleWords
);
synonymList
.
add
(
femaleWords
);
}
}
private
static
String
cleanKeyword
(
String
keyword
)
{
private
static
String
cleanKeyword
(
String
keyword
)
{
...
@@ -441,6 +445,32 @@ public class SuggestTask {
...
@@ -441,6 +445,32 @@ public class SuggestTask {
return
true
;
return
true
;
}
}
if
(
StringUtils
.
isNotBlank
(
rightWord
))
{
log
.
info
(
"check word:"
+
word
+
" and "
+
fullWord
);
QueryPlanHelper
sqp
=
QueryPlanHelper
.
getInstance
();
Explanation
explan1
=
sqp
.
explain
(
word
);
Explanation
explan2
=
sqp
.
explain
(
rightWord
);
Explanation
explan3
=
sqp
.
explain
(
fullWord
);
if
(
explan1
!=
null
&&
explan2
!=
null
&&
explan3
!=
null
)
{
List
<
QueryWord
>
queryWords1
=
explan1
.
getQueryWords
();
List
<
QueryWord
>
queryWords2
=
explan2
.
getQueryWords
();
List
<
QueryWord
>
queryWords3
=
explan3
.
getQueryWords
();
log
.
info
(
"queryWords1 size:"
+
queryWords1
.
size
()+
"; queryWords2 size:"
+
queryWords2
.
size
()+
"; queryWords3 size:"
+
queryWords3
.
size
());
if
(
queryWords1
!=
null
&&
queryWords2
!=
null
&&
queryWords3
!=
null
)
{
if
(
queryWords1
.
size
()
+
queryWords2
.
size
()
>
queryWords3
.
size
())
{
return
true
;
}
}
else
{
return
true
;
}
}
else
{
return
true
;
}
}
else
{
return
true
;
}
return
false
;
return
false
;
}
}
...
...
suggest-task/src/main/profiles/test/config.properties
View file @
ccce4f93
...
@@ -5,11 +5,12 @@ suggestTask.SensitiveFolder=/data/pssmaster/corpus_set/suggest_corpus/sensitive
...
@@ -5,11 +5,12 @@ suggestTask.SensitiveFolder=/data/pssmaster/corpus_set/suggest_corpus/sensitive
suggestTask.EuropeWordFolder
=
/data/pssmaster/corpus_set/suggest_corpus/europe_word
suggestTask.EuropeWordFolder
=
/data/pssmaster/corpus_set/suggest_corpus/europe_word
suggestTask.batchSize
=
10000
suggestTask.batchSize
=
10000
suggestTask.threadPoolSize
=
10
suggestTask.threadPoolSize
=
10
suggestTask.suggestTagMaxSize
=
5
suggestTask.searchWordWarningCount
=
1000000
suggestTask.searchWordWarningCount
=
1000000
suggestTask.es.url
=
http://10.0.254.139:9200
suggestTask.suggestTagMaxSize
=
5
suggestTask.es.user
=
suggest
suggestTask.warningPhones
=
13426233960
suggestTask.es.password
=
suggest456
suggestTask.es.url
=
http://bigdataescluster.secoolocal.com:9200
suggestTask.es.index
=
search_suggest_index
suggestTask.es.user
=
search
suggestTask.es.password
=
search5z0NvEn1D
suggestTask.es.index
=
search_suggest_index_huidu
suggestTask.es.type
=
search_suggest_type
suggestTask.es.type
=
search_suggest_type
suggestTask.es.batchSize
=
2000
suggestTask.es.batchSize
=
2000
\ No newline at end of file
suggest-task/src/main/profiles/test/db.properties
View file @
ccce4f93
erp.read.url
=
jdbc:mysql://1
0.4.3.223
:3306/secooErpDB?useUnicode=true&characterEncoding=utf8&noAccessToProcedureBodies=true&zeroDateTimeBehavior=convertToNull&allowMultiQueries=true
erp.read.url
=
jdbc:mysql://1
92.168.50.40
:3306/secooErpDB?useUnicode=true&characterEncoding=utf8&noAccessToProcedureBodies=true&zeroDateTimeBehavior=convertToNull&allowMultiQueries=true
erp.read.user
=
3306_test
erp.read.user
=
so_Erp_R
erp.read.password
=
iS6CXpYqgZ8Mhjui
erp.read.password
=
5RgzudyyFlApTmve
seo.read.url
=
jdbc:mysql://10.4.3.223:3306/secooSeoDB?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull
seo.read.url
=
jdbc:mysql://secooSeoDB.master.com:3307/secooSeoDB?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull
seo.read.user
=
SeoDB_test
seo.read.user
=
sem_Seo_W
seo.read.password
=
Cxkfq57huej0fTpK
seo.read.password
=
C2IiHfNKYpT1onsR
\ No newline at end of file
dw.read.url
=
jdbc:mysql://secooDataWarehouse.slave.com:3306/secooDataWarehouse?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull
dw.read.user
=
Search_DataWar_R
dw.read.password
=
pY1P9zUj9x1M65ot5szo
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment