Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
suggest-task
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
田川
suggest-task
Commits
cc2602ef
Commit
cc2602ef
authored
Aug 08, 2022
by
王明范
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
增加从配置文件中读部分词…
parent
200bfd5b
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
135 additions
and
34 deletions
+135
-34
QueryPlanHelper.java
suggest-task/src/main/java/com/secoo/so/suggest/helper/QueryPlanHelper.java
+30
-21
WordHelper.java
suggest-task/src/main/java/com/secoo/so/suggest/helper/WordHelper.java
+67
-0
SuggestTask.java
suggest-task/src/main/java/com/secoo/so/suggest/task/SuggestTask.java
+28
-11
config.properties
suggest-task/src/main/profiles/prod/config.properties
+4
-0
config.properties
suggest-task/src/main/profiles/test/config.properties
+6
-2
No files found.
suggest-task/src/main/java/com/secoo/so/suggest/helper/QueryPlanHelper.java
View file @
cc2602ef
...
...
@@ -7,8 +7,10 @@ import com.secoo.search.sqp4j.QueryPlan;
import
com.secoo.search.sqp4j.QueryWord
;
import
com.secoo.search.sqp4j.client.QueryPlanClient
;
import
com.secoo.so.suggest.client.SqpDubboClient
;
import
com.secoo.so.suggest.config.ConfigUtil
;
import
com.secoo.so.suggest.util.FileUtils
;
import
com.secoo.so.suggest.util.StringUtils
;
import
org.apache.lucene.queryparser.classic.QueryParser
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
...
...
@@ -30,11 +32,13 @@ public class QueryPlanHelper {
List
<
String
>
wordList
=
new
ArrayList
<>();
Map
<
String
,
Integer
>
keywordMap
=
new
HashMap
<>();
private
static
String
queryPlanFile
=
"
/data/crontab/test/tmp/queryplan.txt"
;
private
static
String
queryPlanFile
=
"
"
;
///data/crontab/test/tmp/queryplan.txt
private
static
List
<
String
>
newWordLines
=
new
ArrayList
<>();
private
static
long
minTimeStamp
=
Long
.
MAX_VALUE
;
private
QueryPlanHelper
()
{
queryPlanFile
=
ConfigUtil
.
getString
(
"queryPlan.cachePath"
,
""
);
client
=
SqpDubboClient
.
getProdImpl
();
loadQueryPlanFromFile
();
...
...
@@ -78,27 +82,29 @@ public class QueryPlanHelper {
}
private
void
loadQueryPlanFromFile
()
{
List
<
String
>
lines
=
FileUtils
.
readLines
(
queryPlanFile
);
if
(
lines
!=
null
&&
lines
.
size
()
>
0
)
{
for
(
String
line
:
lines
)
{
if
(
StringUtils
.
isBlank
(
line
))
{
continue
;
}
String
[]
arr
=
line
.
split
(
","
);
if
(
arr
.
length
==
3
)
{
String
keyword
=
arr
[
0
];
String
strWordCount
=
arr
[
1
];
String
ts
=
arr
[
2
];
if
(
StringUtils
.
isNotBlank
(
keyword
)
&&
StringUtils
.
isNumber
(
strWordCount
)
&&
StringUtils
.
isNumber
(
ts
))
{
try
{
long
timeStamp
=
Long
.
valueOf
(
ts
);
if
(
timeStamp
<
minTimeStamp
)
{
minTimeStamp
=
timeStamp
;
if
(
StringUtils
.
isNotBlank
(
queryPlanFile
))
{
List
<
String
>
lines
=
FileUtils
.
readLines
(
queryPlanFile
);
if
(
lines
!=
null
&&
lines
.
size
()
>
0
)
{
for
(
String
line
:
lines
)
{
if
(
StringUtils
.
isBlank
(
line
))
{
continue
;
}
String
[]
arr
=
line
.
split
(
","
);
if
(
arr
.
length
==
3
)
{
String
keyword
=
arr
[
0
];
String
strWordCount
=
arr
[
1
];
String
ts
=
arr
[
2
];
if
(
StringUtils
.
isNotBlank
(
keyword
)
&&
StringUtils
.
isNumber
(
strWordCount
)
&&
StringUtils
.
isNumber
(
ts
))
{
try
{
long
timeStamp
=
Long
.
valueOf
(
ts
);
if
(
timeStamp
<
minTimeStamp
)
{
minTimeStamp
=
timeStamp
;
}
int
wordCount
=
Integer
.
valueOf
(
strWordCount
);
keywordMap
.
put
(
keyword
,
wordCount
);
}
catch
(
Exception
e
)
{
LOG
.
info
(
"string to integer exception,"
,
e
);
}
int
wordCount
=
Integer
.
valueOf
(
strWordCount
);
keywordMap
.
put
(
keyword
,
wordCount
);
}
catch
(
Exception
e
)
{
LOG
.
info
(
"string to integer exception,"
,
e
);
}
}
}
...
...
@@ -106,6 +112,9 @@ public class QueryPlanHelper {
}
}
public
void
writeQueryPlanToFile
()
{
if
(
StringUtils
.
isBlank
(
queryPlanFile
))
{
return
;
}
long
nowSecond
=
System
.
currentTimeMillis
()/
1000
;
long
sevenDays
=
3600
*
24
*
7
;
if
(
nowSecond
-
minTimeStamp
>
sevenDays
)
{
...
...
suggest-task/src/main/java/com/secoo/so/suggest/helper/WordHelper.java
0 → 100644
View file @
cc2602ef
package
com
.
secoo
.
so
.
suggest
.
helper
;
import
com.secoo.so.suggest.config.ConfigUtil
;
import
com.secoo.so.suggest.util.FileUtils
;
import
com.secoo.so.suggest.util.StringUtils
;
import
com.sun.xml.internal.ws.binding.FeatureListUtil
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
java.util.Arrays
;
import
java.util.HashSet
;
import
java.util.List
;
import
java.util.Set
;
/**
 * Static helpers that load word dictionaries from files whose paths are
 * looked up in the task configuration via {@link ConfigUtil}.
 *
 * @author wangmingfan
 * @date 2022/8/8
 */
public class WordHelper {

    private static final Logger LOG = LoggerFactory.getLogger(WordHelper.class);

    /** Configuration key whose value is the path of the special-word file. */
    private static final String SPECIAL_WORD_PATH_KEY = "specialWordPath";

    /** Configuration key whose value is the path of the synonym-tag file. */
    private static final String SYNONYM_TAG_PATH_KEY = "synonymTagPath";

    /**
     * Loads the words that must not be split, one word per line of the file
     * configured under {@code specialWordPath}.
     *
     * <p>Blank lines are skipped, matching how {@link #loadSynonymTags()}
     * treats its input.
     *
     * @return the words read from the file; an empty set (never {@code null})
     *         when no path is configured or the file yields no lines
     */
    public static Set<String> loadSpecialWords() {
        Set<String> words = new HashSet<>();

        String specialWordPath = ConfigUtil.getString(SPECIAL_WORD_PATH_KEY);
        if (StringUtils.isBlank(specialWordPath)) {
            return words;
        }

        List<String> lines = FileUtils.readLines(specialWordPath);
        if (lines == null) {
            return words;
        }

        for (String line : lines) {
            // A blank line is not a word; keep it out of the set.
            if (StringUtils.isNotBlank(line)) {
                words.add(line);
            }
        }
        return words;
    }

    /**
     * Loads synonym tag groups from the file configured under
     * {@code synonymTagPath}.
     *
     * <p>Each non-blank line is one group of synonyms separated by a vertical
     * bar ({@code |}), for example:
     * <pre>
     * 男款|男式|男士
     * 女款|女式|女士
     * </pre>
     *
     * @return one set per synonym group; an empty set (never {@code null})
     *         when no path is configured or the file yields no lines
     */
    public static Set<Set<String>> loadSynonymTags() {
        Set<Set<String>> synonyms = new HashSet<>();

        String synonymTagPath = ConfigUtil.getString(SYNONYM_TAG_PATH_KEY);
        if (StringUtils.isBlank(synonymTagPath)) {
            return synonyms;
        }

        List<String> lines = FileUtils.readLines(synonymTagPath);
        // loadSpecialWords() has always null-checked this result, so a null
        // return is presumably possible (e.g. unreadable file); guard here too
        // instead of failing with a NullPointerException.
        if (lines == null) {
            return synonyms;
        }

        for (String line : lines) {
            if (StringUtils.isBlank(line)) {
                continue;
            }
            String[] arrWords = line.split("\\|");
            // A line made only of separators splits into an empty array.
            if (arrWords.length > 0) {
                synonyms.add(new HashSet<>(Arrays.asList(arrWords)));
            }
        }
        return synonyms;
    }
}
suggest-task/src/main/java/com/secoo/so/suggest/task/SuggestTask.java
View file @
cc2602ef
...
...
@@ -15,6 +15,7 @@ import com.secoo.so.suggest.entity.SearchKeywordInfo;
import
com.secoo.so.suggest.es.EsClient
;
import
com.secoo.so.suggest.es.EsObject
;
import
com.secoo.so.suggest.helper.QueryPlanHelper
;
import
com.secoo.so.suggest.helper.WordHelper
;
import
com.secoo.so.suggest.util.*
;
import
lombok.Data
;
import
lombok.extern.slf4j.Slf4j
;
...
...
@@ -42,11 +43,8 @@ public class SuggestTask {
private
static
int
maxTagSize
=
5
;
private
static
long
startTime
=
System
.
currentTimeMillis
();
private
static
Set
<
String
>
spWordSet
=
new
HashSet
<>(
Arrays
.
asList
(
"靴子"
,
"鞋子"
,
"裤子"
,
"袜子"
,
"裙子"
,
"帽子"
,
"杯子"
,
"箱子"
,
"包包"
,
"包袋"
,
"包带"
,
"表带"
,
"大号"
,
"中号"
,
"小号"
,
"衣服"
,
"t恤"
,
"衣服"
,
"男款"
,
"男士"
,
"男式"
,
"男性"
,
"男童"
,
"女款"
,
"女士"
,
"女式"
,
"女性"
,
"女童"
,
"大象"
,
"男包"
,
"女包"
,
"男鞋"
,
"女鞋"
));
private
static
List
<
Set
<
String
>>
synonymList
=
new
ArrayList
<>();
private
static
Set
<
String
>
spWordSet
=
new
HashSet
<>();
private
static
Set
<
Set
<
String
>>
synonymList
=
new
HashSet
<>();
public
static
void
main
(
String
[]
args
)
{
startTime
=
System
.
currentTimeMillis
();
...
...
@@ -67,7 +65,10 @@ public class SuggestTask {
sensitiveMap
=
loadSensitiveMap
();
europeWordMap
=
loadEuropeWordMap
();
// 加载表填同义词
// 加载部分确定不能分割的特殊词
spWordSet
=
loadSpecialWords
();
// 加载标签同义词
synonymList
=
loadTagSynonym
();
QueryPlanHelper
sqp
=
QueryPlanHelper
.
getInstance
();
...
...
@@ -197,13 +198,29 @@ public class SuggestTask {
return
prefixFilterList
;
}
private
static
Lis
t
<
Set
<
String
>>
loadTagSynonym
(){
List
<
Set
<
String
>>
synList
=
new
ArrayLis
t
<>();
private
static
Se
t
<
Set
<
String
>>
loadTagSynonym
(){
Set
<
Set
<
String
>>
synSet
=
new
HashSe
t
<>();
Set
<
String
>
maleWords
=
new
HashSet
<>(
Arrays
.
asList
(
"男性"
,
"男式"
,
"男士"
,
"男款"
,
"男"
));
Set
<
String
>
femaleWords
=
new
HashSet
<>(
Arrays
.
asList
(
"女性"
,
"女式"
,
"女士"
,
"女款"
,
"女"
));
synList
.
add
(
maleWords
);
synList
.
add
(
femaleWords
);
return
synList
;
synSet
.
add
(
maleWords
);
synSet
.
add
(
femaleWords
);
Set
<
Set
<
String
>>
fileSynonyms
=
WordHelper
.
loadSynonymTags
();
if
(
fileSynonyms
.
size
()
>
0
)
{
synSet
.
addAll
(
fileSynonyms
);
}
return
synSet
;
}
/**
 * Builds the set of special words that must not be split: the built-in
 * defaults below plus whatever {@link WordHelper#loadSpecialWords()} returns.
 *
 * @return a new mutable set containing the built-in and file-provided words
 */
private static Set<String> loadSpecialWords() {
    // Built-in defaults; the set is extended with the file-provided words below.
    Set<String> merged = new HashSet<>(Arrays.asList(
            "靴子", "鞋子", "裤子", "袜子", "裙子", "帽子", "杯子", "箱子",
            "包包", "包袋", "包带", "表带",
            "大号", "中号", "小号",
            "衣服", "t恤", "衣服",
            "男款", "男士", "男式", "男性", "男童",
            "女款", "女士", "女式", "女性", "女童",
            "大象", "男包", "女包", "男鞋", "女鞋"));

    // Adding an empty set is a no-op, so no explicit size check is needed.
    merged.addAll(WordHelper.loadSpecialWords());
    return merged;
}
private
static
String
cleanKeyword
(
String
keyword
)
{
...
...
suggest-task/src/main/profiles/prod/config.properties
View file @
cc2602ef
...
...
@@ -14,3 +14,7 @@ suggestTask.es.password=search5z0NvEn1D
suggestTask.es.index
=
search_suggest_index
suggestTask.es.type
=
search_suggest_type
suggestTask.es.batchSize
=
2000
queryPlan.cachePath
=
/data/crontab/suggest/tmp/queryplan.txt
specialWordPath
=
/data/crontab/suggest/dict/specialWord.txt
synonymTagPath
=
/data/crontab/suggest/dict/synonymTag.txt
suggest-task/src/main/profiles/test/config.properties
View file @
cc2602ef
...
...
@@ -13,4 +13,8 @@ suggestTask.es.user=search
suggestTask.es.password
=
search5z0NvEn1D
suggestTask.es.index
=
search_suggest_index_huidu
suggestTask.es.type
=
search_suggest_type
suggestTask.es.batchSize
=
2000
\ No newline at end of file
suggestTask.es.batchSize
=
2000
queryPlan.cachePath
=
/data/crontab/test/tmp/queryplan.txt
specialWordPath
=
/data/crontab/test/dict/specialWord.txt
synonymTagPath
=
/data/crontab/test/dict/synonymTag.txt
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment