Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
suggest-task
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
田川
suggest-task
Commits
93b80089
Commit
93b80089
authored
Feb 07, 2022
by
xupeng
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix code
parent
d2e9cd84
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
84 additions
and
53 deletions
+84
-53
SuggestTask.java
suggest-task/src/main/java/com/secoo/so/suggest/task/SuggestTask.java
+84
-53
No files found.
suggest-task/src/main/java/com/secoo/so/suggest/task/SuggestTask.java
View file @
93b80089
...
@@ -14,10 +14,8 @@ import lombok.extern.slf4j.Slf4j;
...
@@ -14,10 +14,8 @@ import lombok.extern.slf4j.Slf4j;
import
java.io.File
;
import
java.io.File
;
import
java.io.Serializable
;
import
java.io.Serializable
;
import
java.util.ArrayList
;
import
java.util.*
;
import
java.util.HashMap
;
import
java.util.concurrent.ConcurrentHashMap
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.concurrent.ExecutorService
;
import
java.util.concurrent.ExecutorService
;
import
java.util.concurrent.Executors
;
import
java.util.concurrent.Executors
;
...
@@ -201,12 +199,13 @@ public class SuggestTask {
...
@@ -201,12 +199,13 @@ public class SuggestTask {
return
;
return
;
}
}
ConcurrentHashMap
<
String
,
EsSuggestKeywordInfo
>
esSuggestKeywordMap
=
new
ConcurrentHashMap
<>();
// 通过线程池分批次并发处理搜索词
// 通过线程池分批次并发处理搜索词
long
batchSize
=
ConfigUtil
.
getLong
(
"suggestTask.batchSize"
,
10000
);
long
batchSize
=
ConfigUtil
.
getLong
(
"suggestTask.batchSize"
,
10000
);
int
threadPoolSize
=
ConfigUtil
.
getInt
(
"suggestTask.threadPoolSize"
,
10
);
int
threadPoolSize
=
ConfigUtil
.
getInt
(
"suggestTask.threadPoolSize"
,
10
);
ExecutorService
execThreadPool
=
Executors
.
newFixedThreadPool
(
threadPoolSize
);
ExecutorService
execThreadPool
=
Executors
.
newFixedThreadPool
(
threadPoolSize
);
for
(
long
startId
=
minId
;
startId
<=
maxId
;
startId
=
startId
+
batchSize
)
{
for
(
long
startId
=
minId
;
startId
<=
maxId
;
startId
=
startId
+
batchSize
)
{
execThreadPool
.
submit
(
new
SearchKeywordProcessTask
(
startId
,
startId
+
batchSize
,
startTime
));
execThreadPool
.
submit
(
new
SearchKeywordProcessTask
(
esSuggestKeywordMap
,
startId
,
startId
+
batchSize
,
startTime
));
}
}
execThreadPool
.
shutdown
();
execThreadPool
.
shutdown
();
while
(
true
)
{
while
(
true
)
{
...
@@ -216,55 +215,73 @@ public class SuggestTask {
...
@@ -216,55 +215,73 @@ public class SuggestTask {
}
}
ObjectUtils
.
safeSleep
(
5000
);
ObjectUtils
.
safeSleep
(
5000
);
}
}
// 保存到es
// saveSuggestKeywordToEs(new ArrayList<>(esSuggestKeywordMap.values()));
// for test
saveSuggestKeywordToFile
(
new
ArrayList
<>(
esSuggestKeywordMap
.
values
()));
}
}
/**
/**
* 处理搜索词
* 处理搜索词
*/
*/
private
static
void
processSearchKeyword
(
List
<
SearchKeywordInfo
>
searchKeywordInfoList
,
long
startTime
)
{
private
static
void
processSearchKeyword
(
ConcurrentHashMap
<
String
,
EsSuggestKeywordInfo
>
esSuggestKeywordMap
,
List
<
SearchKeywordInfo
>
searchKeywordInfoList
,
long
startTime
)
{
if
(
CollectionUtils
.
isNotEmpty
(
searchKeywordInfoList
))
{
if
(
CollectionUtils
.
isNotEmpty
(
searchKeywordInfoList
))
{
List
<
EsSuggestKeywordInfo
>
suggestKeywordInfoList
=
new
ArrayList
<>();
for
(
SearchKeywordInfo
searchKeywordInfo
:
searchKeywordInfoList
)
{
for
(
SearchKeywordInfo
searchKeywordInfo
:
searchKeywordInfoList
)
{
if
(
StringUtils
.
isNotBlank
(
searchKeywordInfo
.
getKeyword
()))
{
if
(
StringUtils
.
isNotBlank
(
searchKeywordInfo
.
getKeyword
()))
{
EsSuggestKeywordInfo
suggestKeywordInfo
=
new
EsSuggestKeywordInfo
();
suggestKeywordInfo
.
setKeyword
(
cleanKeyword
(
searchKeywordInfo
.
getKeyword
()));
String
keyword
=
cleanKeyword
(
searchKeywordInfo
.
getKeyword
());
suggestKeywordInfo
.
setKeywordPinYin
(
PinYinUtils
.
changeToWithoutTonePinYin
(
suggestKeywordInfo
.
getKeyword
(),
""
));
synchronized
(
keyword
)
{
suggestKeywordInfo
.
setYearCount
(
searchKeywordInfo
.
getYearPv
());
EsSuggestKeywordInfo
suggestKeywordInfo
=
esSuggestKeywordMap
.
get
(
keyword
);
suggestKeywordInfo
.
setYearClickCount
(
searchKeywordInfo
.
getYearProductClickCount
());
if
(
suggestKeywordInfo
==
null
)
{
suggestKeywordInfo
.
setYearCartCount
(
searchKeywordInfo
.
getYearAddCartCount
());
suggestKeywordInfo
=
new
EsSuggestKeywordInfo
();
suggestKeywordInfo
.
setWeekCount
(
searchKeywordInfo
.
getWeekPv
());
suggestKeywordInfo
.
setKeyword
(
keyword
);
suggestKeywordInfo
.
setWeekClickCount
(
searchKeywordInfo
.
getWeekProductClickCount
());
suggestKeywordInfo
.
setKeywordPinYin
(
PinYinUtils
.
changeToWithoutTonePinYin
(
suggestKeywordInfo
.
getKeyword
(),
""
));
suggestKeywordInfo
.
setWeekCartCount
(
searchKeywordInfo
.
getWeekAddCartCount
());
suggestKeywordInfo
.
setSuggestTags
(
searchKeywordInfo
.
getPrepareTags
());
suggestKeywordInfo
.
setYearCount
(
searchKeywordInfo
.
getYearPv
());
suggestKeywordInfo
.
setKeywordVersion
(
searchKeywordInfo
.
getPDay
());
suggestKeywordInfo
.
setYearClickCount
(
searchKeywordInfo
.
getYearProductClickCount
());
suggestKeywordInfo
.
setUpdateTime
(
startTime
);
suggestKeywordInfo
.
setYearCartCount
(
searchKeywordInfo
.
getYearAddCartCount
());
suggestKeywordInfo
.
setWeekCount
(
searchKeywordInfo
.
getWeekPv
());
suggestKeywordInfo
.
setIsBrand
(
brandMap
.
containsKey
(
suggestKeywordInfo
.
getKeyword
()));
suggestKeywordInfo
.
setWeekClickCount
(
searchKeywordInfo
.
getWeekProductClickCount
());
suggestKeywordInfo
.
setIsCategory
(
categoryMap
.
containsKey
(
suggestKeywordInfo
.
getKeyword
()));
suggestKeywordInfo
.
setWeekCartCount
(
searchKeywordInfo
.
getWeekAddCartCount
());
suggestKeywordInfo
.
setIsSensitive
(
sensitiveMap
.
containsKey
(
suggestKeywordInfo
.
getKeyword
()));
suggestKeywordInfo
.
setIsEuropeWord
(
europeWordMap
.
containsKey
(
suggestKeywordInfo
.
getKeyword
()));
suggestKeywordInfo
.
setSuggestTags
(
searchKeywordInfo
.
getPrepareTags
());
suggestKeywordInfo
.
setIsManual
(
manualMap
.
containsKey
(
suggestKeywordInfo
.
getKeyword
()));
suggestKeywordInfo
.
setKeywordVersion
(
searchKeywordInfo
.
getPDay
());
suggestKeywordInfo
.
setManualValue
(
suggestKeywordInfo
.
getIsManual
()
?
manualMap
.
get
(
suggestKeywordInfo
.
getKeyword
())
:
0
);
suggestKeywordInfo
.
setUpdateTime
(
startTime
);
// 计算suggestKeyword权重等属性
suggestKeywordInfo
.
setIsBrand
(
brandMap
.
containsKey
(
keyword
));
processEsSuggestKeywordInfo
(
suggestKeywordInfo
,
searchKeywordInfo
);
suggestKeywordInfo
.
setIsCategory
(
categoryMap
.
containsKey
(
keyword
));
suggestKeywordInfo
.
setIsSensitive
(
sensitiveMap
.
containsKey
(
keyword
));
// 不过滤的suggest词,计算分值写es
suggestKeywordInfo
.
setIsEuropeWord
(
europeWordMap
.
containsKey
(
keyword
));
if
(!
isFilterSuggestKeyword
(
suggestKeywordInfo
))
{
suggestKeywordInfo
.
setIsManual
(
manualMap
.
containsKey
(
keyword
));
suggestKeywordInfo
.
setManualValue
(
suggestKeywordInfo
.
getIsManual
()
?
manualMap
.
get
(
keyword
)
:
0
);
// 保存es前执行标签清洗
cleanBeforeSaveToEs
(
suggestKeywordInfo
);
esSuggestKeywordMap
.
put
(
keyword
,
suggestKeywordInfo
);
}
else
{
suggestKeywordInfoList
.
add
(
suggestKeywordInfo
);
suggestKeywordInfo
.
setYearCount
(
suggestKeywordInfo
.
getYearCount
()
+
searchKeywordInfo
.
getYearPv
());
suggestKeywordInfo
.
setYearClickCount
(
suggestKeywordInfo
.
getYearClickCount
()
+
searchKeywordInfo
.
getYearProductClickCount
());
suggestKeywordInfo
.
setYearCartCount
(
suggestKeywordInfo
.
getYearCartCount
()
+
searchKeywordInfo
.
getYearAddCartCount
());
suggestKeywordInfo
.
setWeekCount
(
suggestKeywordInfo
.
getWeekCount
()
+
searchKeywordInfo
.
getWeekPv
());
suggestKeywordInfo
.
setWeekClickCount
(
suggestKeywordInfo
.
getWeekClickCount
()
+
searchKeywordInfo
.
getWeekProductClickCount
());
suggestKeywordInfo
.
setWeekCartCount
(
suggestKeywordInfo
.
getWeekCartCount
()
+
searchKeywordInfo
.
getWeekAddCartCount
());
}
// 计算suggestKeyword权重等属性
processEsSuggestKeywordInfo
(
suggestKeywordInfo
,
searchKeywordInfo
);
// 不过滤的suggest词,计算分值写es
if
(!
isFilterSuggestKeyword
(
suggestKeywordInfo
))
{
// 保存es前执行标签清洗
cleanBeforeSaveToEs
(
suggestKeywordInfo
);
esSuggestKeywordMap
.
put
(
suggestKeywordInfo
.
getKeyword
(),
suggestKeywordInfo
);
}
}
}
}
}
}
}
// 保存到es
// saveSuggestKeywordToEs(suggestKeywordInfoList);
// for test
saveSuggestKeywordToFile
(
suggestKeywordInfoList
);
}
}
}
}
...
@@ -279,13 +296,26 @@ public class SuggestTask {
...
@@ -279,13 +296,26 @@ public class SuggestTask {
private
static
void
saveSuggestKeywordToFile
(
List
<
EsSuggestKeywordInfo
>
suggestKeywordInfoList
)
{
private
static
void
saveSuggestKeywordToFile
(
List
<
EsSuggestKeywordInfo
>
suggestKeywordInfoList
)
{
if
(
CollectionUtils
.
isNotEmpty
(
suggestKeywordInfoList
))
{
if
(
CollectionUtils
.
isNotEmpty
(
suggestKeywordInfoList
))
{
int
batch
=
2000
;
String
fileName
=
"/tmp/suggest-task/suggest-index-"
+
DateUtils
.
formatDate
(
startTime
,
"yyyyMMddHHmmss"
)
+
".json"
;
List
<
String
>
lines
=
new
ArrayList
<>();
List
<
String
>
lines
=
new
ArrayList
<>();
suggestKeywordInfoList
.
forEach
(
suggestKeywordInfo
->
{
int
count
=
0
;
for
(
count
=
0
;
count
<
suggestKeywordInfoList
.
size
();
count
++)
{
EsSuggestKeywordInfo
suggestKeywordInfo
=
suggestKeywordInfoList
.
get
(
count
);
lines
.
add
(
JSON
.
toJSONString
(
suggestKeywordInfo
));
lines
.
add
(
JSON
.
toJSONString
(
suggestKeywordInfo
));
});
if
(
count
>
0
&&
lines
.
size
()
%
batch
==
0
)
{
String
fileName
=
"/tmp/suggest-task/suggest_index-"
+
DateUtils
.
formatDate
(
startTime
,
"yyyyMMddHHmmss"
)
+
"-"
+
Thread
.
currentThread
().
getId
()
+
".json"
;
log
.
info
(
"save {}/{} result to file: {}"
,
lines
.
size
(),
count
,
fileName
);
log
.
info
(
"save result to file: "
+
fileName
);
FileUtils
.
saveToFile
(
lines
,
fileName
,
true
);
FileUtils
.
saveToFile
(
lines
,
fileName
,
true
);
lines
=
new
ArrayList
<>();
}
}
if
(
CollectionUtils
.
isNotEmpty
(
lines
))
{
log
.
info
(
"save {}/{} result to file: {}"
,
lines
.
size
(),
count
,
fileName
);
FileUtils
.
saveToFile
(
lines
,
fileName
,
true
);
lines
.
clear
();
}
}
}
}
}
...
@@ -306,12 +336,12 @@ public class SuggestTask {
...
@@ -306,12 +336,12 @@ public class SuggestTask {
}
}
// 过滤掉太长的词
// 过滤掉太长的词
if
(
suggestKeywordInfo
.
getKeyword
().
length
()
>
3
0
)
{
if
(
suggestKeywordInfo
.
getKeyword
().
length
()
<=
1
||
suggestKeywordInfo
.
getKeyword
().
length
()
>
5
0
)
{
return
true
;
return
true
;
}
}
// 过滤掉纯数字的搜索词,原:过滤掉商品id,商品id是有7位数字组成
// 过滤掉纯数字的搜索词,原:过滤掉商品id,商品id是有7位数字组成
if
(
StringUtils
.
isNumber
(
suggestKeywordInfo
.
getKeyword
()))
{
if
(
suggestKeywordInfo
.
getKeyword
().
length
()
>
6
&&
StringUtils
.
isNumber
(
suggestKeywordInfo
.
getKeyword
()))
{
return
true
;
return
true
;
}
}
...
@@ -392,7 +422,6 @@ public class SuggestTask {
...
@@ -392,7 +422,6 @@ public class SuggestTask {
suggestKeywordInfo
.
setWeekClickRatio
(
suggestKeywordInfo
.
getWeekClickRatio
()
*
2
);
suggestKeywordInfo
.
setWeekClickRatio
(
suggestKeywordInfo
.
getWeekClickRatio
()
*
2
);
}
}
calculateWordRank
(
suggestKeywordInfo
);
calculateWordRank
(
suggestKeywordInfo
);
calculateWordABRank
(
suggestKeywordInfo
,
searchKeywordInfo
);
calculateWordABRank
(
suggestKeywordInfo
,
searchKeywordInfo
);
addNewScoreIfNewHotWord
(
suggestKeywordInfo
);
addNewScoreIfNewHotWord
(
suggestKeywordInfo
);
...
@@ -527,12 +556,14 @@ public class SuggestTask {
...
@@ -527,12 +556,14 @@ public class SuggestTask {
private
static
final
long
serialVersionUID
=
-
2853856815712590673L
;
private
static
final
long
serialVersionUID
=
-
2853856815712590673L
;
public
SearchKeywordProcessTask
(
Long
startId
,
Long
endId
,
Long
startTime
)
{
public
SearchKeywordProcessTask
(
ConcurrentHashMap
<
String
,
EsSuggestKeywordInfo
>
esSuggestKeywordMap
,
Long
startId
,
Long
endId
,
Long
startTime
)
{
this
.
esSuggestKeywordMap
=
esSuggestKeywordMap
;
this
.
startId
=
startId
;
this
.
startId
=
startId
;
this
.
endId
=
endId
;
this
.
endId
=
endId
;
this
.
startTime
=
startTime
;
this
.
startTime
=
startTime
;
}
}
private
ConcurrentHashMap
<
String
,
EsSuggestKeywordInfo
>
esSuggestKeywordMap
;
private
Long
startId
;
private
Long
startId
;
private
Long
endId
;
private
Long
endId
;
private
Long
startTime
;
private
Long
startTime
;
...
@@ -541,7 +572,7 @@ public class SuggestTask {
...
@@ -541,7 +572,7 @@ public class SuggestTask {
public
void
run
()
{
public
void
run
()
{
List
<
SearchKeywordInfo
>
searchKeywordInfoList
=
DwDataSource
.
querySearchKeywordInfoList
(
startId
,
endId
);
List
<
SearchKeywordInfo
>
searchKeywordInfoList
=
DwDataSource
.
querySearchKeywordInfoList
(
startId
,
endId
);
if
(
CollectionUtils
.
isNotEmpty
(
searchKeywordInfoList
))
{
if
(
CollectionUtils
.
isNotEmpty
(
searchKeywordInfoList
))
{
processSearchKeyword
(
searchKeywordInfoList
,
startTime
);
processSearchKeyword
(
this
.
esSuggestKeywordMap
,
searchKeywordInfoList
,
startTime
);
}
}
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment