Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
suggest-task
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
田川
suggest-task
Commits
8878b19d
Commit
8878b19d
authored
Feb 09, 2022
by
xupeng
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
调整过滤逻辑
parent
b1006535
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
31 additions
and
16 deletions
+31
-16
third-patry-jpinyin-1.1.8.jar
suggest-task/lib/third-patry-jpinyin-1.1.8.jar
+0
-0
pom.xml
suggest-task/pom.xml
+2
-0
SuggestTask.java
suggest-task/src/main/java/com/secoo/so/suggest/task/SuggestTask.java
+27
-14
config.properties
suggest-task/src/main/profiles/prod/config.properties
+1
-1
config.properties
suggest-task/src/main/profiles/test/config.properties
+1
-1
No files found.
suggest-task/lib/third-patry-jpinyin-1.1.8.jar
0 → 100644
View file @
8878b19d
File added
suggest-task/pom.xml
View file @
8878b19d
...
@@ -89,6 +89,8 @@
...
@@ -89,6 +89,8 @@
<groupId>
com.secoo.search.third-patry
</groupId>
<groupId>
com.secoo.search.third-patry
</groupId>
<artifactId>
third-patry-jpinyin
</artifactId>
<artifactId>
third-patry-jpinyin
</artifactId>
<version>
1.1.8
</version>
<version>
1.1.8
</version>
<scope>
system
</scope>
<systemPath>
${project.basedir}/lib/third-patry-jpinyin-1.1.8.jar
</systemPath>
</dependency>
</dependency>
<!-- es -->
<!-- es -->
...
...
suggest-task/src/main/java/com/secoo/so/suggest/task/SuggestTask.java
View file @
8878b19d
...
@@ -69,7 +69,19 @@ public class SuggestTask {
...
@@ -69,7 +69,19 @@ public class SuggestTask {
putIfKeyNotBlank
(
brandMap
,
cleanKeyword
(
brandInfo
.
getEnName
()),
brandInfo
.
getId
());
putIfKeyNotBlank
(
brandMap
,
cleanKeyword
(
brandInfo
.
getEnName
()),
brandInfo
.
getId
());
putIfKeyNotBlank
(
brandMap
,
cleanKeyword
(
brandInfo
.
getChName
()),
brandInfo
.
getId
());
putIfKeyNotBlank
(
brandMap
,
cleanKeyword
(
brandInfo
.
getChName
()),
brandInfo
.
getId
());
putIfKeyNotBlank
(
brandMap
,
cleanKeyword
(
brandInfo
.
getShortName
()),
brandInfo
.
getId
());
putIfKeyNotBlank
(
brandMap
,
cleanKeyword
(
brandInfo
.
getShortName
()),
brandInfo
.
getId
());
putIfKeyNotBlank
(
brandMap
,
cleanKeyword
(
brandInfo
.
getNickName
()),
brandInfo
.
getId
());
if
(
StringUtils
.
isNotBlank
(
brandInfo
.
getNickName
())
&&
brandInfo
.
getNickName
().
contains
(
","
))
{
List
<
String
>
nickNameList
=
StringUtils
.
splitToList
(
brandInfo
.
getNickName
(),
","
);
if
(
CollectionUtils
.
isNotEmpty
(
nickNameList
))
{
nickNameList
.
forEach
(
nickName
->
{
if
(
StringUtils
.
isNotBlank
(
nickName
))
{
putIfKeyNotBlank
(
brandMap
,
cleanKeyword
(
nickName
),
brandInfo
.
getId
());
}
});
}
}
else
{
putIfKeyNotBlank
(
brandMap
,
cleanKeyword
(
brandInfo
.
getNickName
()),
brandInfo
.
getId
());
}
}
}
}
}
return
brandMap
;
return
brandMap
;
...
@@ -423,17 +435,6 @@ public class SuggestTask {
...
@@ -423,17 +435,6 @@ public class SuggestTask {
*/
*/
private
static
boolean
isFilterSuggestKeyword
(
EsSuggestKeywordInfo
suggestKeywordInfo
)
{
private
static
boolean
isFilterSuggestKeyword
(
EsSuggestKeywordInfo
suggestKeywordInfo
)
{
// 品牌词 类目词 人工干预词 不做过滤
if
(
suggestKeywordInfo
.
getIsBrand
()
||
suggestKeywordInfo
.
getIsCategory
()
||
suggestKeywordInfo
.
getIsManual
())
{
suggestKeywordInfo
.
setIsSensitive
(
false
);
return
false
;
}
// 敏感词过滤
if
(
suggestKeywordInfo
.
getIsSensitive
())
{
return
true
;
}
// 过滤掉太短、太长的词
// 过滤掉太短、太长的词
if
(
StringUtils
.
isBlank
(
suggestKeywordInfo
.
getKeyword
())
if
(
StringUtils
.
isBlank
(
suggestKeywordInfo
.
getKeyword
())
||
suggestKeywordInfo
.
getKeyword
().
length
()
<=
1
||
suggestKeywordInfo
.
getKeyword
().
length
()
<=
1
...
@@ -441,8 +442,8 @@ public class SuggestTask {
...
@@ -441,8 +442,8 @@ public class SuggestTask {
return
true
;
return
true
;
}
}
//
过滤掉纯数字的搜索词,原:过滤掉商品id,商品id是有7位数字组成
//
敏感词过滤
if
(
suggestKeywordInfo
.
get
Keyword
().
length
()
>
6
&&
StringUtils
.
isNumber
(
suggestKeywordInfo
.
getKeyword
()
))
{
if
(
suggestKeywordInfo
.
get
IsSensitive
(
))
{
return
true
;
return
true
;
}
}
...
@@ -453,6 +454,18 @@ public class SuggestTask {
...
@@ -453,6 +454,18 @@ public class SuggestTask {
}
}
}
}
// 品牌词 类目词 人工干预词 不做过滤
if
(
suggestKeywordInfo
.
getIsBrand
()
||
suggestKeywordInfo
.
getIsCategory
()
||
suggestKeywordInfo
.
getIsManual
())
{
suggestKeywordInfo
.
setIsSensitive
(
false
);
return
false
;
}
// 过滤掉纯数字的搜索词,原:过滤掉商品id,商品id是有7位数字组成
if
(
suggestKeywordInfo
.
getKeyword
().
length
()
>
6
&&
StringUtils
.
isNumber
(
suggestKeywordInfo
.
getKeyword
()))
{
return
true
;
}
// 年数据过滤
// 年数据过滤
if
(
suggestKeywordInfo
.
getYearCount
()
<
2
||
suggestKeywordInfo
.
getYearClickCount
()
<
2
)
{
if
(
suggestKeywordInfo
.
getYearCount
()
<
2
||
suggestKeywordInfo
.
getYearClickCount
()
<
2
)
{
return
true
;
return
true
;
...
...
suggest-task/src/main/profiles/prod/config.properties
View file @
8878b19d
# suggestTask
# suggestTask
suggestTask.prefixFilterList
=
["https://", "http://", "dg", "d & g", "dolce&gabbana","dolce & gabbana", "
\u
675C
\u5609\u
73ED
\u
7EB3", "
\u
907F
\u
5B55", "
\u
60C5
\u
8DA3", "cucci", "
\u
4E52
\u
4E53
\u7403
", "cuccl", "gucii","tod's","iwc7"]
suggestTask.prefixFilterList
=
["https://", "http://", "dg", "d & g", "dolce&gabbana","dolce & gabbana", "
杜嘉班纳", "避孕", "情趣", "cucci", "乒乓球
", "cuccl", "gucii","tod's","iwc7"]
suggestTask.ManualFolder
=
/data/pssmaster/corpus_set/suggest_corpus/manual
suggestTask.ManualFolder
=
/data/pssmaster/corpus_set/suggest_corpus/manual
suggestTask.SensitiveFolder
=
/data/pssmaster/corpus_set/suggest_corpus/sensitive
suggestTask.SensitiveFolder
=
/data/pssmaster/corpus_set/suggest_corpus/sensitive
suggestTask.EuropeWordFolder
=
/data/pssmaster/corpus_set/suggest_corpus/europe_word
suggestTask.EuropeWordFolder
=
/data/pssmaster/corpus_set/suggest_corpus/europe_word
...
...
suggest-task/src/main/profiles/test/config.properties
View file @
8878b19d
# suggestTask
# suggestTask
suggestTask.prefixFilterList
=
["https://", "http://", "dg", "d & g", "dolce&gabbana","dolce & gabbana", "
\u
675C
\u5609\u
73ED
\u
7EB3", "
\u
907F
\u
5B55", "
\u
60C5
\u
8DA3", "cucci", "
\u
4E52
\u
4E53
\u7403
", "cuccl", "gucii","tod's","iwc7"]
suggestTask.prefixFilterList
=
["https://", "http://", "dg", "d & g", "dolce&gabbana","dolce & gabbana", "
杜嘉班纳", "避孕", "情趣", "cucci", "乒乓球
", "cuccl", "gucii","tod's","iwc7"]
suggestTask.ManualFolder
=
/data/pssmaster/corpus_set/suggest_corpus/manual
suggestTask.ManualFolder
=
/data/pssmaster/corpus_set/suggest_corpus/manual
suggestTask.SensitiveFolder
=
/data/pssmaster/corpus_set/suggest_corpus/sensitive
suggestTask.SensitiveFolder
=
/data/pssmaster/corpus_set/suggest_corpus/sensitive
suggestTask.EuropeWordFolder
=
/data/pssmaster/corpus_set/suggest_corpus/europe_word
suggestTask.EuropeWordFolder
=
/data/pssmaster/corpus_set/suggest_corpus/europe_word
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment