Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
suggest-task
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
田川
suggest-task
Commits
af4c8ed7
Commit
af4c8ed7
authored
Dec 14, 2020
by
zhaoyanchao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
更改品牌品类优先,避免误识别为无结果词
parent
ed50608a
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
27 additions
and
8 deletions
+27
-8
suggest-task.go
main/suggest-task.go
+27
-8
No files found.
main/suggest-task.go
View file @
af4c8ed7
...
...
@@ -70,6 +70,8 @@ var t2s, _ = gocc.New("t2s")
// prefixFilterArr lists keyword prefixes that cause a word to be filtered.
// isFilterWord compares each entry against Word.Keyword with
// strings.HasPrefix; no case folding or trimming is applied at the match
// site, so entries are matched exactly as written here.
var prefixFilterArr = []string{
	"https://",
	"http://",
	"dg",
	"d & g",
	"dolce&gabbana",
	"dolce & gabbana",
	"杜嘉班纳",
	"避孕",
	"情趣",
	"cucci",
	"乒乓球",
	"cuccl",
	"gucii",
	"tod's",
	"iwc7",
}

const (
	// TABLE_SPLIT_STEP_SIZE is a step size of 10000.
	// NOTE(review): its use is not visible in this excerpt; presumably the
	// number of ids covered per table-scan batch — confirm against queryIndex.
	TABLE_SPLIT_STEP_SIZE = 10000

	// MAX_TAG_SIZE is a limit of 5.
	// NOTE(review): its use is not visible in this excerpt; presumably the
	// maximum number of tags kept per word — confirm against the caller.
	MAX_TAG_SIZE = 5
)
...
...
@@ -78,6 +80,7 @@ var UPDATE_TIME = time.Now().UnixNano() / 1e6
func
main
()
{
startTime
:=
time
.
Now
()
log
.
SetFlags
(
log
.
Lshortfile
|
log
.
LstdFlags
)
datawareDB
,
err
:=
sql
.
Open
(
"mysql"
,
RUN_ENV
.
DataWareDB
)
...
...
@@ -180,6 +183,7 @@ func cleanForEs(w *Word) {
func
addWord
(
w
*
Word
,
processor
*
elastic
.
BulkProcessor
)
{
processWord
(
w
)
if
!
isFilterWord
(
w
)
{
wordMap
.
Store
(
w
.
Keyword
,
w
)
cleanForEs
(
w
)
...
...
@@ -305,7 +309,6 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
results
,
err
:=
db
.
Query
(
sqlStr
)
if
err
!=
nil
{
log
.
Print
(
err
.
Error
())
}
log
.
Print
(
"read database success "
)
for
results
.
Next
()
{
...
...
@@ -472,25 +475,41 @@ func processWord(w *Word) {
func
isFilterWord
(
w
*
Word
)
bool
{
// 品牌词 类目词 人工干预词 不做过滤
if
w
.
IsBrand
||
w
.
IsCategory
||
w
.
IsManual
{
w
.
IsSensitive
=
false
return
false
}
// 敏感词过滤
if
w
.
IsSensitive
{
return
true
}
if
w
.
IsSensitive
{
return
true
}
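// Filter out keywords that are too short (at most 1 rune) or too long (more than 50 bytes; each Chinese character takes 3 bytes in UTF-8)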
if
utf8
.
RuneCountInString
(
w
.
Keyword
)
<=
1
||
len
(
w
.
Keyword
)
>
50
{
return
true
}
if
utf8
.
RuneCountInString
(
w
.
Keyword
)
<=
1
||
len
(
w
.
Keyword
)
>
50
{
return
true
}
// Filter out product IDs: all-digit keywords longer than 6 bytes (product IDs consist of 7 digits)
if
len
(
w
.
Keyword
)
>
6
&&
isAllDigit
(
w
.
Keyword
)
{
return
true
}
if
len
(
w
.
Keyword
)
>
6
&&
isAllDigit
(
w
.
Keyword
)
{
return
true
}
// 前缀过滤
for
_
,
v
:=
range
prefixFilterArr
{
if
strings
.
HasPrefix
(
w
.
Keyword
,
v
)
{
return
true
}
if
strings
.
HasPrefix
(
w
.
Keyword
,
v
)
{
return
true
}
}
// 品牌词 类目词 人工干预词 不做过滤
if
w
.
IsBrand
||
w
.
IsCategory
||
w
.
IsManual
{
return
false
}
// 年数据过滤
if
w
.
YearCount
<
2
||
w
.
YearClickCount
<
2
{
return
true
}
if
w
.
YearCount
<
2
||
w
.
YearClickCount
<
2
{
return
true
}
// 判断是否是热搜词 一年内搜索次数大于50或者一周内搜索次数大于5
if
isHotSearchWord
(
w
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment