Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
suggest-task
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
田川
suggest-task
Commits
6abf6e24
Unverified
Commit
6abf6e24
authored
Dec 14, 2020
by
David Star
Committed by
GitHub
Dec 14, 2020
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2 from yanchaosb123/rank_opt
敏感词放后
parents
bca1a519
af4c8ed7
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
71 additions
and
42 deletions
+71
-42
suggest-task.go
main/suggest-task.go
+53
-11
test.go
main/test.go
+18
-31
No files found.
main/suggest-task.go
View file @
6abf6e24
...
@@ -70,9 +70,9 @@ var t2s, _ = gocc.New("t2s")
...
@@ -70,9 +70,9 @@ var t2s, _ = gocc.New("t2s")
var
prefixFilterArr
=
[]
string
{
"https://"
,
"http://"
,
"dg"
,
"d & g"
,
"dolce&gabbana"
,
var
prefixFilterArr
=
[]
string
{
"https://"
,
"http://"
,
"dg"
,
"d & g"
,
"dolce&gabbana"
,
"dolce & gabbana"
,
"杜嘉班纳"
,
"避孕"
,
"情趣"
,
"cucci"
,
"乒乓球"
,
"cuccl"
,
"gucii"
,
"tod's"
,
"iwc7"
}
"dolce & gabbana"
,
"杜嘉班纳"
,
"避孕"
,
"情趣"
,
"cucci"
,
"乒乓球"
,
"cuccl"
,
"gucii"
,
"tod's"
,
"iwc7"
}
const
TABLE_SPLIT_STEP_SIZE
=
10000
const
LEVEL_SIZE
=
1
const
TABLE_SPLIT_STEP_SIZE
=
10000
const
MAX_TAG_SIZE
=
5
const
MAX_TAG_SIZE
=
5
...
@@ -80,6 +80,7 @@ var UPDATE_TIME = time.Now().UnixNano() / 1e6
...
@@ -80,6 +80,7 @@ var UPDATE_TIME = time.Now().UnixNano() / 1e6
func
main
()
{
func
main
()
{
startTime
:=
time
.
Now
()
startTime
:=
time
.
Now
()
log
.
SetFlags
(
log
.
Lshortfile
|
log
.
LstdFlags
)
datawareDB
,
err
:=
sql
.
Open
(
"mysql"
,
RUN_ENV
.
DataWareDB
)
datawareDB
,
err
:=
sql
.
Open
(
"mysql"
,
RUN_ENV
.
DataWareDB
)
...
@@ -117,7 +118,7 @@ func main() {
...
@@ -117,7 +118,7 @@ func main() {
count
:=
arr
[
1
]
/
TABLE_SPLIT_STEP_SIZE
count
:=
arr
[
1
]
/
TABLE_SPLIT_STEP_SIZE
log
.
Printf
(
"maxId/10000=%d
\n
"
,
count
)
log
.
Printf
(
"maxId/10000=%d
\n
"
,
count
)
if
arr
[
1
]
<
28
00000
{
if
arr
[
1
]
<
10
00000
{
log
.
Printf
(
"data is too little ,return"
)
log
.
Printf
(
"data is too little ,return"
)
sendSuggestNotify
()
sendSuggestNotify
()
return
return
...
@@ -182,6 +183,7 @@ func cleanForEs(w *Word) {
...
@@ -182,6 +183,7 @@ func cleanForEs(w *Word) {
func
addWord
(
w
*
Word
,
processor
*
elastic
.
BulkProcessor
)
{
func
addWord
(
w
*
Word
,
processor
*
elastic
.
BulkProcessor
)
{
processWord
(
w
)
processWord
(
w
)
if
!
isFilterWord
(
w
)
{
if
!
isFilterWord
(
w
)
{
wordMap
.
Store
(
w
.
Keyword
,
w
)
wordMap
.
Store
(
w
.
Keyword
,
w
)
cleanForEs
(
w
)
cleanForEs
(
w
)
...
@@ -307,7 +309,6 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
...
@@ -307,7 +309,6 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
results
,
err
:=
db
.
Query
(
sqlStr
)
results
,
err
:=
db
.
Query
(
sqlStr
)
if
err
!=
nil
{
log
.
Print
(
err
.
Error
())
}
if
err
!=
nil
{
log
.
Print
(
err
.
Error
())
}
log
.
Print
(
"read database success "
)
log
.
Print
(
"read database success "
)
for
results
.
Next
()
{
for
results
.
Next
()
{
...
@@ -467,30 +468,48 @@ func processWord(w *Word) {
...
@@ -467,30 +468,48 @@ func processWord(w *Word) {
calculateWordRank
(
w
)
calculateWordRank
(
w
)
calculateWordABRank
(
w
)
calculateWordABRank
(
w
)
addNewScoreIfNewHotWord
(
w
)
}
}
func
isFilterWord
(
w
*
Word
)
bool
{
func
isFilterWord
(
w
*
Word
)
bool
{
// 品牌词 类目词 人工干预词 不做过滤
if
w
.
IsBrand
||
w
.
IsCategory
||
w
.
IsManual
{
w
.
IsSensitive
=
false
return
false
}
// 敏感词过滤
// 敏感词过滤
if
w
.
IsSensitive
{
return
true
}
if
w
.
IsSensitive
{
return
true
}
// 过滤掉太长的词 每个中文字占3个byte
// 过滤掉太长的词 每个中文字占3个byte
if
utf8
.
RuneCountInString
(
w
.
Keyword
)
<=
1
||
len
(
w
.
Keyword
)
>
50
{
return
true
}
if
utf8
.
RuneCountInString
(
w
.
Keyword
)
<=
1
||
len
(
w
.
Keyword
)
>
50
{
return
true
}
// 过滤掉商品id,商品id是有7位数字组成
// 过滤掉商品id,商品id是有7位数字组成
if
len
(
w
.
Keyword
)
>
6
&&
isAllDigit
(
w
.
Keyword
)
{
return
true
}
if
len
(
w
.
Keyword
)
>
6
&&
isAllDigit
(
w
.
Keyword
)
{
return
true
}
// 前缀过滤
// 前缀过滤
for
_
,
v
:=
range
prefixFilterArr
{
for
_
,
v
:=
range
prefixFilterArr
{
if
strings
.
HasPrefix
(
w
.
Keyword
,
v
)
{
return
true
}
if
strings
.
HasPrefix
(
w
.
Keyword
,
v
)
{
return
true
}
}
}
// 品牌词 类目词 人工干预词 不做过滤
if
w
.
IsBrand
||
w
.
IsCategory
||
w
.
IsManual
{
return
false
}
// 年数据过滤
// 年数据过滤
if
w
.
YearCount
<
2
||
w
.
YearClickCount
<
2
{
return
true
}
if
w
.
YearCount
<
2
||
w
.
YearClickCount
<
2
{
return
true
}
// 判断是否是热搜词 一年内搜索次数大于50或者一周内搜索次数大于5
// 判断是否是热搜词 一年内搜索次数大于50或者一周内搜索次数大于5
if
isHotSearchWord
(
w
)
{
if
isHotSearchWord
(
w
)
{
...
@@ -502,6 +521,29 @@ func isFilterWord(w *Word) bool {
...
@@ -502,6 +521,29 @@ func isFilterWord(w *Word) bool {
}
}
}
}
func
addNewScoreIfNewHotWord
(
w
*
Word
)
{
if
w
==
nil
{
return
}
// 比例有意义
if
w
.
WeekCount
==
0
||
w
.
YearCount
==
0
||
w
.
WeekCount
<
20
{
return
}
// 周点击占年点击 40% 以上
if
w
.
WeekCount
*
10
/
w
.
YearCount
<=
5
{
return
}
if
w
.
WeekClickCount
<
3
||
w
.
WeekUv
<
5
{
return
}
// 新词加分大小 类似于 人工干预值
w
.
WordABRank
=
w
.
WordABRank
*
math
.
Sqrt
(
5.0
)
fmt
.
Printf
(
"最新热词添加分数,新词: %s"
,
w
.
Keyword
)
}
func
isAllDigit
(
str
string
)
bool
{
func
isAllDigit
(
str
string
)
bool
{
for
_
,
x
:=
range
str
{
for
_
,
x
:=
range
str
{
// x 的类型是 rune 其实就是对应字符的 utf8 编码
// x 的类型是 rune 其实就是对应字符的 utf8 编码
...
...
main/test.go
View file @
6abf6e24
package
main
package
main
import
(
import
"fmt"
"math"
"strings"
type
Phone
interface
{
"fmt"
call
()
)
// B is a record keyed by a keyword string that carries several int32
// counters. The struct tags give the JSON name used for each field.
type B struct {
	Keyword        string `json:"keyword"`
	KeywordPinYin  string `json:"keywordPinYin"`
	YearCount      int32  `json:"yearCount"`
	YearClickCount int32  `json:"yearClickCount"`
	YearCartCount  int32  `json:"yearCartCount"`
	// The "-" tag makes encoding/json skip this field in both directions.
	ZhaoCount int32 `json:"-"`
}
}
func
main
()
{
prefix
:=
strings
.
HasPrefix
(
"tod's"
,
"tod's"
)
type
NokiaPhone
struct
{
fmt
.
Print
(
prefix
)
Name
string
}
func
(
nokiaPhone
*
NokiaPhone
)
call
()
{
fmt
.
Print
(
nokiaPhone
.
Name
)
}
}
//
//func (nokiaPhone *NokiaPhone) call() {
// fmt.Print(nokiaPhone.Name)
//}
func
main
()
{
func
calculateRatioFactor2
(
ratio
float64
,
count
int32
)
float64
{
var
rank
float64
var
phone
=
NokiaPhone
{
Name
:
"zhangsan"
}
switch
{
phone
.
call
()
case
count
>
1
&&
count
<
10
:
rank
=
1.2
case
count
>=
10
&&
count
<
20
:
rank
=
1.4
case
count
>=
20
&&
count
<
50
:
rank
=
1.6
case
count
>=
50
&&
count
<
100
:
rank
=
1.8
case
count
>=
100
&&
count
<
200
:
rank
=
2.0
case
count
>=
200
&&
count
<
500
:
rank
=
2.2
case
count
>=
500
:
rank
=
2.5
default
:
rank
=
1.0
}
//根据搜索转化率,转换为热度因子
return
math
.
Log10
(
math
.
Sqrt
(
ratio
+
10
))
*
rank
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment