Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
suggest-task
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
田川
suggest-task
Commits
bca1a519
Unverified
Commit
bca1a519
authored
Oct 14, 2020
by
David Star
Committed by
GitHub
Oct 14, 2020
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #1 from yanchaosb123/rank_opt
算分优化
parents
a59ff1f5
33b7d636
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
302 additions
and
49 deletions
+302
-49
suggest-task-dependency.go
main/suggest-task-dependency.go
+77
-7
suggest-task.go
main/suggest-task.go
+203
-30
test.go
main/test.go
+22
-12
No files found.
main/suggest-task-dependency.go
View file @
bca1a519
package
main
package
main
import
(
import
(
"strings"
"net/http"
"time"
"encoding/json"
"bytes"
"io/ioutil"
"container/list"
"strings"
"github.com/mozillazg/go-pinyin"
"github.com/mozillazg/go-pinyin"
"fmt"
"strconv"
"strconv"
"fmt"
)
)
type
ENV
struct
{
type
ENV
struct
{
...
@@ -16,6 +22,13 @@ type ENV struct {
...
@@ -16,6 +22,13 @@ type ENV struct {
ManualFolder
string
ManualFolder
string
SensitiveFolder
string
SensitiveFolder
string
}
}
// Message is the payload passed to Post when sending a notification.
//
// Phones and Body are linked lists. encoding/json only encodes exported
// struct fields and list.List has none, so without a custom encoder both
// fields would be serialized as empty objects ("{}") and their contents
// would never reach the receiver. MarshalJSON below flattens them to arrays.
type Message struct {
	Title  string
	Phones *list.List
	Body   *list.List
}

// MarshalJSON encodes the message with Phones and Body written as JSON arrays
// holding the list elements in front-to-back order. A nil list is encoded as
// null, which is what the default encoder produced for a nil pointer.
//
// The receiver is a value so that both Message and *Message use this encoder.
func (m Message) MarshalJSON() ([]byte, error) {
	return json.Marshal(struct {
		Title  string
		Phones []interface{}
		Body   []interface{}
	}{
		Title:  m.Title,
		Phones: messageListValues(m.Phones),
		Body:   messageListValues(m.Body),
	})
}

// messageListValues copies the element values of l into a slice, preserving
// order. It returns nil for a nil list and an empty, non-nil slice for an
// empty list.
func messageListValues(l *list.List) []interface{} {
	if l == nil {
		return nil
	}
	values := make([]interface{}, 0, l.Len())
	for e := l.Front(); e != nil; e = e.Next() {
		values = append(values, e.Value)
	}
	return values
}
var
test_env
=
&
ENV
{
var
test_env
=
&
ENV
{
DataWareDB
:
"DataWarehouse_test:FihdZW7o1XKtDETZexOG@tcp(test01-secooDataWarehouse.master.com:3306)/secooDataWarehouse"
,
DataWareDB
:
"DataWarehouse_test:FihdZW7o1XKtDETZexOG@tcp(test01-secooDataWarehouse.master.com:3306)/secooDataWarehouse"
,
ErpDB
:
"3306_test:iS6CXpYqgZ8Mhjui@tcp(10.4.3.223:3306)/secooErpDB"
,
ErpDB
:
"3306_test:iS6CXpYqgZ8Mhjui@tcp(10.4.3.223:3306)/secooErpDB"
,
...
@@ -37,12 +50,28 @@ var prod_env = &ENV {
...
@@ -37,12 +50,28 @@ var prod_env = &ENV {
SensitiveFolder
:
"/data/pssmaster/corpus_set/suggest_corpus/sensitive"
}
SensitiveFolder
:
"/data/pssmaster/corpus_set/suggest_corpus/sensitive"
}
// 重要,该参数 确定是 正式还是 测试环境
// 重要,该参数 确定是 正式还是 测试环境
var
RUN_ENV
=
test
_env
var
RUN_ENV
=
prod
_env
/************************* 下面是 util 方法 *****************************/
/************************* 下面是 util 方法 *****************************/
var
CH_EN_PUNC
=
map
[
string
]
string
{
","
:
","
,
"。"
:
"."
,
"!"
:
"!"
,
"?"
:
"?"
,
"【"
:
"["
,
"】"
:
"]"
,
"("
:
"("
,
")"
:
")"
,
"‘"
:
"'"
,
"’"
:
"'"
,
"“"
:
"
\"
"
,
"”"
:
"
\"
"
,
}
func
convertToPinyin
(
str
string
)
string
{
func
convertToPinyin
(
str
string
)
string
{
var
ret
string
var
ret
string
for
_
,
v
:=
range
str
{
for
_
,
v
:=
range
str
{
...
@@ -62,7 +91,7 @@ func convertToPinyin(str string) string {
...
@@ -62,7 +91,7 @@ func convertToPinyin(str string) string {
func
cleanKeyword
(
keyword
string
)
string
{
func
cleanKeyword
(
keyword
string
)
string
{
out
,
err
:=
t2s
.
Convert
(
keyword
)
out
,
err
:=
t2s
.
Convert
(
keyword
)
if
err
!=
nil
{
fmt
.
Println
(
err
)
}
if
err
!=
nil
{
fmt
.
Println
(
err
)
}
keyword
=
strings
.
ToLower
(
strings
.
Trim
(
DBC2SBC
(
strings
.
TrimSpace
(
out
)),
"
\ufffc
|,"
))
keyword
=
strings
.
ToLower
(
strings
.
Trim
(
DBC2SBC
(
strings
.
TrimSpace
(
out
)),
"
\ufffc
|,
|.
"
))
return
strings
.
Join
(
strings
.
Fields
(
keyword
),
" "
)
return
strings
.
Join
(
strings
.
Fields
(
keyword
),
" "
)
}
}
...
@@ -78,16 +107,58 @@ func DBC2SBC(s string) string {
...
@@ -78,16 +107,58 @@ func DBC2SBC(s string) string {
var
strLst
[]
string
var
strLst
[]
string
for
_
,
i
:=
range
s
{
for
_
,
i
:=
range
s
{
insideCode
:=
i
insideCode
:=
i
if
insideCode
==
12288
{
if
insideCode
==
12288
{
insideCode
=
32
insideCode
=
32
}
else
{
}
else
{
insideCode
-=
65248
insideCode
-=
65248
}
}
if
insideCode
<
32
||
insideCode
>
126
{
if
key
,
exist
:=
CH_EN_PUNC
[
string
(
i
)];
exist
{
strLst
=
append
(
strLst
,
key
)
}
else
if
insideCode
<
32
||
insideCode
>
126
{
strLst
=
append
(
strLst
,
string
(
i
))
strLst
=
append
(
strLst
,
string
(
i
))
}
else
{
}
else
{
strLst
=
append
(
strLst
,
string
(
insideCode
))
strLst
=
append
(
strLst
,
string
(
insideCode
))
}
}
}
}
return
strings
.
Join
(
strLst
,
""
)
return
strings
.
Join
(
strLst
,
""
)
}
}
\ No newline at end of file
// Post sends data as a JSON-encoded HTTP POST request and returns the
// response body as a string.
//
// url is the request address, data is the value that is JSON-encoded into the
// request body, and contentType is the Content-Type of the request, e.g.
// "application/json".
//
// The request is bounded by a 5-second client timeout. If data cannot be
// JSON-encoded the error is printed and "" is returned without sending a
// request. A failure to perform the request panics. If reading the response
// body fails, the error is printed and whatever was read so far is returned.
func Post(url string, data interface{}, contentType string) string {
	// Timeout: 5 seconds.
	client := &http.Client{Timeout: 5 * time.Second}

	jsonStr, err := json.Marshal(data)
	if err != nil {
		// Do not post an empty body on behalf of a payload that failed to encode.
		fmt.Println(err)
		return ""
	}

	resp, err := client.Post(url, contentType, bytes.NewBuffer(jsonStr))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	result, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		fmt.Println(err)
	}
	return string(result)
}
func
sendSuggestNotify
()
{
msg
:=
Message
{
Title
:
"提示词数据异常"
,
}
body
:=
list
.
New
()
body
.
PushBack
(
"提示词数据太少"
)
msg
.
Body
=
body
phones
:=
list
.
New
()
phones
.
PushBack
(
"17621863255,13894895183"
)
msg
.
Phones
=
phones
Post
(
"http://matrix-inform.secoolocal.com/user/sendToUser"
,
msg
,
"application/json"
)
}
main/suggest-task.go
View file @
bca1a519
...
@@ -3,8 +3,7 @@ package main
...
@@ -3,8 +3,7 @@ package main
import
(
import
(
"bufio"
"bufio"
"context"
"context"
"crypto/md5"
"database/sql"
"database/sql"
"fmt"
"fmt"
_
"github.com/go-sql-driver/mysql"
_
"github.com/go-sql-driver/mysql"
"github.com/liuzl/gocc"
"github.com/liuzl/gocc"
...
@@ -19,7 +18,9 @@ import (
...
@@ -19,7 +18,9 @@ import (
"time"
"time"
"unicode"
"unicode"
"unicode/utf8"
"unicode/utf8"
)
"crypto/md5"
)
type
Word
struct
{
type
Word
struct
{
Keyword
string
`json:"keyword"`
Keyword
string
`json:"keyword"`
...
@@ -42,6 +43,18 @@ type Word struct {
...
@@ -42,6 +43,18 @@ type Word struct {
WordRank
float64
`json:"wordRank"`
WordRank
float64
`json:"wordRank"`
WordABRank
float64
`json:"wordABRank"`
WordABRank
float64
`json:"wordABRank"`
KeywordVersion
string
`json:"keywordVersion"`
KeywordVersion
string
`json:"keywordVersion"`
WeekUv
int32
`json:"-"`
WeekClickUv
int32
`json:"-"`
WeekAddCartUv
int32
`json:"-"`
MonthPv
int32
`json:"-"`
MonthClickCount
int32
`json:"-"`
MonthAddCartCount
int32
`json:"-"`
MonthUv
int32
`json:"-"`
MonthProductClickUv
int32
`json:"-"`
MonthAddCartUv
int32
`json:"-"`
SuggestTags
string
`json:"suggestTags"`
UpdateTime
int64
`json:"updateTime"`
}
}
var
wordMap
sync
.
Map
var
wordMap
sync
.
Map
...
@@ -55,18 +68,25 @@ var dateStr = fmt.Sprintf("%d-%02d-%02d",now.Year(),now.Month(),now.Day())
...
@@ -55,18 +68,25 @@ var dateStr = fmt.Sprintf("%d-%02d-%02d",now.Year(),now.Month(),now.Day())
var
t2s
,
_
=
gocc
.
New
(
"t2s"
)
var
t2s
,
_
=
gocc
.
New
(
"t2s"
)
var
prefixFilterArr
=
[]
string
{
"https://"
,
"http://"
,
"dg"
,
"d & g"
,
"dolce&gabbana"
,
var
prefixFilterArr
=
[]
string
{
"https://"
,
"http://"
,
"dg"
,
"d & g"
,
"dolce&gabbana"
,
"dolce & gabbana"
,
"杜嘉班纳"
,
"避孕"
,
"情趣"
,
"cucci"
,
"乒乓球"
,
"cuccl"
,
"gucii"
}
"dolce & gabbana"
,
"杜嘉班纳"
,
"避孕"
,
"情趣"
,
"cucci"
,
"乒乓球"
,
"cuccl"
,
"gucii"
,
"tod's"
,
"iwc7"
}
const
TABLE_SPLIT_STEP_SIZE
=
10000
const
TABLE_SPLIT_STEP_SIZE
=
10000
const
LEVEL_SIZE
=
1
const
MAX_TAG_SIZE
=
5
var
UPDATE_TIME
=
time
.
Now
()
.
UnixNano
()
/
1e6
func
main
()
{
func
main
()
{
startTime
:=
time
.
Now
()
startTime
:=
time
.
Now
()
datawareDB
,
err
:=
sql
.
Open
(
"mysql"
,
RUN_ENV
.
DataWareDB
)
datawareDB
,
err
:=
sql
.
Open
(
"mysql"
,
RUN_ENV
.
DataWareDB
)
if
err
!=
nil
{
log
.
Print
(
err
.
Error
())
}
if
err
!=
nil
{
log
.
Print
(
err
.
Error
())
}
datawareDB
.
SetConnMaxLifetime
(
10
*
time
.
Minute
)
datawareDB
.
SetConnMaxLifetime
(
10
*
time
.
Minute
)
datawareDB
.
SetMaxOpenConns
(
50
)
datawareDB
.
SetMaxOpenConns
(
3
50
)
datawareDB
.
SetMaxIdleConns
(
5
0
)
datawareDB
.
SetMaxIdleConns
(
10
0
)
var
client
*
elastic
.
Client
var
client
*
elastic
.
Client
if
RUN_ENV
.
EsUser
!=
""
{
if
RUN_ENV
.
EsUser
!=
""
{
...
@@ -78,8 +98,11 @@ func main() {
...
@@ -78,8 +98,11 @@ func main() {
bulkProcessor
,
err
:=
elastic
.
NewBulkProcessorService
(
client
)
.
bulkProcessor
,
err
:=
elastic
.
NewBulkProcessorService
(
client
)
.
Workers
(
50
)
.
Workers
(
50
)
.
BulkActions
(
5
000
)
.
BulkActions
(
2
000
)
.
FlushInterval
(
500
*
time
.
Millisecond
)
.
FlushInterval
(
500
*
time
.
Millisecond
)
.
Backoff
(
elastic
.
NewExponentialBackoff
(
time
.
Duration
(
10000
)
*
time
.
Millisecond
,
time
.
Duration
(
100000
)
*
time
.
Millisecond
)
)
.
After
(
after
)
.
After
(
after
)
.
Do
(
context
.
Background
())
Do
(
context
.
Background
())
if
err
!=
nil
{
log
.
Print
(
err
.
Error
())
}
if
err
!=
nil
{
log
.
Print
(
err
.
Error
())
}
...
@@ -94,12 +117,21 @@ func main() {
...
@@ -94,12 +117,21 @@ func main() {
count
:=
arr
[
1
]
/
TABLE_SPLIT_STEP_SIZE
count
:=
arr
[
1
]
/
TABLE_SPLIT_STEP_SIZE
log
.
Printf
(
"maxId/10000=%d
\n
"
,
count
)
log
.
Printf
(
"maxId/10000=%d
\n
"
,
count
)
for
i
:=
0
;
i
<=
count
;
i
++
{
if
arr
[
1
]
<
2800000
{
go
queryIndex
(
i
*
TABLE_SPLIT_STEP_SIZE
,
datawareDB
,
bulkProcessor
,
&
wg
)
log
.
Printf
(
"data is too little ,return"
)
sendSuggestNotify
()
return
}
for
j
:=
0
;
j
<
count
;
j
++
{
wg
.
Add
(
1
)
go
queryIndex
(
j
*
TABLE_SPLIT_STEP_SIZE
,
datawareDB
,
bulkProcessor
,
&
wg
)
}
}
wg
.
Wait
()
wg
.
Wait
()
fmt
.
Println
(
"all thread has read maps"
)
fmt
.
Println
(
"all thread has read maps"
)
checkUnusedData
(
bulkProcessor
)
checkUnusedData
(
bulkProcessor
)
err
=
bulkProcessor
.
Flush
()
err
=
bulkProcessor
.
Flush
()
...
@@ -130,10 +162,29 @@ func checkUnusedData(bulkProcessor *elastic.BulkProcessor) {
...
@@ -130,10 +162,29 @@ func checkUnusedData(bulkProcessor *elastic.BulkProcessor) {
}
}
}
}
/** 写入 es 前做下字段清理 */
func
cleanForEs
(
w
*
Word
)
{
if
w
.
SuggestTags
==
"null"
||
w
.
SuggestTags
==
"NULL"
{
w
.
SuggestTags
=
""
return
}
var
arr
=
strings
.
Split
(
w
.
SuggestTags
,
","
)
var
s
=
""
for
i
,
leng
:=
0
,
len
(
arr
);
i
<
MAX_TAG_SIZE
&&
i
<
leng
;
i
++
{
if
i
==
MAX_TAG_SIZE
-
1
||
i
==
leng
-
1
{
s
=
s
+
arr
[
i
]
}
else
{
s
=
s
+
arr
[
i
]
+
","
}
}
w
.
SuggestTags
=
s
}
func
addWord
(
w
*
Word
,
processor
*
elastic
.
BulkProcessor
)
{
func
addWord
(
w
*
Word
,
processor
*
elastic
.
BulkProcessor
)
{
processWord
(
w
)
processWord
(
w
)
if
!
isFilterWord
(
w
)
{
if
!
isFilterWord
(
w
)
{
wordMap
.
Store
(
w
.
Keyword
,
w
)
wordMap
.
Store
(
w
.
Keyword
,
w
)
cleanForEs
(
w
)
id
:=
fmt
.
Sprintf
(
"%x"
,
md5
.
Sum
([]
byte
(
w
.
Keyword
)))
id
:=
fmt
.
Sprintf
(
"%x"
,
md5
.
Sum
([]
byte
(
w
.
Keyword
)))
req
:=
elastic
.
NewBulkIndexRequest
()
.
req
:=
elastic
.
NewBulkIndexRequest
()
.
Index
(
"search_suggest_index"
)
.
Index
(
"search_suggest_index"
)
.
...
@@ -149,17 +200,25 @@ func loadErpDB() {
...
@@ -149,17 +200,25 @@ func loadErpDB() {
if
err
!=
nil
{
log
.
Print
(
err
.
Error
())
}
if
err
!=
nil
{
log
.
Print
(
err
.
Error
())
}
defer
db
.
Close
()
defer
db
.
Close
()
var
brandQuery
=
fmt
.
Sprintf
(
"select id,en_name,ch_name from secooErpDB.t_product_brand where is_del = 0 and enabled = 1"
)
var
brandQuery
=
fmt
.
Sprintf
(
"select id,en_name,ch_name
,short_name,nickname
from secooErpDB.t_product_brand where is_del = 0 and enabled = 1"
)
brandResults
,
err
:=
db
.
Query
(
brandQuery
)
brandResults
,
err
:=
db
.
Query
(
brandQuery
)
if
err
!=
nil
{
panic
(
err
.
Error
())
}
if
err
!=
nil
{
panic
(
err
.
Error
())
}
for
brandResults
.
Next
()
{
for
brandResults
.
Next
()
{
var
id
int
var
id
int
var
enName
string
var
enName
string
var
chName
string
var
chName
string
err
=
brandResults
.
Scan
(
&
id
,
&
enName
,
&
chName
)
var
shortName
sql
.
NullString
var
nickName
sql
.
NullString
err
=
brandResults
.
Scan
(
&
id
,
&
enName
,
&
chName
,
&
shortName
,
&
nickName
)
if
err
!=
nil
{
panic
(
err
.
Error
())
}
if
err
!=
nil
{
panic
(
err
.
Error
())
}
brandMap
[
cleanKeyword
(
enName
)]
=
id
brandMap
[
cleanKeyword
(
enName
)]
=
id
brandMap
[
cleanKeyword
(
chName
)]
=
id
brandMap
[
cleanKeyword
(
chName
)]
=
id
if
_
,
exist
:=
brandMap
[
cleanKeyword
(
shortName
.
String
)];
!
exist
{
brandMap
[
cleanKeyword
(
shortName
.
String
)]
=
id
}
if
_
,
exist
:=
brandMap
[
cleanKeyword
(
nickName
.
String
)];
!
exist
{
brandMap
[
cleanKeyword
(
nickName
.
String
)]
=
id
}
}
}
fmt
.
Println
(
"brandMap size is :"
,
len
(
brandMap
),
", brandMap is "
,
brandMap
)
fmt
.
Println
(
"brandMap size is :"
,
len
(
brandMap
),
", brandMap is "
,
brandMap
)
...
@@ -236,13 +295,21 @@ func queryInfo(db *sql.DB) []int {
...
@@ -236,13 +295,21 @@ func queryInfo(db *sql.DB) []int {
}
}
func
queryIndex
(
idFlag
int
,
db
*
sql
.
DB
,
bulkProcessor
*
elastic
.
BulkProcessor
,
wg
*
sync
.
WaitGroup
)
{
func
queryIndex
(
idFlag
int
,
db
*
sql
.
DB
,
bulkProcessor
*
elastic
.
BulkProcessor
,
wg
*
sync
.
WaitGroup
)
{
wg
.
Add
(
1
)
// 循环时可能查询到重复数据,应该以id 的上下界来查询
// 循环时可能查询到重复数据,应该以id 的上下界来查询
var
sqlStr
=
fmt
.
Sprintf
(
"select id, keyword, year_pv, year_product_click_count, year_add_cart_count, "
+
var
sqlStr
=
fmt
.
Sprintf
(
"select id, keyword, year_pv, year_product_click_count, year_add_cart_count, "
+
"week_pv, week_product_click_count, week_add_cart_count, p_day from app_search_keyword_year_week_p_day where id >= %d and id < %d"
,
idFlag
,
idFlag
+
TABLE_SPLIT_STEP_SIZE
)
"week_pv, week_product_click_count, week_add_cart_count, p_day, "
+
"week_uv, week_product_click_uv, week_add_cart_uv, "
+
"month_pv, month_product_click_count, month_add_cart_count, month_uv, month_product_click_uv, month_add_cart_uv, prepare_tags "
+
"from app_search_keyword_year_week_p_day where id >= %d and id < %d"
,
idFlag
,
idFlag
+
TABLE_SPLIT_STEP_SIZE
)
log
.
Print
(
sqlStr
)
results
,
err
:=
db
.
Query
(
sqlStr
)
results
,
err
:=
db
.
Query
(
sqlStr
)
if
err
!=
nil
{
log
.
Print
(
err
.
Error
())
}
if
err
!=
nil
{
log
.
Print
(
err
.
Error
())
}
log
.
Print
(
"read database success "
)
for
results
.
Next
()
{
for
results
.
Next
()
{
var
id
int
var
id
int
var
keyword
sql
.
NullString
var
keyword
sql
.
NullString
...
@@ -253,8 +320,37 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
...
@@ -253,8 +320,37 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
var
weekProductClickCount
sql
.
NullInt64
var
weekProductClickCount
sql
.
NullInt64
var
weekAddCartCount
sql
.
NullInt64
var
weekAddCartCount
sql
.
NullInt64
var
pDay
string
var
pDay
string
var
weekUv
sql
.
NullInt64
var
weekClickUv
sql
.
NullInt64
var
weekAddCartUv
sql
.
NullInt64
var
monthPv
sql
.
NullInt64
var
monthClickCount
sql
.
NullInt64
var
monthAddCartCount
sql
.
NullInt64
var
monthUv
sql
.
NullInt64
var
monthProductClickUv
sql
.
NullInt64
var
monthAddCartUv
sql
.
NullInt64
var
prepareTags
sql
.
NullString
err
=
results
.
Scan
(
&
id
,
&
keyword
,
&
yearPv
,
&
yearProductClickCount
,
&
yearAddCartCount
,
&
weekPv
,
&
weekProductClickCount
,
&
weekAddCartCount
,
&
pDay
,
&
weekUv
,
&
weekClickUv
,
&
weekAddCartUv
,
&
monthPv
,
&
monthClickCount
,
&
monthAddCartCount
,
&
monthUv
,
&
monthProductClickUv
,
&
monthAddCartUv
,
&
prepareTags
)
err
=
results
.
Scan
(
&
id
,
&
keyword
,
&
yearPv
,
&
yearProductClickCount
,
&
yearAddCartCount
,
&
weekPv
,
&
weekProductClickCount
,
&
weekAddCartCount
,
&
pDay
)
if
err
!=
nil
{
log
.
Print
(
err
.
Error
())
}
if
err
!=
nil
{
log
.
Print
(
err
.
Error
())
}
if
keyword
.
Valid
&&
len
(
keyword
.
String
)
>
0
&&
keyword
.
String
!=
""
{
if
keyword
.
Valid
&&
len
(
keyword
.
String
)
>
0
&&
keyword
.
String
!=
""
{
...
@@ -268,7 +364,18 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
...
@@ -268,7 +364,18 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
WeekCount
:
int32
(
weekPv
.
Int64
),
WeekCount
:
int32
(
weekPv
.
Int64
),
WeekClickCount
:
int32
(
weekProductClickCount
.
Int64
),
WeekClickCount
:
int32
(
weekProductClickCount
.
Int64
),
WeekCartCount
:
int32
(
weekAddCartCount
.
Int64
),
WeekCartCount
:
int32
(
weekAddCartCount
.
Int64
),
KeywordVersion
:
pDay
}
KeywordVersion
:
pDay
,
WeekUv
:
int32
(
weekUv
.
Int64
),
WeekClickUv
:
int32
(
weekClickUv
.
Int64
),
WeekAddCartUv
:
int32
(
weekAddCartUv
.
Int64
),
MonthPv
:
int32
(
monthPv
.
Int64
),
MonthClickCount
:
int32
(
monthClickCount
.
Int64
),
MonthAddCartCount
:
int32
(
monthAddCartCount
.
Int64
),
MonthUv
:
int32
(
monthUv
.
Int64
),
MonthProductClickUv
:
int32
(
monthProductClickUv
.
Int64
),
MonthAddCartUv
:
int32
(
monthAddCartUv
.
Int64
)
,
SuggestTags
:
prepareTags
.
String
,
UpdateTime
:
UPDATE_TIME
}
if
v
,
isExist
:=
wordMap
.
Load
(
key
);
isExist
{
if
v
,
isExist
:=
wordMap
.
Load
(
key
);
isExist
{
merge
(
w
,
v
)
merge
(
w
,
v
)
...
@@ -296,6 +403,19 @@ func merge(word *Word, v interface{}) {
...
@@ -296,6 +403,19 @@ func merge(word *Word, v interface{}) {
word
.
WeekCount
+=
t
.
WeekCount
word
.
WeekCount
+=
t
.
WeekCount
word
.
WeekCartCount
+=
t
.
WeekCartCount
word
.
WeekCartCount
+=
t
.
WeekCartCount
word
.
WeekClickCount
+=
t
.
WeekClickCount
word
.
WeekClickCount
+=
t
.
WeekClickCount
word
.
WeekUv
+=
t
.
WeekUv
word
.
WeekClickUv
+=
t
.
WeekClickUv
word
.
WeekAddCartUv
+=
t
.
WeekAddCartUv
word
.
MonthPv
+=
t
.
MonthPv
word
.
MonthClickCount
+=
t
.
MonthClickCount
word
.
MonthAddCartCount
+=
t
.
MonthAddCartCount
word
.
MonthUv
+=
t
.
MonthUv
word
.
MonthProductClickUv
+=
t
.
MonthProductClickUv
word
.
MonthAddCartUv
+=
t
.
MonthAddCartUv
if
len
(
word
.
SuggestTags
)
==
0
||
"null"
==
word
.
SuggestTags
||
"NULL"
==
word
.
SuggestTags
{
word
.
SuggestTags
=
t
.
SuggestTags
}
}
}
func
after
(
executionId
int64
,
requests
[]
elastic
.
BulkableRequest
,
response
*
elastic
.
BulkResponse
,
err
error
)
{
func
after
(
executionId
int64
,
requests
[]
elastic
.
BulkableRequest
,
response
*
elastic
.
BulkResponse
,
err
error
)
{
...
@@ -304,22 +424,26 @@ func after(executionId int64, requests []elastic.BulkableRequest, response *elas
...
@@ -304,22 +424,26 @@ func after(executionId int64, requests []elastic.BulkableRequest, response *elas
func
processWord
(
w
*
Word
)
{
func
processWord
(
w
*
Word
)
{
w
.
KeywordPinYin
=
convertToPinyin
(
w
.
Keyword
)
w
.
KeywordPinYin
=
convertToPinyin
(
w
.
Keyword
)
// 年点击加购率
w
.
YearClickRatio
=
calculateRatio
(
w
.
YearClickCount
,
w
.
YearCount
)
w
.
YearClickRatio
=
calculateRatio
(
w
.
YearClickCount
,
w
.
YearCount
)
w
.
YearCartRatio
=
calculateRatio
(
w
.
YearCartCount
,
w
.
YearCount
)
w
.
YearCartRatio
=
calculateRatio
(
w
.
YearCartCount
,
w
.
YearCount
)
// 周点击加购率
w
.
WeekClickRatio
=
calculateRatio
(
w
.
WeekClickCount
,
w
.
WeekCount
)
w
.
WeekClickRatio
=
calculateRatio
(
w
.
WeekClickCount
,
w
.
WeekCount
)
w
.
WeekCartRatio
=
calculateRatio
(
w
.
WeekCartCount
,
w
.
WeekCount
)
w
.
WeekCartRatio
=
calculateRatio
(
w
.
WeekCartCount
,
w
.
WeekCount
)
//
非默认值,
加权
//
年加购率 再
加权
if
w
.
YearCount
!=
0
&&
w
.
YearCartCount
!=
0
{
if
w
.
YearCount
!=
0
&&
w
.
YearCartCount
!=
0
{
w
.
YearCartRatio
*=
3
w
.
YearCartRatio
*=
3
}
}
//
非默认值,
加权
//
周加购率 再
加权
if
w
.
WeekCount
!=
0
&&
w
.
WeekCartCount
!=
0
{
if
w
.
WeekCount
!=
0
&&
w
.
WeekCartCount
!=
0
{
w
.
WeekCartRatio
*=
3
w
.
WeekCartRatio
*=
3
}
}
//
非默认值,
加权
//
周点击率 再
加权
if
w
.
WeekCount
!=
0
&&
w
.
WeekClickCount
!=
0
{
if
w
.
WeekCount
!=
0
&&
w
.
WeekClickCount
!=
0
{
w
.
WeekClickRatio
*=
2
w
.
WeekClickRatio
*=
2
}
}
...
@@ -352,22 +476,22 @@ func isFilterWord(w *Word) bool {
...
@@ -352,22 +476,22 @@ func isFilterWord(w *Word) bool {
if
w
.
IsSensitive
{
return
true
}
if
w
.
IsSensitive
{
return
true
}
// 过滤掉太长的词 每个中文字占3个byte
// 过滤掉太长的词 每个中文字占3个byte
if
utf8
.
RuneCountInString
(
w
.
Keyword
)
<=
1
||
len
(
w
.
Keyword
)
>
6
0
{
return
true
}
if
utf8
.
RuneCountInString
(
w
.
Keyword
)
<=
1
||
len
(
w
.
Keyword
)
>
5
0
{
return
true
}
// 过滤掉商品id,商品id是有7位数字组成
// 过滤掉商品id,商品id是有7位数字组成
if
len
(
w
.
Keyword
)
>
6
&&
isAllDigit
(
w
.
Keyword
)
{
return
true
}
if
len
(
w
.
Keyword
)
>
6
&&
isAllDigit
(
w
.
Keyword
)
{
return
true
}
// 品牌词 类目词 人工干预词 不做过滤
if
w
.
IsBrand
||
w
.
IsCategory
||
w
.
IsManual
{
return
false
}
// 年数据过滤
if
w
.
YearCount
==
0
||
w
.
YearClickCount
==
0
{
return
true
}
// 前缀过滤
// 前缀过滤
for
_
,
v
:=
range
prefixFilterArr
{
for
_
,
v
:=
range
prefixFilterArr
{
if
strings
.
HasPrefix
(
w
.
Keyword
,
v
)
{
return
true
}
if
strings
.
HasPrefix
(
w
.
Keyword
,
v
)
{
return
true
}
}
}
// 品牌词 类目词 人工干预词 不做过滤
if
w
.
IsBrand
||
w
.
IsCategory
||
w
.
IsManual
{
return
false
}
// 年数据过滤
if
w
.
YearCount
<
2
||
w
.
YearClickCount
<
2
{
return
true
}
// 判断是否是热搜词 一年内搜索次数大于50或者一周内搜索次数大于5
// 判断是否是热搜词 一年内搜索次数大于50或者一周内搜索次数大于5
if
isHotSearchWord
(
w
)
{
if
isHotSearchWord
(
w
)
{
// 搜索次数比较多 转化率或者点击率较高的 不过滤
// 搜索次数比较多 转化率或者点击率较高的 不过滤
...
@@ -405,14 +529,22 @@ func calculateRatio(numerator int32, denominator int32) float64 {
...
@@ -405,14 +529,22 @@ func calculateRatio(numerator int32, denominator int32) float64 {
}
}
func
calculateWordRank
(
w
*
Word
)
{
func
calculateWordRank
(
w
*
Word
)
{
wordRank
:=
10000.0
wordRank
:=
10000.0
// 长度因子
wordRank
+=
3000
*
calculateLengthFactor
(
len
(
w
.
Keyword
))
wordRank
+=
3000
*
calculateLengthFactor
(
len
(
w
.
Keyword
))
// 年数量因子
wordRank
+=
2000
*
calculateCountFactor
(
w
.
YearCount
,
1
)
wordRank
+=
2000
*
calculateCountFactor
(
w
.
YearCount
,
1
)
// 周数量因子
wordRank
+=
2000
*
calculateCountFactor
(
w
.
WeekCount
,
52
)
wordRank
+=
2000
*
calculateCountFactor
(
w
.
WeekCount
,
52
)
// 年点击率因子
wordRank
+=
3000
*
calculateRatioFactor
(
w
.
YearClickRatio
,
w
.
YearClickCount
)
wordRank
+=
3000
*
calculateRatioFactor
(
w
.
YearClickRatio
,
w
.
YearClickCount
)
// 周点击率因子
wordRank
+=
3000
*
calculateRatioFactor
(
w
.
WeekClickRatio
,
w
.
WeekClickCount
)
wordRank
+=
3000
*
calculateRatioFactor
(
w
.
WeekClickRatio
,
w
.
WeekClickCount
)
// 年加购率因子
wordRank
+=
3000
*
calculateRatioFactor
(
w
.
YearCartRatio
,
w
.
YearCartCount
)
wordRank
+=
3000
*
calculateRatioFactor
(
w
.
YearCartRatio
,
w
.
YearCartCount
)
// 周加购率因子
wordRank
+=
3000
*
calculateRatioFactor
(
w
.
WeekCartRatio
,
w
.
WeekCartCount
)
wordRank
+=
3000
*
calculateRatioFactor
(
w
.
WeekCartRatio
,
w
.
WeekCartCount
)
if
w
.
IsBrand
{
wordRank
*=
1.8
}
if
w
.
IsBrand
{
wordRank
*=
1.8
}
if
w
.
IsCategory
{
wordRank
*=
1.2
}
if
w
.
IsCategory
{
wordRank
*=
1.2
}
...
@@ -421,19 +553,60 @@ func calculateWordRank(w *Word) {
...
@@ -421,19 +553,60 @@ func calculateWordRank(w *Word) {
}
}
func
calculateWordABRank
(
w
*
Word
)
{
func
calculateWordABRank
(
w
*
Word
)
{
// 月点击加购率
monthClickRatio
:=
calculateRatio
(
w
.
MonthProductClickUv
,
w
.
MonthUv
)
monthCartRatio
:=
calculateRatio
(
w
.
MonthAddCartUv
,
w
.
MonthUv
)
// 周点击加购率(和A相比, count 换成了uv)
weekClickRatioNew
:=
calculateRatio
(
w
.
WeekClickUv
,
w
.
WeekUv
)
weekCartRatioNew
:=
calculateRatio
(
w
.
WeekAddCartUv
,
w
.
WeekUv
)
// 月点击
if
w
.
MonthProductClickUv
!=
0
&&
w
.
MonthUv
!=
0
{
monthClickRatio
*=
1.5
}
// 月加购,加权
if
w
.
MonthAddCartUv
!=
0
&&
w
.
MonthUv
!=
0
{
monthCartRatio
*=
3
}
// 周点击,加权
if
w
.
WeekClickUv
!=
0
&&
w
.
WeekUv
!=
0
{
weekClickRatioNew
*=
2
}
// 周加购,加权
if
w
.
WeekAddCartUv
!=
0
&&
w
.
WeekUv
!=
0
{
weekCartRatioNew
*=
3
}
wordABRank
:=
10000.0
wordABRank
:=
10000.0
// 长度因子
wordABRank
+=
3000
*
calculateLengthFactor
(
len
(
w
.
Keyword
))
wordABRank
+=
3000
*
calculateLengthFactor
(
len
(
w
.
Keyword
))
// 月数量因子
wordABRank
+=
2000
*
calculateCountFactor
(
w
.
MonthUv
,
4
)
// 周数量因子
wordABRank
+=
2000
*
calculateCountFactor
(
w
.
WeekUv
,
52
)
// 年数量因子
wordABRank
+=
2000
*
calculateCountFactor
(
w
.
YearCount
,
1
)
wordABRank
+=
2000
*
calculateCountFactor
(
w
.
YearCount
,
1
)
wordABRank
+=
2000
*
calculateCountFactor
(
w
.
WeekCount
,
52
)
// 点击
// 点击
// 年点击改为 2000
// 月点击率因子
wordABRank
+=
2000
*
calculateRatioFactor
(
w
.
YearClickRatio
,
w
.
YearClickCount
)
wordABRank
+=
3000
*
calculateRatioFactor
(
monthClickRatio
,
w
.
MonthProductClickUv
)
wordABRank
+=
3000
*
calculateRatioFactor
(
w
.
WeekClickRatio
,
w
.
WeekClickCount
)
// 周点击率因子
wordABRank
+=
3000
*
calculateRatioFactor
(
weekClickRatioNew
,
w
.
WeekUv
)
// 加购
// 加购
// 年加购率因子
wordABRank
+=
3000
*
calculateRatioFactor
(
w
.
YearCartRatio
,
w
.
YearCartCount
)
wordABRank
+=
3000
*
calculateRatioFactor
(
w
.
YearCartRatio
,
w
.
YearCartCount
)
wordABRank
+=
3000
*
calculateRatioFactor
(
w
.
WeekCartRatio
,
w
.
WeekCartCount
)
// 月加购率因子
wordABRank
+=
3000
*
calculateRatioFactor
(
monthCartRatio
,
w
.
MonthUv
)
// 周加购率因子
wordABRank
+=
3000
*
calculateRatioFactor
(
weekCartRatioNew
,
w
.
WeekUv
)
if
w
.
IsBrand
{
wordABRank
*=
1.8
}
if
w
.
IsBrand
{
wordABRank
*=
1.8
}
if
w
.
IsCategory
{
wordABRank
*=
1.2
}
if
w
.
IsCategory
{
wordABRank
*=
1.2
}
...
...
main/test.go
View file @
bca1a519
package
main
package
main
import
(
import
(
"encoding/json"
"math"
"strings"
"fmt"
"fmt"
)
)
...
@@ -13,20 +14,29 @@ type B struct {
...
@@ -13,20 +14,29 @@ type B struct {
YearCartCount
int32
`json:"yearCartCount"`
YearCartCount
int32
`json:"yearCartCount"`
ZhaoCount
int32
`json:"-"`
ZhaoCount
int32
`json:"-"`
}
}
func
main
()
{
func
main
()
{
b
:=
B
{
Keyword
:
"赵延超"
,
prefix
:=
strings
.
HasPrefix
(
"tod's"
,
"tod's"
)
KeywordPinYin
:
"zhaoyanchao"
,
fmt
.
Print
(
prefix
)
YearCount
:
1000
,
YearCartCount
:
100
,
}
YearClickCount
:
10
,
ZhaoCount
:
2
}
func
calculateRatioFactor2
(
ratio
float64
,
count
int32
)
float64
{
if
jsonBytes
,
errs
:=
json
.
Marshal
(
b
);
errs
==
nil
{
var
rank
float64
fmt
.
Print
(
string
(
jsonBytes
))
switch
{
case
count
>
1
&&
count
<
10
:
rank
=
1.2
case
count
>=
10
&&
count
<
20
:
rank
=
1.4
case
count
>=
20
&&
count
<
50
:
rank
=
1.6
case
count
>=
50
&&
count
<
100
:
rank
=
1.8
case
count
>=
100
&&
count
<
200
:
rank
=
2.0
case
count
>=
200
&&
count
<
500
:
rank
=
2.2
case
count
>=
500
:
rank
=
2.5
default
:
rank
=
1.0
}
}
//根据搜索转化率,转换为热度因子
return
math
.
Log10
(
math
.
Sqrt
(
ratio
+
10
))
*
rank
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment