Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
suggest-task
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
田川
suggest-task
Commits
f3d1cd5f
Commit
f3d1cd5f
authored
Oct 14, 2020
by
zhaoyanchao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
有内存不足问题,暂时提交,试用旧版本
parent
e8fe507a
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
129 additions
and
46 deletions
+129
-46
suggest-task-dependency.go
main/suggest-task-dependency.go
+55
-6
suggest-task.go
main/suggest-task.go
+72
-30
test.go
main/test.go
+2
-10
No files found.
main/suggest-task-dependency.go
View file @
f3d1cd5f
package
main
import
(
"strings"
"net/http"
"time"
"encoding/json"
"bytes"
"io/ioutil"
"container/list"
"strings"
"github.com/mozillazg/go-pinyin"
"fmt"
"strconv"
)
"fmt"
)
type
ENV
struct
{
DataWareDB
string
...
...
@@ -17,6 +23,12 @@ type ENV struct {
SensitiveFolder
string
}
// Message is the payload handed to Post when sending a notification.
// It carries no struct tags, so encoding/json uses the field names
// (Title, Phones, Body) as keys, in that order.
//
// NOTE(review): container/list.List exposes no exported fields, so
// encoding/json encodes Phones and Body as "{}" regardless of the elements
// pushed into them. Confirm whether slices were intended.
type Message struct {
	Title        string
	Phones, Body *list.List
}
var
test_env
=
&
ENV
{
DataWareDB
:
"DataWarehouse_test:FihdZW7o1XKtDETZexOG@tcp(test01-secooDataWarehouse.master.com:3306)/secooDataWarehouse"
,
ErpDB
:
"3306_test:iS6CXpYqgZ8Mhjui@tcp(10.4.3.223:3306)/secooErpDB"
,
...
...
@@ -79,7 +91,7 @@ func convertToPinyin(str string) string {
func
cleanKeyword
(
keyword
string
)
string
{
out
,
err
:=
t2s
.
Convert
(
keyword
)
if
err
!=
nil
{
fmt
.
Println
(
err
)
}
keyword
=
strings
.
ToLower
(
strings
.
Trim
(
DBC2SBC
(
strings
.
TrimSpace
(
out
)),
"
\ufffc
|,"
))
keyword
=
strings
.
ToLower
(
strings
.
Trim
(
DBC2SBC
(
strings
.
TrimSpace
(
out
)),
"
\ufffc
|,
|.
"
))
return
strings
.
Join
(
strings
.
Fields
(
keyword
),
" "
)
}
...
...
@@ -111,4 +123,42 @@ func DBC2SBC(s string) string {
}
}
return
strings
.
Join
(
strLst
,
""
)
}
\ No newline at end of file
}
// Post sends data as a JSON-encoded HTTP POST request and returns the
// response body as a string.
//
//   - url: request address.
//   - data: value submitted as the request body; encoded with json.Marshal.
//   - contentType: Content-Type of the request body, e.g. "application/json".
//
// The signature has no error result, so failures are reported by panicking:
// Post panics when data cannot be JSON-encoded and when the request itself
// fails. The HTTP status code is not inspected; the body of any response is
// returned as-is.
func Post(url string, data interface{}, contentType string) string {
	// Overall request timeout: 5 seconds.
	client := &http.Client{Timeout: 5 * time.Second}

	jsonStr, err := json.Marshal(data)
	if err != nil {
		// This error used to be discarded, which silently posted an empty
		// body. Fail the same way a transport error does instead.
		panic(err)
	}

	resp, err := client.Post(url, contentType, bytes.NewBuffer(jsonStr))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Best effort, as before: on a read error whatever was received so far
	// is still returned to the caller.
	result, _ := ioutil.ReadAll(resp.Body)
	return string(result)
}
func
sendSuggestNotify
()
{
msg
:=
Message
{
Title
:
"提示词数据异常"
,
}
body
:=
list
.
New
()
body
.
PushBack
(
"提示词数据太少"
)
msg
.
Body
=
body
phones
:=
list
.
New
()
phones
.
PushBack
(
"17621863255,13894895183"
)
msg
.
Phones
=
phones
Post
(
"http://matrix-inform.secoolocal.com/user/sendToUser"
,
msg
,
"application/json"
)
}
main/suggest-task.go
View file @
f3d1cd5f
...
...
@@ -19,7 +19,8 @@ import (
"unicode"
"unicode/utf8"
"crypto/md5"
)
)
type
Word
struct
{
Keyword
string
`json:"keyword"`
...
...
@@ -53,6 +54,7 @@ type Word struct {
MonthProductClickUv
int32
`json:"-"`
MonthAddCartUv
int32
`json:"-"`
SuggestTags
string
`json:"suggestTags"`
UpdateTime
int64
`json:"updateTime"`
}
var
wordMap
sync
.
Map
...
...
@@ -70,16 +72,21 @@ var prefixFilterArr = []string{"https://", "http://", "dg", "d & g", "dolce&gabb
const
TABLE_SPLIT_STEP_SIZE
=
10000
const
LEVEL_SIZE
=
1
const
MAX_TAG_SIZE
=
5
var
UPDATE_TIME
=
time
.
Now
()
.
UnixNano
()
/
1e6
func
main
()
{
startTime
:=
time
.
Now
()
datawareDB
,
err
:=
sql
.
Open
(
"mysql"
,
RUN_ENV
.
DataWareDB
)
if
err
!=
nil
{
log
.
Print
(
err
.
Error
())
}
datawareDB
.
SetConnMaxLifetime
(
10
*
time
.
Minute
)
datawareDB
.
SetMaxOpenConns
(
50
)
datawareDB
.
SetMaxIdleConns
(
5
0
)
datawareDB
.
SetMaxOpenConns
(
3
50
)
datawareDB
.
SetMaxIdleConns
(
10
0
)
var
client
*
elastic
.
Client
if
RUN_ENV
.
EsUser
!=
""
{
...
...
@@ -91,7 +98,7 @@ func main() {
bulkProcessor
,
err
:=
elastic
.
NewBulkProcessorService
(
client
)
.
Workers
(
50
)
.
BulkActions
(
5
000
)
.
BulkActions
(
2
000
)
.
FlushInterval
(
500
*
time
.
Millisecond
)
.
After
(
after
)
.
Do
(
context
.
Background
())
...
...
@@ -107,13 +114,25 @@ func main() {
count
:=
arr
[
1
]
/
TABLE_SPLIT_STEP_SIZE
log
.
Printf
(
"maxId/10000=%d
\n
"
,
count
)
for
i
:=
0
;
i
<=
count
;
i
++
{
wg
.
Add
(
1
)
go
queryIndex
(
i
*
TABLE_SPLIT_STEP_SIZE
,
datawareDB
,
bulkProcessor
,
&
wg
)
if
arr
[
1
]
<
2800000
{
log
.
Printf
(
"data is too little ,return"
)
sendSuggestNotify
()
return
}
for
i
:=
0
;
i
<
count
;
i
=
i
+
LEVEL_SIZE
{
for
j
:=
0
;
j
<
LEVEL_SIZE
;
j
++
{
log
.
Printf
(
"add to wait %d"
,
i
+
j
)
wg
.
Add
(
1
)
go
queryIndex
(
(
i
+
j
)
*
TABLE_SPLIT_STEP_SIZE
,
datawareDB
,
bulkProcessor
,
&
wg
)
}
wg
.
Wait
()
log
.
Println
(
"wait finish "
)
}
wg
.
Wait
()
fmt
.
Println
(
"all thread has read maps"
)
checkUnusedData
(
bulkProcessor
)
...
...
@@ -286,9 +305,14 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
"week_uv, week_product_click_uv, week_add_cart_uv, "
+
"month_pv, month_product_click_count, month_add_cart_count, month_uv, month_product_click_uv, month_add_cart_uv, prepare_tags "
+
"from app_search_keyword_year_week_p_day where id >= %d and id < %d"
,
idFlag
,
idFlag
+
TABLE_SPLIT_STEP_SIZE
)
log
.
Print
(
sqlStr
)
results
,
err
:=
db
.
Query
(
sqlStr
)
if
err
!=
nil
{
log
.
Print
(
err
.
Error
())
}
log
.
Print
(
"read database success "
)
for
results
.
Next
()
{
var
id
int
var
keyword
sql
.
NullString
...
...
@@ -335,8 +359,6 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
if
keyword
.
Valid
&&
len
(
keyword
.
String
)
>
0
&&
keyword
.
String
!=
""
{
key
:=
cleanKeyword
(
keyword
.
String
)
var
w
=
&
Word
{
Keyword
:
key
,
YearCount
:
int32
(
yearPv
.
Int64
),
...
...
@@ -355,7 +377,8 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
MonthUv
:
int32
(
monthUv
.
Int64
),
MonthProductClickUv
:
int32
(
monthProductClickUv
.
Int64
),
MonthAddCartUv
:
int32
(
monthAddCartUv
.
Int64
)
,
SuggestTags
:
prepareTags
.
String
}
SuggestTags
:
prepareTags
.
String
,
UpdateTime
:
UPDATE_TIME
}
if
v
,
isExist
:=
wordMap
.
Load
(
key
);
isExist
{
merge
(
w
,
v
)
...
...
@@ -404,22 +427,26 @@ func after(executionId int64, requests []elastic.BulkableRequest, response *elas
func
processWord
(
w
*
Word
)
{
w
.
KeywordPinYin
=
convertToPinyin
(
w
.
Keyword
)
// 年点击加购率
w
.
YearClickRatio
=
calculateRatio
(
w
.
YearClickCount
,
w
.
YearCount
)
w
.
YearCartRatio
=
calculateRatio
(
w
.
YearCartCount
,
w
.
YearCount
)
// 周点击加购率
w
.
WeekClickRatio
=
calculateRatio
(
w
.
WeekClickCount
,
w
.
WeekCount
)
w
.
WeekCartRatio
=
calculateRatio
(
w
.
WeekCartCount
,
w
.
WeekCount
)
//
非默认值,
加权
//
年加购率 再
加权
if
w
.
YearCount
!=
0
&&
w
.
YearCartCount
!=
0
{
w
.
YearCartRatio
*=
3
}
//
非默认值,
加权
//
周加购率 再
加权
if
w
.
WeekCount
!=
0
&&
w
.
WeekCartCount
!=
0
{
w
.
WeekCartRatio
*=
3
}
//
非默认值,
加权
//
周点击率 再
加权
if
w
.
WeekCount
!=
0
&&
w
.
WeekClickCount
!=
0
{
w
.
WeekClickRatio
*=
2
}
...
...
@@ -452,22 +479,22 @@ func isFilterWord(w *Word) bool {
if
w
.
IsSensitive
{
return
true
}
// 过滤掉太长的词 每个中文字占3个byte
if
utf8
.
RuneCountInString
(
w
.
Keyword
)
<=
1
||
len
(
w
.
Keyword
)
>
6
0
{
return
true
}
if
utf8
.
RuneCountInString
(
w
.
Keyword
)
<=
1
||
len
(
w
.
Keyword
)
>
5
0
{
return
true
}
// 过滤掉商品id,商品id是有7位数字组成
if
len
(
w
.
Keyword
)
>
6
&&
isAllDigit
(
w
.
Keyword
)
{
return
true
}
// 品牌词 类目词 人工干预词 不做过滤
if
w
.
IsBrand
||
w
.
IsCategory
||
w
.
IsManual
{
return
false
}
// 年数据过滤
if
w
.
YearCount
==
0
||
w
.
YearClickCount
==
0
{
return
true
}
// 前缀过滤
for
_
,
v
:=
range
prefixFilterArr
{
if
strings
.
HasPrefix
(
w
.
Keyword
,
v
)
{
return
true
}
}
// 品牌词 类目词 人工干预词 不做过滤
if
w
.
IsBrand
||
w
.
IsCategory
||
w
.
IsManual
{
return
false
}
// 年数据过滤
if
w
.
YearCount
<
2
||
w
.
YearClickCount
<
2
{
return
true
}
// 判断是否是热搜词 一年内搜索次数大于50或者一周内搜索次数大于5
if
isHotSearchWord
(
w
)
{
// 搜索次数比较多 转化率或者点击率较高的 不过滤
...
...
@@ -505,14 +532,22 @@ func calculateRatio(numerator int32, denominator int32) float64 {
}
func
calculateWordRank
(
w
*
Word
)
{
wordRank
:=
10000.0
// 长度因子
wordRank
+=
3000
*
calculateLengthFactor
(
len
(
w
.
Keyword
))
// 年数量因子
wordRank
+=
2000
*
calculateCountFactor
(
w
.
YearCount
,
1
)
// 周数量因子
wordRank
+=
2000
*
calculateCountFactor
(
w
.
WeekCount
,
52
)
// 年点击率因子
wordRank
+=
3000
*
calculateRatioFactor
(
w
.
YearClickRatio
,
w
.
YearClickCount
)
// 周点击率因子
wordRank
+=
3000
*
calculateRatioFactor
(
w
.
WeekClickRatio
,
w
.
WeekClickCount
)
// 年加购率因子
wordRank
+=
3000
*
calculateRatioFactor
(
w
.
YearCartRatio
,
w
.
YearCartCount
)
// 周加购率因子
wordRank
+=
3000
*
calculateRatioFactor
(
w
.
WeekCartRatio
,
w
.
WeekCartCount
)
if
w
.
IsBrand
{
wordRank
*=
1.8
}
if
w
.
IsCategory
{
wordRank
*=
1.2
}
...
...
@@ -522,8 +557,11 @@ func calculateWordRank(w *Word) {
func
calculateWordABRank
(
w
*
Word
)
{
// 月点击加购率
monthClickRatio
:=
calculateRatio
(
w
.
MonthProductClickUv
,
w
.
MonthUv
)
monthCartRatio
:=
calculateRatio
(
w
.
MonthAddCartUv
,
w
.
MonthUv
)
// 周点击加购率(和A相比, count 换成了uv)
weekClickRatioNew
:=
calculateRatio
(
w
.
WeekClickUv
,
w
.
WeekUv
)
weekCartRatioNew
:=
calculateRatio
(
w
.
WeekAddCartUv
,
w
.
WeekUv
)
...
...
@@ -549,24 +587,28 @@ func calculateWordABRank(w *Word) {
wordABRank
:=
10000.0
// 长度因子
wordABRank
+=
3000
*
calculateLengthFactor
(
len
(
w
.
Keyword
))
wordABRank
+=
2000
*
calculateCountFactor
(
w
.
MonthUv
,
1
)
wordABRank
+=
2000
*
calculateCountFactor
(
w
.
WeekUv
,
4
)
// 点击
// 年
// 月数量因子
wordABRank
+=
2000
*
calculateCountFactor
(
w
.
MonthUv
,
4
)
// 周数量因子
wordABRank
+=
2000
*
calculateCountFactor
(
w
.
WeekUv
,
52
)
// 年数量因子
wordABRank
+=
2000
*
calculateCountFactor
(
w
.
YearCount
,
1
)
// 月
// 点击
// 月点击率因子
wordABRank
+=
3000
*
calculateRatioFactor
(
monthClickRatio
,
w
.
MonthProductClickUv
)
// 周
// 周
点击率因子
wordABRank
+=
3000
*
calculateRatioFactor
(
weekClickRatioNew
,
w
.
WeekUv
)
// 加购
// 年
// 年
加购率因子
wordABRank
+=
3000
*
calculateRatioFactor
(
w
.
YearCartRatio
,
w
.
YearCartCount
)
// 月
// 月
加购率因子
wordABRank
+=
3000
*
calculateRatioFactor
(
monthCartRatio
,
w
.
MonthUv
)
// 周
// 周
加购率因子
wordABRank
+=
3000
*
calculateRatioFactor
(
weekCartRatioNew
,
w
.
WeekUv
)
if
w
.
IsBrand
{
wordABRank
*=
1.8
}
...
...
main/test.go
View file @
f3d1cd5f
...
...
@@ -16,16 +16,8 @@ type B struct {
}
func
main
()
{
var
arr
=
strings
.
Split
(
""
,
","
)
var
s
=
""
for
i
,
leng
:=
0
,
len
(
arr
);
i
<
MAX_TAG_SIZE
&&
i
<
leng
;
i
++
{
if
i
==
MAX_TAG_SIZE
-
1
||
i
==
leng
-
1
{
s
=
s
+
arr
[
i
]
}
else
{
s
=
s
+
arr
[
i
]
+
","
}
}
fmt
.
Print
(
s
)
prefix
:=
strings
.
HasPrefix
(
"tod's"
,
"tod's"
)
fmt
.
Print
(
prefix
)
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment