Commit f3d1cd5f by zhaoyanchao

有内存不足问题,暂时提交,试用旧版本

parent e8fe507a
package main package main
import ( import (
"strings" "net/http"
"time"
"encoding/json"
"bytes"
"io/ioutil"
"container/list"
"strings"
"github.com/mozillazg/go-pinyin" "github.com/mozillazg/go-pinyin"
"fmt"
"strconv" "strconv"
) "fmt"
)
type ENV struct { type ENV struct {
DataWareDB string DataWareDB string
...@@ -17,6 +23,12 @@ type ENV struct { ...@@ -17,6 +23,12 @@ type ENV struct {
SensitiveFolder string SensitiveFolder string
} }
// Message is the payload that is JSON-encoded and posted to the notification
// service.
//
// Phones and Body stay *list.List so that callers which build them with
// list.New() keep compiling; MarshalJSON flattens them for the wire.
type Message struct {
	Title  string
	Phones *list.List
	Body   *list.List
}

// MarshalJSON encodes the message with Phones and Body as JSON arrays.
//
// list.List has no exported fields, so the default encoder would render both
// lists as "{}" and silently drop every element that was pushed onto them.
// The key names (Title, Phones, Body) match the default encoding.
func (m Message) MarshalJSON() ([]byte, error) {
	return json.Marshal(struct {
		Title  string
		Phones []interface{}
		Body   []interface{}
	}{
		Title:  m.Title,
		Phones: messageListValues(m.Phones),
		Body:   messageListValues(m.Body),
	})
}

// messageListValues copies the elements of l, front to back, into a slice.
// A nil list yields a nil slice so that it still encodes as null, exactly as
// the nil pointer did before.
func messageListValues(l *list.List) []interface{} {
	if l == nil {
		return nil
	}
	values := make([]interface{}, 0, l.Len())
	for e := l.Front(); e != nil; e = e.Next() {
		values = append(values, e.Value)
	}
	return values
}
var test_env = &ENV{ var test_env = &ENV{
DataWareDB: "DataWarehouse_test:FihdZW7o1XKtDETZexOG@tcp(test01-secooDataWarehouse.master.com:3306)/secooDataWarehouse", DataWareDB: "DataWarehouse_test:FihdZW7o1XKtDETZexOG@tcp(test01-secooDataWarehouse.master.com:3306)/secooDataWarehouse",
ErpDB: "3306_test:iS6CXpYqgZ8Mhjui@tcp(10.4.3.223:3306)/secooErpDB", ErpDB: "3306_test:iS6CXpYqgZ8Mhjui@tcp(10.4.3.223:3306)/secooErpDB",
...@@ -79,7 +91,7 @@ func convertToPinyin(str string) string { ...@@ -79,7 +91,7 @@ func convertToPinyin(str string) string {
func cleanKeyword(keyword string) string { func cleanKeyword(keyword string) string {
out, err := t2s.Convert(keyword) out, err := t2s.Convert(keyword)
if err != nil { fmt.Println(err) } if err != nil { fmt.Println(err) }
keyword = strings.ToLower(strings.Trim(DBC2SBC(strings.TrimSpace(out)),"\ufffc|,")) keyword = strings.ToLower(strings.Trim(DBC2SBC(strings.TrimSpace(out)),"\ufffc|,|."))
return strings.Join(strings.Fields(keyword)," ") return strings.Join(strings.Fields(keyword)," ")
} }
...@@ -111,4 +123,42 @@ func DBC2SBC(s string) string { ...@@ -111,4 +123,42 @@ func DBC2SBC(s string) string {
} }
} }
return strings.Join(strLst, "") return strings.Join(strLst, "")
} }
\ No newline at end of file
// Post sends data as a JSON-encoded POST request and returns the response
// body as a string.
//
//	url:         request address
//	data:        value to submit; it is serialized with json.Marshal
//	contentType: format of the request body, e.g. application/json
//
// The HTTP status code is not inspected. Post panics if data cannot be
// marshalled, if the request fails, or if the response body cannot be read.
func Post(url string, data interface{}, contentType string) string {
	// Fail before touching the network: posting an empty body because the
	// payload could not be encoded would hide the real problem.
	jsonStr, err := json.Marshal(data)
	if err != nil {
		panic(err)
	}

	// Timeout: 5 seconds.
	client := &http.Client{Timeout: 5 * time.Second}
	resp, err := client.Post(url, contentType, bytes.NewBuffer(jsonStr))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// A read error would otherwise hand back a truncated body as if it were
	// the complete response.
	result, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	return string(result)
}
// sendSuggestNotify posts an alert to the notification endpoint reporting
// that the suggest-word data set is too small.
//
// The message carries a fixed title, a single body line and a single Phones
// entry (one string holding two comma-separated numbers). The response
// returned by Post is discarded; Post panics if the request itself fails.
func sendSuggestNotify() {
	recipients := list.New()
	recipients.PushBack("17621863255,13894895183")

	lines := list.New()
	lines.PushBack("提示词数据太少")

	alert := Message{
		Title:  "提示词数据异常",
		Phones: recipients,
		Body:   lines,
	}
	Post("http://matrix-inform.secoolocal.com/user/sendToUser", alert, "application/json")
}
...@@ -19,7 +19,8 @@ import ( ...@@ -19,7 +19,8 @@ import (
"unicode" "unicode"
"unicode/utf8" "unicode/utf8"
"crypto/md5" "crypto/md5"
) )
type Word struct { type Word struct {
Keyword string `json:"keyword"` Keyword string `json:"keyword"`
...@@ -53,6 +54,7 @@ type Word struct { ...@@ -53,6 +54,7 @@ type Word struct {
MonthProductClickUv int32 `json:"-"` MonthProductClickUv int32 `json:"-"`
MonthAddCartUv int32 `json:"-"` MonthAddCartUv int32 `json:"-"`
SuggestTags string `json:"suggestTags"` SuggestTags string `json:"suggestTags"`
UpdateTime int64 `json:"updateTime"`
} }
var wordMap sync.Map var wordMap sync.Map
...@@ -70,16 +72,21 @@ var prefixFilterArr = []string{"https://", "http://", "dg", "d & g", "dolce&gabb ...@@ -70,16 +72,21 @@ var prefixFilterArr = []string{"https://", "http://", "dg", "d & g", "dolce&gabb
const TABLE_SPLIT_STEP_SIZE = 10000 const TABLE_SPLIT_STEP_SIZE = 10000
const LEVEL_SIZE = 1
const MAX_TAG_SIZE = 5 const MAX_TAG_SIZE = 5
var UPDATE_TIME = time.Now().UnixNano() / 1e6
func main() { func main() {
startTime := time.Now() startTime := time.Now()
datawareDB, err := sql.Open("mysql", RUN_ENV.DataWareDB) datawareDB, err := sql.Open("mysql", RUN_ENV.DataWareDB)
if err != nil { log.Print(err.Error()) } if err != nil { log.Print(err.Error()) }
datawareDB.SetConnMaxLifetime(10*time.Minute) datawareDB.SetConnMaxLifetime(10*time.Minute)
datawareDB.SetMaxOpenConns(50) datawareDB.SetMaxOpenConns(350)
datawareDB.SetMaxIdleConns(50) datawareDB.SetMaxIdleConns(100)
var client *elastic.Client var client *elastic.Client
if RUN_ENV.EsUser != "" { if RUN_ENV.EsUser != "" {
...@@ -91,7 +98,7 @@ func main() { ...@@ -91,7 +98,7 @@ func main() {
bulkProcessor, err := elastic.NewBulkProcessorService(client). bulkProcessor, err := elastic.NewBulkProcessorService(client).
Workers(50). Workers(50).
BulkActions(5000). BulkActions(2000).
FlushInterval(500*time.Millisecond). FlushInterval(500*time.Millisecond).
After(after). After(after).
Do(context.Background()) Do(context.Background())
...@@ -107,13 +114,25 @@ func main() { ...@@ -107,13 +114,25 @@ func main() {
count := arr[1] / TABLE_SPLIT_STEP_SIZE count := arr[1] / TABLE_SPLIT_STEP_SIZE
log.Printf("maxId/10000=%d\n", count) log.Printf("maxId/10000=%d\n", count)
for i := 0; i <= count; i++ { if arr[1] < 2800000 {
wg.Add(1) log.Printf("data is too little ,return")
go queryIndex(i * TABLE_SPLIT_STEP_SIZE, datawareDB, bulkProcessor, &wg) sendSuggestNotify()
return
} }
for i:= 0; i < count; i = i + LEVEL_SIZE {
for j := 0; j < LEVEL_SIZE; j++ {
log.Printf("add to wait %d", i+j)
wg.Add(1)
go queryIndex( (i + j ) *TABLE_SPLIT_STEP_SIZE, datawareDB, bulkProcessor, &wg)
}
wg.Wait()
log.Println("wait finish ")
}
wg.Wait()
fmt.Println("all thread has read maps") fmt.Println("all thread has read maps")
checkUnusedData(bulkProcessor) checkUnusedData(bulkProcessor)
...@@ -286,9 +305,14 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg ...@@ -286,9 +305,14 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
"week_uv, week_product_click_uv, week_add_cart_uv, " + "week_uv, week_product_click_uv, week_add_cart_uv, " +
"month_pv, month_product_click_count, month_add_cart_count, month_uv, month_product_click_uv, month_add_cart_uv, prepare_tags " + "month_pv, month_product_click_count, month_add_cart_count, month_uv, month_product_click_uv, month_add_cart_uv, prepare_tags " +
"from app_search_keyword_year_week_p_day where id >= %d and id < %d", idFlag, idFlag + TABLE_SPLIT_STEP_SIZE) "from app_search_keyword_year_week_p_day where id >= %d and id < %d", idFlag, idFlag + TABLE_SPLIT_STEP_SIZE)
log.Print(sqlStr)
results, err := db.Query(sqlStr) results, err := db.Query(sqlStr)
if err != nil { log.Print(err.Error()) } if err != nil { log.Print(err.Error()) }
log.Print("read database success ")
for results.Next() { for results.Next() {
var id int var id int
var keyword sql.NullString var keyword sql.NullString
...@@ -335,8 +359,6 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg ...@@ -335,8 +359,6 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
if keyword.Valid && len(keyword.String) > 0 && keyword.String != "" { if keyword.Valid && len(keyword.String) > 0 && keyword.String != "" {
key := cleanKeyword(keyword.String) key := cleanKeyword(keyword.String)
var w = &Word{ var w = &Word{
Keyword:key, Keyword:key,
YearCount: int32(yearPv.Int64), YearCount: int32(yearPv.Int64),
...@@ -355,7 +377,8 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg ...@@ -355,7 +377,8 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
MonthUv: int32(monthUv.Int64), MonthUv: int32(monthUv.Int64),
MonthProductClickUv: int32(monthProductClickUv.Int64), MonthProductClickUv: int32(monthProductClickUv.Int64),
MonthAddCartUv: int32(monthAddCartUv.Int64) , MonthAddCartUv: int32(monthAddCartUv.Int64) ,
SuggestTags: prepareTags.String} SuggestTags: prepareTags.String,
UpdateTime: UPDATE_TIME}
if v, isExist := wordMap.Load(key); isExist { if v, isExist := wordMap.Load(key); isExist {
merge(w,v) merge(w,v)
...@@ -404,22 +427,26 @@ func after(executionId int64, requests []elastic.BulkableRequest, response *elas ...@@ -404,22 +427,26 @@ func after(executionId int64, requests []elastic.BulkableRequest, response *elas
func processWord(w *Word) { func processWord(w *Word) {
w.KeywordPinYin = convertToPinyin(w.Keyword) w.KeywordPinYin = convertToPinyin(w.Keyword)
// 年点击加购率
w.YearClickRatio = calculateRatio(w.YearClickCount, w.YearCount) w.YearClickRatio = calculateRatio(w.YearClickCount, w.YearCount)
w.YearCartRatio = calculateRatio(w.YearCartCount, w.YearCount) w.YearCartRatio = calculateRatio(w.YearCartCount, w.YearCount)
// 周点击加购率
w.WeekClickRatio = calculateRatio(w.WeekClickCount, w.WeekCount) w.WeekClickRatio = calculateRatio(w.WeekClickCount, w.WeekCount)
w.WeekCartRatio = calculateRatio(w.WeekCartCount, w.WeekCount) w.WeekCartRatio = calculateRatio(w.WeekCartCount, w.WeekCount)
// 非默认值,加权 // 年加购率 再加权
if w.YearCount != 0 && w.YearCartCount != 0 { if w.YearCount != 0 && w.YearCartCount != 0 {
w.YearCartRatio *= 3 w.YearCartRatio *= 3
} }
// 非默认值,加权 // 周加购率 再加权
if w.WeekCount != 0 && w.WeekCartCount != 0 { if w.WeekCount != 0 && w.WeekCartCount != 0 {
w.WeekCartRatio *= 3 w.WeekCartRatio *= 3
} }
// 非默认值,加权 // 周点击率 再加权
if w.WeekCount != 0 && w.WeekClickCount != 0 { if w.WeekCount != 0 && w.WeekClickCount != 0 {
w.WeekClickRatio *= 2 w.WeekClickRatio *= 2
} }
...@@ -452,22 +479,22 @@ func isFilterWord(w *Word) bool { ...@@ -452,22 +479,22 @@ func isFilterWord(w *Word) bool {
if w.IsSensitive { return true } if w.IsSensitive { return true }
// 过滤掉太长的词 每个中文字占3个byte // 过滤掉太长的词 每个中文字占3个byte
if utf8.RuneCountInString(w.Keyword) <= 1 || len(w.Keyword) > 60 { return true } if utf8.RuneCountInString(w.Keyword) <= 1 || len(w.Keyword) > 50 { return true }
// 过滤掉商品id,商品id是有7位数字组成 // 过滤掉商品id,商品id是有7位数字组成
if len(w.Keyword) > 6 && isAllDigit(w.Keyword) { return true } if len(w.Keyword) > 6 && isAllDigit(w.Keyword) { return true }
// 品牌词 类目词 人工干预词 不做过滤
if w.IsBrand || w.IsCategory || w.IsManual { return false }
// 年数据过滤
if w.YearCount == 0 || w.YearClickCount == 0 { return true }
// 前缀过滤 // 前缀过滤
for _, v := range prefixFilterArr { for _, v := range prefixFilterArr {
if strings.HasPrefix(w.Keyword, v) { return true } if strings.HasPrefix(w.Keyword, v) { return true }
} }
// 品牌词 类目词 人工干预词 不做过滤
if w.IsBrand || w.IsCategory || w.IsManual { return false }
// 年数据过滤
if w.YearCount < 2 || w.YearClickCount < 2 { return true }
// 判断是否是热搜词 一年内搜索次数大于50或者一周内搜索次数大于5 // 判断是否是热搜词 一年内搜索次数大于50或者一周内搜索次数大于5
if isHotSearchWord(w) { if isHotSearchWord(w) {
// 搜索次数比较多 转化率或者点击率较高的 不过滤 // 搜索次数比较多 转化率或者点击率较高的 不过滤
...@@ -505,14 +532,22 @@ func calculateRatio(numerator int32, denominator int32) float64 { ...@@ -505,14 +532,22 @@ func calculateRatio(numerator int32, denominator int32) float64 {
} }
func calculateWordRank(w *Word) { func calculateWordRank(w *Word) {
wordRank := 10000.0 wordRank := 10000.0
// 长度因子
wordRank += 3000 * calculateLengthFactor(len(w.Keyword)) wordRank += 3000 * calculateLengthFactor(len(w.Keyword))
// 年数量因子
wordRank += 2000 * calculateCountFactor(w.YearCount, 1) wordRank += 2000 * calculateCountFactor(w.YearCount, 1)
// 周数量因子
wordRank += 2000 * calculateCountFactor(w.WeekCount, 52) wordRank += 2000 * calculateCountFactor(w.WeekCount, 52)
// 年点击率因子
wordRank += 3000 * calculateRatioFactor(w.YearClickRatio, w.YearClickCount) wordRank += 3000 * calculateRatioFactor(w.YearClickRatio, w.YearClickCount)
// 周点击率因子
wordRank += 3000 * calculateRatioFactor(w.WeekClickRatio, w.WeekClickCount) wordRank += 3000 * calculateRatioFactor(w.WeekClickRatio, w.WeekClickCount)
// 年加购率因子
wordRank += 3000 * calculateRatioFactor(w.YearCartRatio, w.YearCartCount) wordRank += 3000 * calculateRatioFactor(w.YearCartRatio, w.YearCartCount)
// 周加购率因子
wordRank += 3000 * calculateRatioFactor(w.WeekCartRatio, w.WeekCartCount) wordRank += 3000 * calculateRatioFactor(w.WeekCartRatio, w.WeekCartCount)
if w.IsBrand { wordRank *= 1.8 } if w.IsBrand { wordRank *= 1.8 }
if w.IsCategory { wordRank *= 1.2 } if w.IsCategory { wordRank *= 1.2 }
...@@ -522,8 +557,11 @@ func calculateWordRank(w *Word) { ...@@ -522,8 +557,11 @@ func calculateWordRank(w *Word) {
func calculateWordABRank(w *Word) { func calculateWordABRank(w *Word) {
// 月点击加购率
monthClickRatio := calculateRatio(w.MonthProductClickUv, w.MonthUv) monthClickRatio := calculateRatio(w.MonthProductClickUv, w.MonthUv)
monthCartRatio := calculateRatio(w.MonthAddCartUv, w.MonthUv) monthCartRatio := calculateRatio(w.MonthAddCartUv, w.MonthUv)
// 周点击加购率(和A相比, count 换成了uv)
weekClickRatioNew := calculateRatio(w.WeekClickUv, w.WeekUv) weekClickRatioNew := calculateRatio(w.WeekClickUv, w.WeekUv)
weekCartRatioNew := calculateRatio(w.WeekAddCartUv, w.WeekUv) weekCartRatioNew := calculateRatio(w.WeekAddCartUv, w.WeekUv)
...@@ -549,24 +587,28 @@ func calculateWordABRank(w *Word) { ...@@ -549,24 +587,28 @@ func calculateWordABRank(w *Word) {
wordABRank := 10000.0 wordABRank := 10000.0
// 长度因子
wordABRank += 3000 * calculateLengthFactor(len(w.Keyword)) wordABRank += 3000 * calculateLengthFactor(len(w.Keyword))
wordABRank += 2000 * calculateCountFactor(w.MonthUv, 1)
wordABRank += 2000 * calculateCountFactor(w.WeekUv, 4)
// 点击 // 月数量因子
// 年 wordABRank += 2000 * calculateCountFactor(w.MonthUv, 4)
// 周数量因子
wordABRank += 2000 * calculateCountFactor(w.WeekUv, 52)
// 年数量因子
wordABRank += 2000 * calculateCountFactor(w.YearCount, 1) wordABRank += 2000 * calculateCountFactor(w.YearCount, 1)
// 月
// 点击
// 月点击率因子
wordABRank += 3000 * calculateRatioFactor(monthClickRatio, w.MonthProductClickUv) wordABRank += 3000 * calculateRatioFactor(monthClickRatio, w.MonthProductClickUv)
// 周 // 周点击率因子
wordABRank += 3000 * calculateRatioFactor(weekClickRatioNew, w.WeekUv) wordABRank += 3000 * calculateRatioFactor(weekClickRatioNew, w.WeekUv)
// 加购 // 加购
// 年 // 年加购率因子
wordABRank += 3000 * calculateRatioFactor(w.YearCartRatio, w.YearCartCount) wordABRank += 3000 * calculateRatioFactor(w.YearCartRatio, w.YearCartCount)
// 月 // 月加购率因子
wordABRank += 3000 * calculateRatioFactor(monthCartRatio, w.MonthUv) wordABRank += 3000 * calculateRatioFactor(monthCartRatio, w.MonthUv)
// 周 // 周加购率因子
wordABRank += 3000 * calculateRatioFactor(weekCartRatioNew, w.WeekUv) wordABRank += 3000 * calculateRatioFactor(weekCartRatioNew, w.WeekUv)
if w.IsBrand { wordABRank *= 1.8 } if w.IsBrand { wordABRank *= 1.8 }
......
...@@ -16,16 +16,8 @@ type B struct { ...@@ -16,16 +16,8 @@ type B struct {
} }
func main() { func main() {
var arr = strings.Split("",",") prefix := strings.HasPrefix("tod's", "tod's")
var s = "" fmt.Print(prefix)
for i,leng := 0, len(arr); i< MAX_TAG_SIZE && i < leng; i++ {
if i == MAX_TAG_SIZE-1 || i == leng -1 {
s = s + arr[i]
} else {
s = s + arr[i] + ","
}
}
fmt.Print(s)
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment