Commit f3d1cd5f by zhaoyanchao

有内存不足问题,暂时提交,试用旧版本

parent e8fe507a
package main
import (
"net/http"
"time"
"encoding/json"
"bytes"
"io/ioutil"
"container/list"
"strings"
"github.com/mozillazg/go-pinyin"
"fmt"
"strconv"
)
"fmt"
)
type ENV struct {
DataWareDB string
......@@ -17,6 +23,12 @@ type ENV struct {
SensitiveFolder string
}
// Message is the JSON payload posted to the notification service.
//
// Phones and Body are kept as *list.List so existing callers keep working.
// container/list.List has no exported fields, so encoding/json would
// otherwise encode both of them as the empty object {}; MarshalJSON below
// emits their elements as JSON arrays instead.
type Message struct {
	Title  string
	Phones *list.List
	Body   *list.List
}

// MarshalJSON encodes the message with Phones and Body rendered as JSON
// arrays of the list elements, in list order. A nil list is encoded as null
// and an empty list as [].
func (m Message) MarshalJSON() ([]byte, error) {
	return json.Marshal(struct {
		Title  string
		Phones []interface{}
		Body   []interface{}
	}{
		Title:  m.Title,
		Phones: messageListValues(m.Phones),
		Body:   messageListValues(m.Body),
	})
}

// messageListValues copies the element values of l into a slice, front to
// back. It returns nil for a nil list and a non-nil empty slice for an empty
// list so the two cases stay distinguishable once encoded.
func messageListValues(l *list.List) []interface{} {
	if l == nil {
		return nil
	}
	values := make([]interface{}, 0, l.Len())
	for e := l.Front(); e != nil; e = e.Next() {
		values = append(values, e.Value)
	}
	return values
}
var test_env = &ENV{
DataWareDB: "DataWarehouse_test:FihdZW7o1XKtDETZexOG@tcp(test01-secooDataWarehouse.master.com:3306)/secooDataWarehouse",
ErpDB: "3306_test:iS6CXpYqgZ8Mhjui@tcp(10.4.3.223:3306)/secooErpDB",
......@@ -79,7 +91,7 @@ func convertToPinyin(str string) string {
// cleanKeyword normalizes a raw search keyword into the form used as a
// lookup key.
//
// The keyword is passed through t2s.Convert (presumably traditional-to-
// simplified Chinese conversion; confirm against the t2s package), stripped
// of surrounding whitespace, passed through DBC2SBC, trimmed, lower-cased,
// and finally has every run of whitespace collapsed to a single space.
//
// A conversion error is only printed; processing continues with whatever
// t2s.Convert returned.
func cleanKeyword(keyword string) string {
	out, err := t2s.Convert(keyword)
	if err != nil {
		fmt.Println(err)
	}
	// strings.Trim treats its second argument as a set of characters, not a
	// "|"-separated list: this strips leading/trailing U+FFFC, '|', ',' and '.'.
	keyword = strings.ToLower(strings.Trim(DBC2SBC(strings.TrimSpace(out)), "\ufffc|,|."))
	return strings.Join(strings.Fields(keyword), " ")
}
......@@ -112,3 +124,41 @@ func DBC2SBC(s string) string {
}
return strings.Join(strLst, "")
}
// Post sends data as a JSON-encoded HTTP POST request and returns the
// response body as a string.
//
// url is the request address, data is the payload (encoded with
// encoding/json) and contentType is the Content-Type header value, e.g.
// application/json. The request uses a 5 second timeout.
//
// Post panics if data cannot be encoded or if the request cannot be
// completed. If the response body cannot be read in full, the error is
// printed and whatever was read is returned. The HTTP status code is not
// inspected.
func Post(url string, data interface{}, contentType string) string {
	// An unencodable payload is a caller bug; fail the same way a transport
	// failure does rather than silently posting an empty body.
	payload, err := json.Marshal(data)
	if err != nil {
		panic(fmt.Errorf("encoding POST payload for %s: %w", url, err))
	}

	// Timeout: 5 seconds.
	client := &http.Client{Timeout: 5 * time.Second}
	resp, err := client.Post(url, contentType, bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	result, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		fmt.Println(err)
	}
	return string(result)
}
// sendSuggestNotify posts a fixed alert message to the matrix-inform
// sendToUser endpoint. The message has a fixed title, a single body entry
// and a single phones entry. The response is discarded.
func sendSuggestNotify() {
	recipients := list.New()
	recipients.PushBack("17621863255,13894895183")

	lines := list.New()
	lines.PushBack("提示词数据太少")

	alert := Message{
		Title:  "提示词数据异常",
		Phones: recipients,
		Body:   lines,
	}
	Post("http://matrix-inform.secoolocal.com/user/sendToUser", alert, "application/json")
}
......@@ -19,7 +19,8 @@ import (
"unicode"
"unicode/utf8"
"crypto/md5"
)
)
type Word struct {
Keyword string `json:"keyword"`
......@@ -53,6 +54,7 @@ type Word struct {
MonthProductClickUv int32 `json:"-"`
MonthAddCartUv int32 `json:"-"`
SuggestTags string `json:"suggestTags"`
UpdateTime int64 `json:"updateTime"`
}
// wordMap is the shared word store. queryIndex looks entries up by cleaned
// keyword and merges a freshly read row into an entry that already exists.
var wordMap sync.Map
......@@ -70,16 +72,21 @@ var prefixFilterArr = []string{"https://", "http://", "dg", "d & g", "dolce&gabb
// TABLE_SPLIT_STEP_SIZE is the width of the id window read by one queryIndex
// call: each call selects rows with idFlag <= id < idFlag+TABLE_SPLIT_STEP_SIZE.
const TABLE_SPLIT_STEP_SIZE = 10000
// LEVEL_SIZE is the number of queryIndex goroutines main starts per batch
// before it waits for the batch to finish.
const LEVEL_SIZE = 1
// MAX_TAG_SIZE is not referenced in the visible part of this file;
// presumably the maximum number of suggest tags kept per word (TODO confirm).
const MAX_TAG_SIZE = 5
// UPDATE_TIME is the program start time in milliseconds since the Unix epoch,
// evaluated once at package initialization. queryIndex stamps it into
// Word.UpdateTime for every word built during this run.
var UPDATE_TIME = time.Now().UnixNano() / 1e6
func main() {
startTime := time.Now()
datawareDB, err := sql.Open("mysql", RUN_ENV.DataWareDB)
if err != nil { log.Print(err.Error()) }
datawareDB.SetConnMaxLifetime(10*time.Minute)
datawareDB.SetMaxOpenConns(50)
datawareDB.SetMaxIdleConns(50)
datawareDB.SetMaxOpenConns(350)
datawareDB.SetMaxIdleConns(100)
var client *elastic.Client
if RUN_ENV.EsUser != "" {
......@@ -91,7 +98,7 @@ func main() {
bulkProcessor, err := elastic.NewBulkProcessorService(client).
Workers(50).
BulkActions(5000).
BulkActions(2000).
FlushInterval(500*time.Millisecond).
After(after).
Do(context.Background())
......@@ -107,13 +114,25 @@ func main() {
count := arr[1] / TABLE_SPLIT_STEP_SIZE
log.Printf("maxId/10000=%d\n", count)
for i := 0; i <= count; i++ {
if arr[1] < 2800000 {
log.Printf("data is too little ,return")
sendSuggestNotify()
return
}
for i:= 0; i < count; i = i + LEVEL_SIZE {
for j := 0; j < LEVEL_SIZE; j++ {
log.Printf("add to wait %d", i+j)
wg.Add(1)
go queryIndex(i * TABLE_SPLIT_STEP_SIZE, datawareDB, bulkProcessor, &wg)
go queryIndex( (i + j ) *TABLE_SPLIT_STEP_SIZE, datawareDB, bulkProcessor, &wg)
}
wg.Wait()
log.Println("wait finish ")
}
wg.Wait()
fmt.Println("all thread has read maps")
checkUnusedData(bulkProcessor)
......@@ -286,9 +305,14 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
"week_uv, week_product_click_uv, week_add_cart_uv, " +
"month_pv, month_product_click_count, month_add_cart_count, month_uv, month_product_click_uv, month_add_cart_uv, prepare_tags " +
"from app_search_keyword_year_week_p_day where id >= %d and id < %d", idFlag, idFlag + TABLE_SPLIT_STEP_SIZE)
log.Print(sqlStr)
results, err := db.Query(sqlStr)
if err != nil { log.Print(err.Error()) }
log.Print("read database success ")
for results.Next() {
var id int
var keyword sql.NullString
......@@ -335,8 +359,6 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
if keyword.Valid && len(keyword.String) > 0 && keyword.String != "" {
key := cleanKeyword(keyword.String)
var w = &Word{
Keyword:key,
YearCount: int32(yearPv.Int64),
......@@ -355,7 +377,8 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
MonthUv: int32(monthUv.Int64),
MonthProductClickUv: int32(monthProductClickUv.Int64),
MonthAddCartUv: int32(monthAddCartUv.Int64) ,
SuggestTags: prepareTags.String}
SuggestTags: prepareTags.String,
UpdateTime: UPDATE_TIME}
if v, isExist := wordMap.Load(key); isExist {
merge(w,v)
......@@ -404,22 +427,26 @@ func after(executionId int64, requests []elastic.BulkableRequest, response *elas
func processWord(w *Word) {
w.KeywordPinYin = convertToPinyin(w.Keyword)
// 年点击加购率
w.YearClickRatio = calculateRatio(w.YearClickCount, w.YearCount)
w.YearCartRatio = calculateRatio(w.YearCartCount, w.YearCount)
// 周点击加购率
w.WeekClickRatio = calculateRatio(w.WeekClickCount, w.WeekCount)
w.WeekCartRatio = calculateRatio(w.WeekCartCount, w.WeekCount)
// 非默认值,加权
// 年加购率 再加权
if w.YearCount != 0 && w.YearCartCount != 0 {
w.YearCartRatio *= 3
}
// 非默认值,加权
// 周加购率 再加权
if w.WeekCount != 0 && w.WeekCartCount != 0 {
w.WeekCartRatio *= 3
}
// 非默认值,加权
// 周点击率 再加权
if w.WeekCount != 0 && w.WeekClickCount != 0 {
w.WeekClickRatio *= 2
}
......@@ -452,22 +479,22 @@ func isFilterWord(w *Word) bool {
if w.IsSensitive { return true }
// 过滤掉太长的词 每个中文字占3个byte
if utf8.RuneCountInString(w.Keyword) <= 1 || len(w.Keyword) > 60 { return true }
if utf8.RuneCountInString(w.Keyword) <= 1 || len(w.Keyword) > 50 { return true }
// 过滤掉商品id,商品id是有7位数字组成
if len(w.Keyword) > 6 && isAllDigit(w.Keyword) { return true }
// 品牌词 类目词 人工干预词 不做过滤
if w.IsBrand || w.IsCategory || w.IsManual { return false }
// 年数据过滤
if w.YearCount == 0 || w.YearClickCount == 0 { return true }
// 前缀过滤
for _, v := range prefixFilterArr {
if strings.HasPrefix(w.Keyword, v) { return true }
}
// 品牌词 类目词 人工干预词 不做过滤
if w.IsBrand || w.IsCategory || w.IsManual { return false }
// 年数据过滤
if w.YearCount < 2 || w.YearClickCount < 2 { return true }
// 判断是否是热搜词 一年内搜索次数大于50或者一周内搜索次数大于5
if isHotSearchWord(w) {
// 搜索次数比较多 转化率或者点击率较高的 不过滤
......@@ -505,14 +532,22 @@ func calculateRatio(numerator int32, denominator int32) float64 {
}
func calculateWordRank(w *Word) {
wordRank := 10000.0
// 长度因子
wordRank += 3000 * calculateLengthFactor(len(w.Keyword))
// 年数量因子
wordRank += 2000 * calculateCountFactor(w.YearCount, 1)
// 周数量因子
wordRank += 2000 * calculateCountFactor(w.WeekCount, 52)
// 年点击率因子
wordRank += 3000 * calculateRatioFactor(w.YearClickRatio, w.YearClickCount)
// 周点击率因子
wordRank += 3000 * calculateRatioFactor(w.WeekClickRatio, w.WeekClickCount)
// 年加购率因子
wordRank += 3000 * calculateRatioFactor(w.YearCartRatio, w.YearCartCount)
// 周加购率因子
wordRank += 3000 * calculateRatioFactor(w.WeekCartRatio, w.WeekCartCount)
if w.IsBrand { wordRank *= 1.8 }
if w.IsCategory { wordRank *= 1.2 }
......@@ -522,8 +557,11 @@ func calculateWordRank(w *Word) {
func calculateWordABRank(w *Word) {
// 月点击加购率
monthClickRatio := calculateRatio(w.MonthProductClickUv, w.MonthUv)
monthCartRatio := calculateRatio(w.MonthAddCartUv, w.MonthUv)
// 周点击加购率(和A相比, count 换成了uv)
weekClickRatioNew := calculateRatio(w.WeekClickUv, w.WeekUv)
weekCartRatioNew := calculateRatio(w.WeekAddCartUv, w.WeekUv)
......@@ -549,24 +587,28 @@ func calculateWordABRank(w *Word) {
wordABRank := 10000.0
// 长度因子
wordABRank += 3000 * calculateLengthFactor(len(w.Keyword))
wordABRank += 2000 * calculateCountFactor(w.MonthUv, 1)
wordABRank += 2000 * calculateCountFactor(w.WeekUv, 4)
// 点击
// 年
// 月数量因子
wordABRank += 2000 * calculateCountFactor(w.MonthUv, 4)
// 周数量因子
wordABRank += 2000 * calculateCountFactor(w.WeekUv, 52)
// 年数量因子
wordABRank += 2000 * calculateCountFactor(w.YearCount, 1)
// 月
// 点击
// 月点击率因子
wordABRank += 3000 * calculateRatioFactor(monthClickRatio, w.MonthProductClickUv)
// 周
// 周点击率因子
wordABRank += 3000 * calculateRatioFactor(weekClickRatioNew, w.WeekUv)
// 加购
// 年
// 年加购率因子
wordABRank += 3000 * calculateRatioFactor(w.YearCartRatio, w.YearCartCount)
// 月
// 月加购率因子
wordABRank += 3000 * calculateRatioFactor(monthCartRatio, w.MonthUv)
// 周
// 周加购率因子
wordABRank += 3000 * calculateRatioFactor(weekCartRatioNew, w.WeekUv)
if w.IsBrand { wordABRank *= 1.8 }
......
......@@ -16,16 +16,8 @@ type B struct {
}
// main is a scratch program: it prints the first MAX_TAG_SIZE
// comma-separated fields of an empty string, joined by commas, followed by
// the result of a strings.HasPrefix check.
func main() {
	tags := strings.Split("", ",")

	// Keep at most MAX_TAG_SIZE tags.
	limit := len(tags)
	if MAX_TAG_SIZE < limit {
		limit = MAX_TAG_SIZE
	}

	joined := ""
	if limit > 0 {
		joined = strings.Join(tags[:limit], ",")
	}
	fmt.Print(joined)

	fmt.Print(strings.HasPrefix("tod's", "tod's"))
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment