Unverified Commit bca1a519 by David Star Committed by GitHub

Merge pull request #1 from yanchaosb123/rank_opt

算分优化
parents a59ff1f5 33b7d636
package main package main
import ( import (
"strings" "net/http"
"time"
"encoding/json"
"bytes"
"io/ioutil"
"container/list"
"strings"
"github.com/mozillazg/go-pinyin" "github.com/mozillazg/go-pinyin"
"fmt"
"strconv" "strconv"
"fmt"
) )
type ENV struct { type ENV struct {
...@@ -16,6 +22,13 @@ type ENV struct { ...@@ -16,6 +22,13 @@ type ENV struct {
ManualFolder string ManualFolder string
SensitiveFolder string SensitiveFolder string
} }
// Message is the notification payload that sendSuggestNotify hands to Post.
//
// Phones and Body are linked lists. container/list.List exposes no exported
// fields, so encoding/json would render a list as "{}" and drop every element;
// MarshalJSON below emits the list elements as JSON arrays instead.
type Message struct {
	// Title is the notification title.
	Title string
	// Phones holds the recipient entries, one list element per entry.
	Phones *list.List
	// Body holds the notification text, one list element per entry.
	Body *list.List
}

// MarshalJSON encodes the message with the same keys the default encoder
// would use (Title, Phones, Body) but writes Phones and Body as arrays of
// their list elements. A nil list is still encoded as null.
func (m Message) MarshalJSON() ([]byte, error) {
	return json.Marshal(struct {
		Title  string
		Phones []interface{}
		Body   []interface{}
	}{
		Title:  m.Title,
		Phones: messageListValues(m.Phones),
		Body:   messageListValues(m.Body),
	})
}

// messageListValues copies the elements of l, front to back, into a slice.
// It returns nil for a nil list and an empty, non-nil slice for an empty list.
func messageListValues(l *list.List) []interface{} {
	if l == nil {
		return nil
	}
	values := make([]interface{}, 0, l.Len())
	for e := l.Front(); e != nil; e = e.Next() {
		values = append(values, e.Value)
	}
	return values
}
var test_env = &ENV{ var test_env = &ENV{
DataWareDB: "DataWarehouse_test:FihdZW7o1XKtDETZexOG@tcp(test01-secooDataWarehouse.master.com:3306)/secooDataWarehouse", DataWareDB: "DataWarehouse_test:FihdZW7o1XKtDETZexOG@tcp(test01-secooDataWarehouse.master.com:3306)/secooDataWarehouse",
ErpDB: "3306_test:iS6CXpYqgZ8Mhjui@tcp(10.4.3.223:3306)/secooErpDB", ErpDB: "3306_test:iS6CXpYqgZ8Mhjui@tcp(10.4.3.223:3306)/secooErpDB",
...@@ -37,12 +50,28 @@ var prod_env = &ENV { ...@@ -37,12 +50,28 @@ var prod_env = &ENV {
SensitiveFolder: "/data/pssmaster/corpus_set/suggest_corpus/sensitive"} SensitiveFolder: "/data/pssmaster/corpus_set/suggest_corpus/sensitive"}
// 重要,该参数 确定是 正式还是 测试环境 // 重要,该参数 确定是 正式还是 测试环境
var RUN_ENV = test_env var RUN_ENV = prod_env
/************************* 下面是 util 方法 *****************************/ /************************* 下面是 util 方法 *****************************/
// CH_EN_PUNC is a lookup table from a single punctuation character (as a
// string) to the ASCII punctuation that should replace it, e.g. the
// ideographic full stop maps to "." and the curly quotes map to straight quotes.
// NOTE(review): several entries as written here map an ASCII character to
// itself (",", "!", "?", "(", ")"); presumably the keys were meant to be the
// full-width forms — confirm against the original file encoding.
var CH_EN_PUNC = map[string]string {
",":",",
"。":".",
"!":"!",
"?":"?",
"【":"[",
"】":"]",
"(":"(",
")":")",
"‘":"'",
"’":"'",
"“":"\"",
"”":"\"",
}
func convertToPinyin(str string) string { func convertToPinyin(str string) string {
var ret string var ret string
for _, v := range str { for _, v := range str {
...@@ -62,7 +91,7 @@ func convertToPinyin(str string) string { ...@@ -62,7 +91,7 @@ func convertToPinyin(str string) string {
func cleanKeyword(keyword string) string { func cleanKeyword(keyword string) string {
out, err := t2s.Convert(keyword) out, err := t2s.Convert(keyword)
if err != nil { fmt.Println(err) } if err != nil { fmt.Println(err) }
keyword = strings.ToLower(strings.Trim(DBC2SBC(strings.TrimSpace(out)),"\ufffc|,")) keyword = strings.ToLower(strings.Trim(DBC2SBC(strings.TrimSpace(out)),"\ufffc|,|."))
return strings.Join(strings.Fields(keyword)," ") return strings.Join(strings.Fields(keyword)," ")
} }
...@@ -78,16 +107,58 @@ func DBC2SBC(s string) string { ...@@ -78,16 +107,58 @@ func DBC2SBC(s string) string {
var strLst []string var strLst []string
for _, i := range s { for _, i := range s {
insideCode := i insideCode := i
if insideCode == 12288 { if insideCode == 12288 {
insideCode = 32 insideCode = 32
} else { } else {
insideCode -= 65248 insideCode -= 65248
} }
if insideCode < 32 || insideCode > 126 {
if key,exist := CH_EN_PUNC[string(i)]; exist {
strLst = append(strLst, key)
} else if insideCode < 32 || insideCode > 126 {
strLst = append(strLst, string(i)) strLst = append(strLst, string(i))
} else { } else {
strLst = append(strLst, string(insideCode)) strLst = append(strLst, string(insideCode))
} }
} }
return strings.Join(strLst, "") return strings.Join(strLst, "")
} }
\ No newline at end of file
// Post encodes data as JSON, sends it as the body of an HTTP POST request and
// returns the response body as a string.
//
//   - url: request address
//   - data: value to encode as the request body
//   - contentType: Content-Type of the request body, e.g. application/json
//
// If data cannot be encoded, the error is printed and "" is returned without
// sending anything (previously the error was dropped and an empty body was
// posted). A transport failure panics, as before. If reading the response
// fails, the error is printed and whatever was read so far is returned.
func Post(url string, data interface{}, contentType string) string {
	payload, err := json.Marshal(data)
	if err != nil {
		fmt.Println(err)
		return ""
	}

	// Timeout: 5 seconds for the whole request.
	client := &http.Client{Timeout: 5 * time.Second}
	resp, err := client.Post(url, contentType, bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	result, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		fmt.Println(err)
	}
	return string(result)
}
// sendSuggestNotify builds a fixed alert Message (title, one body line, one
// phones entry) and posts it as application/json to the matrix-inform
// sendToUser endpoint. The response text returned by Post is discarded.
//
// NOTE(review): the phones list holds a single element containing two
// comma-separated numbers — confirm that is the format the endpoint expects.
// NOTE(review): container/list values have no exported fields, so unless
// Message customizes its JSON encoding they are rendered as {} — confirm the
// payload the endpoint actually receives.
func sendSuggestNotify() {
	recipients := list.New()
	recipients.PushBack("17621863255,13894895183")

	lines := list.New()
	lines.PushBack("提示词数据太少")

	alert := Message{
		Title:  "提示词数据异常",
		Phones: recipients,
		Body:   lines,
	}
	Post("http://matrix-inform.secoolocal.com/user/sendToUser", alert, "application/json")
}
...@@ -3,8 +3,7 @@ package main ...@@ -3,8 +3,7 @@ package main
import ( import (
"bufio" "bufio"
"context" "context"
"crypto/md5" "database/sql"
"database/sql"
"fmt" "fmt"
_ "github.com/go-sql-driver/mysql" _ "github.com/go-sql-driver/mysql"
"github.com/liuzl/gocc" "github.com/liuzl/gocc"
...@@ -19,7 +18,9 @@ import ( ...@@ -19,7 +18,9 @@ import (
"time" "time"
"unicode" "unicode"
"unicode/utf8" "unicode/utf8"
) "crypto/md5"
)
type Word struct { type Word struct {
Keyword string `json:"keyword"` Keyword string `json:"keyword"`
...@@ -42,6 +43,18 @@ type Word struct { ...@@ -42,6 +43,18 @@ type Word struct {
WordRank float64 `json:"wordRank"` WordRank float64 `json:"wordRank"`
WordABRank float64 `json:"wordABRank"` WordABRank float64 `json:"wordABRank"`
KeywordVersion string `json:"keywordVersion"` KeywordVersion string `json:"keywordVersion"`
WeekUv int32 `json:"-"`
WeekClickUv int32 `json:"-"`
WeekAddCartUv int32 `json:"-"`
MonthPv int32 `json:"-"`
MonthClickCount int32 `json:"-"`
MonthAddCartCount int32 `json:"-"`
MonthUv int32 `json:"-"`
MonthProductClickUv int32 `json:"-"`
MonthAddCartUv int32 `json:"-"`
SuggestTags string `json:"suggestTags"`
UpdateTime int64 `json:"updateTime"`
} }
var wordMap sync.Map var wordMap sync.Map
...@@ -55,18 +68,25 @@ var dateStr = fmt.Sprintf("%d-%02d-%02d",now.Year(),now.Month(),now.Day()) ...@@ -55,18 +68,25 @@ var dateStr = fmt.Sprintf("%d-%02d-%02d",now.Year(),now.Month(),now.Day())
var t2s, _ = gocc.New("t2s") var t2s, _ = gocc.New("t2s")
var prefixFilterArr = []string{"https://", "http://", "dg", "d & g", "dolce&gabbana", var prefixFilterArr = []string{"https://", "http://", "dg", "d & g", "dolce&gabbana",
"dolce & gabbana", "杜嘉班纳", "避孕", "情趣", "cucci", "乒乓球", "cuccl", "gucii"} "dolce & gabbana", "杜嘉班纳", "避孕", "情趣", "cucci", "乒乓球", "cuccl", "gucii","tod's","iwc7" }
const TABLE_SPLIT_STEP_SIZE = 10000 const TABLE_SPLIT_STEP_SIZE = 10000
const LEVEL_SIZE = 1
const MAX_TAG_SIZE = 5
var UPDATE_TIME = time.Now().UnixNano() / 1e6
func main() { func main() {
startTime := time.Now() startTime := time.Now()
datawareDB, err := sql.Open("mysql", RUN_ENV.DataWareDB) datawareDB, err := sql.Open("mysql", RUN_ENV.DataWareDB)
if err != nil { log.Print(err.Error()) } if err != nil { log.Print(err.Error()) }
datawareDB.SetConnMaxLifetime(10*time.Minute) datawareDB.SetConnMaxLifetime(10*time.Minute)
datawareDB.SetMaxOpenConns(50) datawareDB.SetMaxOpenConns(350)
datawareDB.SetMaxIdleConns(50) datawareDB.SetMaxIdleConns(100)
var client *elastic.Client var client *elastic.Client
if RUN_ENV.EsUser != "" { if RUN_ENV.EsUser != "" {
...@@ -78,8 +98,11 @@ func main() { ...@@ -78,8 +98,11 @@ func main() {
bulkProcessor, err := elastic.NewBulkProcessorService(client). bulkProcessor, err := elastic.NewBulkProcessorService(client).
Workers(50). Workers(50).
BulkActions(5000). BulkActions(2000).
FlushInterval(500*time.Millisecond). FlushInterval(500*time.Millisecond).
Backoff( elastic.NewExponentialBackoff(
time.Duration(10000)*time.Millisecond,
time.Duration(100000)*time.Millisecond) ).
After(after). After(after).
Do(context.Background()) Do(context.Background())
if err != nil { log.Print(err.Error()) } if err != nil { log.Print(err.Error()) }
...@@ -94,12 +117,21 @@ func main() { ...@@ -94,12 +117,21 @@ func main() {
count := arr[1] / TABLE_SPLIT_STEP_SIZE count := arr[1] / TABLE_SPLIT_STEP_SIZE
log.Printf("maxId/10000=%d\n", count) log.Printf("maxId/10000=%d\n", count)
for i := 0; i <= count; i++ { if arr[1] < 2800000 {
go queryIndex(i * TABLE_SPLIT_STEP_SIZE, datawareDB, bulkProcessor, &wg) log.Printf("data is too little ,return")
sendSuggestNotify()
return
}
for j := 0; j < count; j++ {
wg.Add(1)
go queryIndex( j *TABLE_SPLIT_STEP_SIZE, datawareDB, bulkProcessor, &wg)
} }
wg.Wait() wg.Wait()
fmt.Println("all thread has read maps") fmt.Println("all thread has read maps")
checkUnusedData(bulkProcessor) checkUnusedData(bulkProcessor)
err = bulkProcessor.Flush() err = bulkProcessor.Flush()
...@@ -130,10 +162,29 @@ func checkUnusedData(bulkProcessor *elastic.BulkProcessor) { ...@@ -130,10 +162,29 @@ func checkUnusedData(bulkProcessor *elastic.BulkProcessor) {
} }
} }
// cleanForEs tidies the fields of w before it is written to es.
//
// A SuggestTags value of exactly "null" or "NULL" is replaced by the empty
// string. Otherwise SuggestTags is treated as a comma-separated list and is
// truncated to its first MAX_TAG_SIZE entries.
func cleanForEs(w *Word) {
	switch w.SuggestTags {
	case "null", "NULL":
		w.SuggestTags = ""
		return
	}

	tags := strings.Split(w.SuggestTags, ",")
	if len(tags) > MAX_TAG_SIZE {
		tags = tags[:MAX_TAG_SIZE]
	}
	w.SuggestTags = strings.Join(tags, ",")
}
func addWord(w *Word, processor *elastic.BulkProcessor) { func addWord(w *Word, processor *elastic.BulkProcessor) {
processWord(w) processWord(w)
if !isFilterWord(w) { if !isFilterWord(w) {
wordMap.Store(w.Keyword,w) wordMap.Store(w.Keyword,w)
cleanForEs(w)
id := fmt.Sprintf("%x", md5.Sum([]byte(w.Keyword))) id := fmt.Sprintf("%x", md5.Sum([]byte(w.Keyword)))
req := elastic.NewBulkIndexRequest(). req := elastic.NewBulkIndexRequest().
Index("search_suggest_index"). Index("search_suggest_index").
...@@ -149,17 +200,25 @@ func loadErpDB() { ...@@ -149,17 +200,25 @@ func loadErpDB() {
if err != nil { log.Print(err.Error()) } if err != nil { log.Print(err.Error()) }
defer db.Close() defer db.Close()
var brandQuery = fmt.Sprintf("select id,en_name,ch_name from secooErpDB.t_product_brand where is_del = 0 and enabled = 1") var brandQuery = fmt.Sprintf("select id,en_name,ch_name,short_name,nickname from secooErpDB.t_product_brand where is_del = 0 and enabled = 1")
brandResults, err := db.Query(brandQuery) brandResults, err := db.Query(brandQuery)
if err != nil { panic(err.Error()) } if err != nil { panic(err.Error()) }
for brandResults.Next() { for brandResults.Next() {
var id int var id int
var enName string var enName string
var chName string var chName string
err = brandResults.Scan(&id, &enName, &chName) var shortName sql.NullString
var nickName sql.NullString
err = brandResults.Scan(&id, &enName, &chName,&shortName,&nickName)
if err != nil { panic(err.Error()) } if err != nil { panic(err.Error()) }
brandMap[cleanKeyword(enName)] = id brandMap[cleanKeyword(enName)] = id
brandMap[cleanKeyword(chName)] = id brandMap[cleanKeyword(chName)] = id
if _,exist := brandMap[cleanKeyword(shortName.String)]; !exist {
brandMap[cleanKeyword(shortName.String)] = id
}
if _,exist := brandMap[cleanKeyword(nickName.String)]; !exist {
brandMap[cleanKeyword(nickName.String)] = id
}
} }
fmt.Println("brandMap size is :", len(brandMap), ", brandMap is ", brandMap) fmt.Println("brandMap size is :", len(brandMap), ", brandMap is ", brandMap)
...@@ -236,13 +295,21 @@ func queryInfo(db *sql.DB) []int { ...@@ -236,13 +295,21 @@ func queryInfo(db *sql.DB) []int {
} }
func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg *sync.WaitGroup) { func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg *sync.WaitGroup) {
wg.Add(1)
// 循环时可能查询到重复数据,应该以id 的上下界来查询 // 循环时可能查询到重复数据,应该以id 的上下界来查询
var sqlStr = fmt.Sprintf("select id, keyword, year_pv, year_product_click_count, year_add_cart_count, " + var sqlStr = fmt.Sprintf("select id, keyword, year_pv, year_product_click_count, year_add_cart_count, " +
"week_pv, week_product_click_count, week_add_cart_count, p_day from app_search_keyword_year_week_p_day where id >= %d and id < %d", idFlag, idFlag + TABLE_SPLIT_STEP_SIZE) "week_pv, week_product_click_count, week_add_cart_count, p_day, " +
"week_uv, week_product_click_uv, week_add_cart_uv, " +
"month_pv, month_product_click_count, month_add_cart_count, month_uv, month_product_click_uv, month_add_cart_uv, prepare_tags " +
"from app_search_keyword_year_week_p_day where id >= %d and id < %d", idFlag, idFlag + TABLE_SPLIT_STEP_SIZE)
log.Print(sqlStr)
results, err := db.Query(sqlStr) results, err := db.Query(sqlStr)
if err != nil { log.Print(err.Error()) } if err != nil { log.Print(err.Error()) }
log.Print("read database success ")
for results.Next() { for results.Next() {
var id int var id int
var keyword sql.NullString var keyword sql.NullString
...@@ -253,8 +320,37 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg ...@@ -253,8 +320,37 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
var weekProductClickCount sql.NullInt64 var weekProductClickCount sql.NullInt64
var weekAddCartCount sql.NullInt64 var weekAddCartCount sql.NullInt64
var pDay string var pDay string
var weekUv sql.NullInt64
var weekClickUv sql.NullInt64
var weekAddCartUv sql.NullInt64
var monthPv sql.NullInt64
var monthClickCount sql.NullInt64
var monthAddCartCount sql.NullInt64
var monthUv sql.NullInt64
var monthProductClickUv sql.NullInt64
var monthAddCartUv sql.NullInt64
var prepareTags sql.NullString
err = results.Scan(&id,
&keyword,
&yearPv,
&yearProductClickCount,
&yearAddCartCount,
&weekPv,
&weekProductClickCount,
&weekAddCartCount,
&pDay,
&weekUv,
&weekClickUv,
&weekAddCartUv,
&monthPv,
&monthClickCount,
&monthAddCartCount,
&monthUv,
&monthProductClickUv,
&monthAddCartUv,
&prepareTags)
err = results.Scan(&id, &keyword, &yearPv, &yearProductClickCount, &yearAddCartCount, &weekPv, &weekProductClickCount, &weekAddCartCount, &pDay)
if err != nil { log.Print(err.Error()) } if err != nil { log.Print(err.Error()) }
if keyword.Valid && len(keyword.String) > 0 && keyword.String != "" { if keyword.Valid && len(keyword.String) > 0 && keyword.String != "" {
...@@ -268,7 +364,18 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg ...@@ -268,7 +364,18 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
WeekCount: int32(weekPv.Int64), WeekCount: int32(weekPv.Int64),
WeekClickCount: int32(weekProductClickCount.Int64), WeekClickCount: int32(weekProductClickCount.Int64),
WeekCartCount: int32(weekAddCartCount.Int64), WeekCartCount: int32(weekAddCartCount.Int64),
KeywordVersion:pDay} KeywordVersion:pDay,
WeekUv: int32(weekUv.Int64),
WeekClickUv: int32(weekClickUv.Int64),
WeekAddCartUv: int32(weekAddCartUv.Int64),
MonthPv: int32(monthPv.Int64),
MonthClickCount: int32(monthClickCount.Int64),
MonthAddCartCount: int32(monthAddCartCount.Int64),
MonthUv: int32(monthUv.Int64),
MonthProductClickUv: int32(monthProductClickUv.Int64),
MonthAddCartUv: int32(monthAddCartUv.Int64) ,
SuggestTags: prepareTags.String,
UpdateTime: UPDATE_TIME}
if v, isExist := wordMap.Load(key); isExist { if v, isExist := wordMap.Load(key); isExist {
merge(w,v) merge(w,v)
...@@ -296,6 +403,19 @@ func merge(word *Word, v interface{}) { ...@@ -296,6 +403,19 @@ func merge(word *Word, v interface{}) {
word.WeekCount += t.WeekCount word.WeekCount += t.WeekCount
word.WeekCartCount += t.WeekCartCount word.WeekCartCount += t.WeekCartCount
word.WeekClickCount += t.WeekClickCount word.WeekClickCount += t.WeekClickCount
word.WeekUv += t.WeekUv
word.WeekClickUv += t.WeekClickUv
word.WeekAddCartUv += t.WeekAddCartUv
word.MonthPv += t.MonthPv
word.MonthClickCount += t.MonthClickCount
word.MonthAddCartCount += t.MonthAddCartCount
word.MonthUv += t.MonthUv
word.MonthProductClickUv += t.MonthProductClickUv
word.MonthAddCartUv += t.MonthAddCartUv
if len(word.SuggestTags) == 0 || "null" == word.SuggestTags || "NULL" == word.SuggestTags {
word.SuggestTags = t.SuggestTags
}
} }
func after(executionId int64, requests []elastic.BulkableRequest, response *elastic.BulkResponse, err error) { func after(executionId int64, requests []elastic.BulkableRequest, response *elastic.BulkResponse, err error) {
...@@ -304,22 +424,26 @@ func after(executionId int64, requests []elastic.BulkableRequest, response *elas ...@@ -304,22 +424,26 @@ func after(executionId int64, requests []elastic.BulkableRequest, response *elas
func processWord(w *Word) { func processWord(w *Word) {
w.KeywordPinYin = convertToPinyin(w.Keyword) w.KeywordPinYin = convertToPinyin(w.Keyword)
// 年点击加购率
w.YearClickRatio = calculateRatio(w.YearClickCount, w.YearCount) w.YearClickRatio = calculateRatio(w.YearClickCount, w.YearCount)
w.YearCartRatio = calculateRatio(w.YearCartCount, w.YearCount) w.YearCartRatio = calculateRatio(w.YearCartCount, w.YearCount)
// 周点击加购率
w.WeekClickRatio = calculateRatio(w.WeekClickCount, w.WeekCount) w.WeekClickRatio = calculateRatio(w.WeekClickCount, w.WeekCount)
w.WeekCartRatio = calculateRatio(w.WeekCartCount, w.WeekCount) w.WeekCartRatio = calculateRatio(w.WeekCartCount, w.WeekCount)
// 非默认值,加权 // 年加购率 再加权
if w.YearCount != 0 && w.YearCartCount != 0 { if w.YearCount != 0 && w.YearCartCount != 0 {
w.YearCartRatio *= 3 w.YearCartRatio *= 3
} }
// 非默认值,加权 // 周加购率 再加权
if w.WeekCount != 0 && w.WeekCartCount != 0 { if w.WeekCount != 0 && w.WeekCartCount != 0 {
w.WeekCartRatio *= 3 w.WeekCartRatio *= 3
} }
// 非默认值,加权 // 周点击率 再加权
if w.WeekCount != 0 && w.WeekClickCount != 0 { if w.WeekCount != 0 && w.WeekClickCount != 0 {
w.WeekClickRatio *= 2 w.WeekClickRatio *= 2
} }
...@@ -352,22 +476,22 @@ func isFilterWord(w *Word) bool { ...@@ -352,22 +476,22 @@ func isFilterWord(w *Word) bool {
if w.IsSensitive { return true } if w.IsSensitive { return true }
// 过滤掉太长的词 每个中文字占3个byte // 过滤掉太长的词 每个中文字占3个byte
if utf8.RuneCountInString(w.Keyword) <= 1 || len(w.Keyword) > 60 { return true } if utf8.RuneCountInString(w.Keyword) <= 1 || len(w.Keyword) > 50 { return true }
// 过滤掉商品id,商品id是有7位数字组成 // 过滤掉商品id,商品id是有7位数字组成
if len(w.Keyword) > 6 && isAllDigit(w.Keyword) { return true } if len(w.Keyword) > 6 && isAllDigit(w.Keyword) { return true }
// 品牌词 类目词 人工干预词 不做过滤
if w.IsBrand || w.IsCategory || w.IsManual { return false }
// 年数据过滤
if w.YearCount == 0 || w.YearClickCount == 0 { return true }
// 前缀过滤 // 前缀过滤
for _, v := range prefixFilterArr { for _, v := range prefixFilterArr {
if strings.HasPrefix(w.Keyword, v) { return true } if strings.HasPrefix(w.Keyword, v) { return true }
} }
// 品牌词 类目词 人工干预词 不做过滤
if w.IsBrand || w.IsCategory || w.IsManual { return false }
// 年数据过滤
if w.YearCount < 2 || w.YearClickCount < 2 { return true }
// 判断是否是热搜词 一年内搜索次数大于50或者一周内搜索次数大于5 // 判断是否是热搜词 一年内搜索次数大于50或者一周内搜索次数大于5
if isHotSearchWord(w) { if isHotSearchWord(w) {
// 搜索次数比较多 转化率或者点击率较高的 不过滤 // 搜索次数比较多 转化率或者点击率较高的 不过滤
...@@ -405,14 +529,22 @@ func calculateRatio(numerator int32, denominator int32) float64 { ...@@ -405,14 +529,22 @@ func calculateRatio(numerator int32, denominator int32) float64 {
} }
func calculateWordRank(w *Word) { func calculateWordRank(w *Word) {
wordRank := 10000.0 wordRank := 10000.0
// 长度因子
wordRank += 3000 * calculateLengthFactor(len(w.Keyword)) wordRank += 3000 * calculateLengthFactor(len(w.Keyword))
// 年数量因子
wordRank += 2000 * calculateCountFactor(w.YearCount, 1) wordRank += 2000 * calculateCountFactor(w.YearCount, 1)
// 周数量因子
wordRank += 2000 * calculateCountFactor(w.WeekCount, 52) wordRank += 2000 * calculateCountFactor(w.WeekCount, 52)
// 年点击率因子
wordRank += 3000 * calculateRatioFactor(w.YearClickRatio, w.YearClickCount) wordRank += 3000 * calculateRatioFactor(w.YearClickRatio, w.YearClickCount)
// 周点击率因子
wordRank += 3000 * calculateRatioFactor(w.WeekClickRatio, w.WeekClickCount) wordRank += 3000 * calculateRatioFactor(w.WeekClickRatio, w.WeekClickCount)
// 年加购率因子
wordRank += 3000 * calculateRatioFactor(w.YearCartRatio, w.YearCartCount) wordRank += 3000 * calculateRatioFactor(w.YearCartRatio, w.YearCartCount)
// 周加购率因子
wordRank += 3000 * calculateRatioFactor(w.WeekCartRatio, w.WeekCartCount) wordRank += 3000 * calculateRatioFactor(w.WeekCartRatio, w.WeekCartCount)
if w.IsBrand { wordRank *= 1.8 } if w.IsBrand { wordRank *= 1.8 }
if w.IsCategory { wordRank *= 1.2 } if w.IsCategory { wordRank *= 1.2 }
...@@ -421,19 +553,60 @@ func calculateWordRank(w *Word) { ...@@ -421,19 +553,60 @@ func calculateWordRank(w *Word) {
} }
func calculateWordABRank(w *Word) { func calculateWordABRank(w *Word) {
// 月点击加购率
monthClickRatio := calculateRatio(w.MonthProductClickUv, w.MonthUv)
monthCartRatio := calculateRatio(w.MonthAddCartUv, w.MonthUv)
// 周点击加购率(和A相比, count 换成了uv)
weekClickRatioNew := calculateRatio(w.WeekClickUv, w.WeekUv)
weekCartRatioNew := calculateRatio(w.WeekAddCartUv, w.WeekUv)
// 月点击
if w.MonthProductClickUv != 0 && w.MonthUv != 0 {
monthClickRatio *= 1.5
}
// 月加购,加权
if w.MonthAddCartUv != 0 && w.MonthUv != 0 {
monthCartRatio *= 3
}
// 周点击,加权
if w.WeekClickUv != 0 && w.WeekUv != 0 {
weekClickRatioNew *= 2
}
// 周加购,加权
if w.WeekAddCartUv != 0 && w.WeekUv != 0 {
weekCartRatioNew *= 3
}
wordABRank := 10000.0 wordABRank := 10000.0
// 长度因子
wordABRank += 3000 * calculateLengthFactor(len(w.Keyword)) wordABRank += 3000 * calculateLengthFactor(len(w.Keyword))
// 月数量因子
wordABRank += 2000 * calculateCountFactor(w.MonthUv, 4)
// 周数量因子
wordABRank += 2000 * calculateCountFactor(w.WeekUv, 52)
// 年数量因子
wordABRank += 2000 * calculateCountFactor(w.YearCount, 1) wordABRank += 2000 * calculateCountFactor(w.YearCount, 1)
wordABRank += 2000 * calculateCountFactor(w.WeekCount, 52)
// 点击 // 点击
// 年点击改为 2000 // 月点击率因子
wordABRank += 2000 * calculateRatioFactor(w.YearClickRatio, w.YearClickCount) wordABRank += 3000 * calculateRatioFactor(monthClickRatio, w.MonthProductClickUv)
wordABRank += 3000 * calculateRatioFactor(w.WeekClickRatio, w.WeekClickCount) // 周点击率因子
wordABRank += 3000 * calculateRatioFactor(weekClickRatioNew, w.WeekUv)
// 加购 // 加购
// 年加购率因子
wordABRank += 3000 * calculateRatioFactor(w.YearCartRatio, w.YearCartCount) wordABRank += 3000 * calculateRatioFactor(w.YearCartRatio, w.YearCartCount)
wordABRank += 3000 * calculateRatioFactor(w.WeekCartRatio, w.WeekCartCount) // 月加购率因子
wordABRank += 3000 * calculateRatioFactor(monthCartRatio, w.MonthUv)
// 周加购率因子
wordABRank += 3000 * calculateRatioFactor(weekCartRatioNew, w.WeekUv)
if w.IsBrand { wordABRank *= 1.8 } if w.IsBrand { wordABRank *= 1.8 }
if w.IsCategory { wordABRank *= 1.2 } if w.IsCategory { wordABRank *= 1.2 }
......
package main package main
import ( import (
"encoding/json" "math"
"strings"
"fmt" "fmt"
) )
...@@ -13,20 +14,29 @@ type B struct { ...@@ -13,20 +14,29 @@ type B struct {
YearCartCount int32 `json:"yearCartCount"` YearCartCount int32 `json:"yearCartCount"`
ZhaoCount int32 `json:"-"` ZhaoCount int32 `json:"-"`
} }
func main() { func main() {
b := B{
Keyword: "赵延超", prefix := strings.HasPrefix("tod's", "tod's")
KeywordPinYin: "zhaoyanchao", fmt.Print(prefix)
YearCount: 1000,
YearCartCount: 100, }
YearClickCount: 10,
ZhaoCount: 2}
func calculateRatioFactor2(ratio float64, count int32) float64 {
if jsonBytes,errs := json.Marshal(b); errs == nil { var rank float64
fmt.Print(string(jsonBytes)) switch {
case count > 1 && count < 10 : rank = 1.2
case count >= 10 && count < 20 : rank = 1.4
case count >= 20 && count < 50 : rank = 1.6
case count >= 50 && count < 100 : rank = 1.8
case count >= 100 && count < 200 : rank = 2.0
case count >= 200 && count < 500 : rank = 2.2
case count >= 500 : rank = 2.5
default:rank = 1.0
} }
//根据搜索转化率,转换为热度因子
return math.Log10(math.Sqrt(ratio + 10)) * rank
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment