Commit cadd36e7 by zhaoyanchao

添加了 AB 分数,适配了数据库的更改而不影响 原逻辑

parent c70de180
...@@ -19,11 +19,13 @@ type ENV struct { ...@@ -19,11 +19,13 @@ type ENV struct {
var test_env = &ENV{ var test_env = &ENV{
DataWareDB: "DataWarehouse_test:FihdZW7o1XKtDETZexOG@tcp(test01-secooDataWarehouse.master.com:3306)/secooDataWarehouse", DataWareDB: "DataWarehouse_test:FihdZW7o1XKtDETZexOG@tcp(test01-secooDataWarehouse.master.com:3306)/secooDataWarehouse",
ErpDB: "3306_test:iS6CXpYqgZ8Mhjui@tcp(10.4.3.223:3306)/secooErpDB", ErpDB: "3306_test:iS6CXpYqgZ8Mhjui@tcp(10.4.3.223:3306)/secooErpDB",
EsInfo: "http://localhost:9200", EsInfo: "http://10.0.254.139:9200",
EsUser: "", EsUser: "suggest",
EsPassword: "", EsPassword: "suggest456",
ManualFolder: "D:\\DataFiles\\suggest_corpus-20180801\\manual", //ManualFolder: "D:\\DataFiles\\suggest_corpus-20180801\\manual",
SensitiveFolder: "D:\\DataFiles\\suggest_corpus-20180801\\sensitive"} //SensitiveFolder: "D:\\DataFiles\\suggest_corpus-20180801\\sensitive"
ManualFolder: "/data/pssmaster/corpus_set/suggest_corpus/manual",
SensitiveFolder: "/data/pssmaster/corpus_set/suggest_corpus/sensitive"}
var prod_env = &ENV { var prod_env = &ENV {
DataWareDB: "Search_DataWar_R:pY1P9zUj9x1M65ot5szo@tcp(secooDataWarehouse.slave.com:3306)/secooDataWarehouse", DataWareDB: "Search_DataWar_R:pY1P9zUj9x1M65ot5szo@tcp(secooDataWarehouse.slave.com:3306)/secooDataWarehouse",
...@@ -34,8 +36,8 @@ var prod_env = &ENV { ...@@ -34,8 +36,8 @@ var prod_env = &ENV {
ManualFolder: "/data/pssmaster/corpus_set/suggest_corpus/manual", ManualFolder: "/data/pssmaster/corpus_set/suggest_corpus/manual",
SensitiveFolder: "/data/pssmaster/corpus_set/suggest_corpus/sensitive"} SensitiveFolder: "/data/pssmaster/corpus_set/suggest_corpus/sensitive"}
// IMPORTANT: this variable determines whether the production or the test environment is used.
var RUN_ENV = prod_env var RUN_ENV = test_env
......
...@@ -24,12 +24,12 @@ import ( ...@@ -24,12 +24,12 @@ import (
type Word struct { type Word struct {
Keyword string `json:"keyword"` Keyword string `json:"keyword"`
KeywordPinYin string `json:"keywordPinYin"` KeywordPinYin string `json:"keywordPinYin"`
YearCount int32 `json:"yearCount"`
YearClickCount int32 `json:"yearClickCount"` YearClickCount int32 `json:"yearClickCount"`
YearCartCount int32 `json:"yearCartCount"` YearCartCount int32 `json:"yearCartCount"`
YearCount int32 `json:"yearCount"` WeekCount int32 `json:"weekCount"`
WeekClickCount int32 `json:"weekClickCount"` WeekClickCount int32 `json:"weekClickCount"`
WeekCartCount int32 `json:"weekCartCount"` WeekCartCount int32 `json:"weekCartCount"`
WeekCount int32 `json:"weekCount"`
YearClickRatio float64 `json:"yearClickRatio"` YearClickRatio float64 `json:"yearClickRatio"`
YearCartRatio float64 `json:"yearCartRatio"` YearCartRatio float64 `json:"yearCartRatio"`
WeekClickRatio float64 `json:"weekClickRatio"` WeekClickRatio float64 `json:"weekClickRatio"`
...@@ -40,6 +40,7 @@ type Word struct { ...@@ -40,6 +40,7 @@ type Word struct {
IsSensitive bool `json:"isSensitive"` IsSensitive bool `json:"isSensitive"`
ManualValue int32 `json:"manualValue"` ManualValue int32 `json:"manualValue"`
WordRank float64 `json:"wordRank"` WordRank float64 `json:"wordRank"`
WordABRank float64 `json:"wordABRank"`
KeywordVersion string `json:"keywordVersion"` KeywordVersion string `json:"keywordVersion"`
} }
...@@ -50,15 +51,13 @@ var manualMap = make(map[string]int32) ...@@ -50,15 +51,13 @@ var manualMap = make(map[string]int32)
var sensitiveMap = make(map[string]bool) var sensitiveMap = make(map[string]bool)
var now = time.Now() var now = time.Now()
var dateStr = fmt.Sprintf("%d-%d-%d",now.Year(),now.Month(),now.Day()) var dateStr = fmt.Sprintf("%d-%02d-%02d",now.Year(),now.Month(),now.Day())
var t2s, _ = gocc.New("t2s") var t2s, _ = gocc.New("t2s")
var prefixFilterArr = []string{"https://", "http://", "dg", "d & g", "dolce&gabbana", var prefixFilterArr = []string{"https://", "http://", "dg", "d & g", "dolce&gabbana",
"dolce & gabbana", "杜嘉班纳", "避孕", "情趣", "cucci", "乒乓球", "cuccl", "gucii"} "dolce & gabbana", "杜嘉班纳", "避孕", "情趣", "cucci", "乒乓球", "cuccl", "gucii"}
// 记录是否是在 读历史记录 中, 读完后设为false const TABLE_SPLIT_STEP_SIZE = 10000
var readhistory = true
func main() { func main() {
startTime := time.Now() startTime := time.Now()
...@@ -92,16 +91,15 @@ func main() { ...@@ -92,16 +91,15 @@ func main() {
var wg sync.WaitGroup var wg sync.WaitGroup
arr := queryInfo(datawareDB) arr := queryInfo(datawareDB)
count := arr[1] / 10000 count := arr[1] / TABLE_SPLIT_STEP_SIZE
log.Printf("maxId/10000=%d\n", count) log.Printf("maxId/10000=%d\n", count)
for i := 0; i <= count; i++ { for i := 0; i <= count; i++ {
go queryIndex(i*10000, datawareDB, bulkProcessor, &wg) go queryIndex(i * TABLE_SPLIT_STEP_SIZE, datawareDB, bulkProcessor, &wg)
} }
wg.Wait() wg.Wait()
fmt.Println("all thread has read maps") fmt.Println("all thread has read maps")
readhistory = false
checkUnusedData(bulkProcessor) checkUnusedData(bulkProcessor)
err = bulkProcessor.Flush() err = bulkProcessor.Flush()
...@@ -141,6 +139,7 @@ func addWord(w *Word, processor *elastic.BulkProcessor) { ...@@ -141,6 +139,7 @@ func addWord(w *Word, processor *elastic.BulkProcessor) {
Index("search_suggest_index"). Index("search_suggest_index").
Type("search_suggest_type").Id(id).Doc(w) Type("search_suggest_type").Id(id).Doc(w)
processor.Add(req) processor.Add(req)
fmt.Println("add to es: " , w)
} }
} }
...@@ -162,6 +161,7 @@ func loadErpDB() { ...@@ -162,6 +161,7 @@ func loadErpDB() {
brandMap[cleanKeyword(enName)] = id brandMap[cleanKeyword(enName)] = id
brandMap[cleanKeyword(chName)] = id brandMap[cleanKeyword(chName)] = id
} }
fmt.Println("brandMap size is :", len(brandMap), ", brandMap is ", brandMap)
var categoryQuery = fmt.Sprintf("select id,name from secooErpDB.t_product_category where is_del = 0 and enabled = 1") var categoryQuery = fmt.Sprintf("select id,name from secooErpDB.t_product_category where is_del = 0 and enabled = 1")
categoryResults, err := db.Query(categoryQuery) categoryResults, err := db.Query(categoryQuery)
...@@ -173,9 +173,7 @@ func loadErpDB() { ...@@ -173,9 +173,7 @@ func loadErpDB() {
if err != nil { panic(err.Error()) } if err != nil { panic(err.Error()) }
categoryMap[cleanKeyword(name)] = id categoryMap[cleanKeyword(name)] = id
} }
fmt.Println("categoryMap size is:", len(categoryMap), ", categoryMap is ", categoryMap)
fmt.Println(brandMap)
fmt.Println(categoryMap)
} }
func loadManual(folder string) { func loadManual(folder string) {
...@@ -184,7 +182,6 @@ func loadManual(folder string) { ...@@ -184,7 +182,6 @@ func loadManual(folder string) {
if !file.IsDir() { if !file.IsDir() {
fi, err := os.Open(folder + "/" + file.Name()) fi, err := os.Open(folder + "/" + file.Name())
if err != nil { fmt.Print(err) } if err != nil { fmt.Print(err) }
fmt.Println(file.Name())
br := bufio.NewReader(fi) br := bufio.NewReader(fi)
for { for {
...@@ -196,8 +193,7 @@ func loadManual(folder string) { ...@@ -196,8 +193,7 @@ func loadManual(folder string) {
} }
} }
} }
fmt.Println("manualMap loaded") fmt.Println("manualMap size is : ",len(manualMap),", manual map is:", manualMap)
fmt.Println(manualMap)
} }
func loadSensitive(folder string) { func loadSensitive(folder string) {
...@@ -219,7 +215,7 @@ func loadSensitive(folder string) { ...@@ -219,7 +215,7 @@ func loadSensitive(folder string) {
} }
} }
} }
fmt.Println(sensitiveMap) fmt.Println("sensitive Map:", sensitiveMap)
} }
func queryInfo(db *sql.DB) []int { func queryInfo(db *sql.DB) []int {
...@@ -242,7 +238,8 @@ func queryInfo(db *sql.DB) []int { ...@@ -242,7 +238,8 @@ func queryInfo(db *sql.DB) []int {
func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg *sync.WaitGroup) { func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg *sync.WaitGroup) {
wg.Add(1) wg.Add(1)
// 循环时可能查询到重复数据,应该以id 的上下界来查询 // 循环时可能查询到重复数据,应该以id 的上下界来查询
var sqlStr = fmt.Sprintf("select * from app_search_keyword_year_week_p_day where id >= %d and id < %d", idFlag, idFlag + 10000) var sqlStr = fmt.Sprintf("select id, keyword, year_pv, year_product_click_count, year_add_cart_count, " +
"week_pv, week_product_click_count, week_add_cart_count, p_day from app_search_keyword_year_week_p_day where id >= %d and id < %d", idFlag, idFlag + TABLE_SPLIT_STEP_SIZE)
results, err := db.Query(sqlStr) results, err := db.Query(sqlStr)
if err != nil { log.Print(err.Error()) } if err != nil { log.Print(err.Error()) }
...@@ -264,8 +261,10 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg ...@@ -264,8 +261,10 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
key := cleanKeyword(keyword.String) key := cleanKeyword(keyword.String)
var w = &Word{ var w = &Word{
Keyword:key,YearCount: int32(yearPv.Int64), Keyword:key,
YearCount: int32(yearPv.Int64),
YearClickCount:int32(yearProductClickCount.Int64), YearClickCount:int32(yearProductClickCount.Int64),
YearCartCount:int32(yearAddCartCount.Int64),
WeekCount: int32(weekPv.Int64), WeekCount: int32(weekPv.Int64),
WeekClickCount: int32(weekProductClickCount.Int64), WeekClickCount: int32(weekProductClickCount.Int64),
WeekCartCount: int32(weekAddCartCount.Int64), WeekCartCount: int32(weekAddCartCount.Int64),
...@@ -343,6 +342,7 @@ func processWord(w *Word) { ...@@ -343,6 +342,7 @@ func processWord(w *Word) {
} }
calculateWordRank(w) calculateWordRank(w)
calculateWordABRank(w)
} }
...@@ -409,6 +409,7 @@ func calculateWordRank(w *Word) { ...@@ -409,6 +409,7 @@ func calculateWordRank(w *Word) {
wordRank += 3000 * calculateLengthFactor(len(w.Keyword)) wordRank += 3000 * calculateLengthFactor(len(w.Keyword))
wordRank += 2000 * calculateCountFactor(w.YearCount, 1) wordRank += 2000 * calculateCountFactor(w.YearCount, 1)
wordRank += 2000 * calculateCountFactor(w.WeekCount, 52) wordRank += 2000 * calculateCountFactor(w.WeekCount, 52)
wordRank += 3000 * calculateRatioFactor(w.YearClickRatio, w.YearClickCount) wordRank += 3000 * calculateRatioFactor(w.YearClickRatio, w.YearClickCount)
wordRank += 3000 * calculateRatioFactor(w.WeekClickRatio, w.WeekClickCount) wordRank += 3000 * calculateRatioFactor(w.WeekClickRatio, w.WeekClickCount)
wordRank += 3000 * calculateRatioFactor(w.YearCartRatio, w.YearCartCount) wordRank += 3000 * calculateRatioFactor(w.YearCartRatio, w.YearCartCount)
...@@ -419,6 +420,27 @@ func calculateWordRank(w *Word) { ...@@ -419,6 +420,27 @@ func calculateWordRank(w *Word) {
w.WordRank = wordRank w.WordRank = wordRank
} }
// calculateWordABRank computes the alternative ("B") ranking score for w and
// stores it in w.WordABRank. It applies the same terms as calculateWordRank,
// except that the yearly click-ratio term is weighted 2000 here.
//
// The score starts from a base of 10000, adds the weighted length, count,
// click and add-to-cart factors, and is then scaled by the brand, category
// and manual multipliers, in that order.
func calculateWordABRank(w *Word) {
	score := 10000.0

	// Keyword length and search-volume terms.
	score += 3000 * calculateLengthFactor(len(w.Keyword))
	score += 2000 * calculateCountFactor(w.YearCount, 1)
	score += 2000 * calculateCountFactor(w.WeekCount, 52)

	// Click terms; the yearly click weight is 2000 in this variant.
	score += 2000 * calculateRatioFactor(w.YearClickRatio, w.YearClickCount)
	score += 3000 * calculateRatioFactor(w.WeekClickRatio, w.WeekClickCount)

	// Add-to-cart terms.
	score += 3000 * calculateRatioFactor(w.YearCartRatio, w.YearCartCount)
	score += 3000 * calculateRatioFactor(w.WeekCartRatio, w.WeekCartCount)

	// Multiplicative boosts.
	if w.IsBrand {
		score *= 1.8
	}
	if w.IsCategory {
		score *= 1.2
	}
	if w.IsManual {
		// Only a positive manual value scales the score (by its square root).
		if w.ManualValue > 0 {
			score *= math.Sqrt(float64(w.ManualValue))
		}
	}

	w.WordABRank = score
}
func calculateLengthFactor(length int) float64 { func calculateLengthFactor(length int) float64 {
//根据文本长度转换为长度因子 //根据文本长度转换为长度因子
return float64(1.0 / float64(2 * length + 1)) return float64(1.0 / float64(2 * length + 1))
......
package main package main
import ( import (
"sync" "encoding/json"
"fmt" "fmt"
"github.com/liuzl/gocc"
) )
var tmap sync.Map type B struct {
var t2s1, _ = gocc.New("t2s") Keyword string `json:"keyword"`
func main() { KeywordPinYin string `json:"keywordPinYin"`
YearCount int32 `json:"yearCount"`
YearClickCount int32 `json:"yearClickCount"`
//var _, err = t2s.Convert("中國") YearCartCount int32 `json:"yearCartCount"`
//if err != nil { fmt.Println("succ")} ZhaoCount int32 `json:"-"`
//var s = "意尔康 男 鞋"
//var re, _ = regexp.Compile("\\s+")
//var st = re.ReplaceAllLiteralString(s," ")
//fields := strings.Fields(s)
//t := time.Now()
//fmt.Println(factorial(5))
//fmt.Print(time.Now().Unix() -t.Unix() )
//var lst = new(list.List)
//for i := 1; i < 10 ; i++ {
// lst.PushBack(i)
//}
//for p := lst.Front(); p != nil ; p = p.Next() {
// fmt.Println(p.Value)
//}
var s = cleanKeyword("zhong ")
fmt.Print(s)
}
func add() {
tmap.Store("a","b")
var val,_ = tmap.Load("a")
fmt.Print(val)
} }
//func cleanKeyword(keyword string) string { func main() {
// out, err := t2s1.Convert(keyword) b := B{
// if err != nil { fmt.Println(err) } Keyword: "赵延超",
// keyword = strings.ToLower(strings.Trim(DBC2SBC(strings.TrimSpace(out)),"\ufffc|,")) KeywordPinYin: "zhaoyanchao",
// return strings.Join(strings.Fields(keyword)," ") YearCount: 1000,
//} YearCartCount: 100,
// YearClickCount: 10,
//// 全角转半角 ZhaoCount: 2}
//func DBC2SBC(s string) string {
// var strLst []string if jsonBytes,errs := json.Marshal(b); errs == nil {
// for _, i := range s { fmt.Print(string(jsonBytes))
// insideCode := i }
// if insideCode == 12288 {
// insideCode = 32
// } else {
// insideCode -= 65248
// }
// if insideCode < 32 || insideCode > 126 {
// strLst = append(strLst, string(i))
// } else {
// strLst = append(strLst, string(insideCode))
// }
// }
// return strings.Join(strLst, "")
//}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment