Commit 388a7fe5 by zhaoyanchao

1. 标签字段写入es

2. 品牌识别时添加昵称,简称
parent c07c067c
...@@ -37,7 +37,7 @@ var prod_env = &ENV { ...@@ -37,7 +37,7 @@ var prod_env = &ENV {
SensitiveFolder: "/data/pssmaster/corpus_set/suggest_corpus/sensitive"} SensitiveFolder: "/data/pssmaster/corpus_set/suggest_corpus/sensitive"}
// 重要,该参数 确定是 正式还是 测试环境 // 重要,该参数 确定是 正式还是 测试环境
var RUN_ENV = test_env var RUN_ENV = prod_env
......
...@@ -52,6 +52,7 @@ type Word struct { ...@@ -52,6 +52,7 @@ type Word struct {
MonthUv int32 `json:"-"` MonthUv int32 `json:"-"`
MonthProductClickUv int32 `json:"-"` MonthProductClickUv int32 `json:"-"`
MonthAddCartUv int32 `json:"-"` MonthAddCartUv int32 `json:"-"`
SuggestTags string `json:"suggestTags"`
} }
var wordMap sync.Map var wordMap sync.Map
...@@ -69,6 +70,8 @@ var prefixFilterArr = []string{"https://", "http://", "dg", "d & g", "dolce&gabb ...@@ -69,6 +70,8 @@ var prefixFilterArr = []string{"https://", "http://", "dg", "d & g", "dolce&gabb
const TABLE_SPLIT_STEP_SIZE = 10000 const TABLE_SPLIT_STEP_SIZE = 10000
const MAX_TAG_SIZE = 5
func main() { func main() {
startTime := time.Now() startTime := time.Now()
...@@ -143,10 +146,29 @@ func checkUnusedData(bulkProcessor *elastic.BulkProcessor) { ...@@ -143,10 +146,29 @@ func checkUnusedData(bulkProcessor *elastic.BulkProcessor) {
} }
} }
// cleanForEs tidies the SuggestTags field of w before the word is written to ES.
//
// A value that is exactly "null" or "NULL" is replaced by the empty string.
// Any other value is split on "," and only the first MAX_TAG_SIZE entries are
// kept, joined back together with ",". Entries are neither trimmed nor
// de-duplicated, so empty entries between commas are preserved as-is.
func cleanForEs(w *Word) {
	switch w.SuggestTags {
	case "null", "NULL":
		w.SuggestTags = ""
		return
	}

	tags := strings.Split(w.SuggestTags, ",")
	// Cap the number of tags; anything past the limit is dropped.
	if len(tags) > MAX_TAG_SIZE {
		tags = tags[:MAX_TAG_SIZE]
	}
	w.SuggestTags = strings.Join(tags, ",")
}
func addWord(w *Word, processor *elastic.BulkProcessor) { func addWord(w *Word, processor *elastic.BulkProcessor) {
processWord(w) processWord(w)
if !isFilterWord(w) { if !isFilterWord(w) {
wordMap.Store(w.Keyword,w) wordMap.Store(w.Keyword,w)
cleanForEs(w)
id := fmt.Sprintf("%x", md5.Sum([]byte(w.Keyword))) id := fmt.Sprintf("%x", md5.Sum([]byte(w.Keyword)))
req := elastic.NewBulkIndexRequest(). req := elastic.NewBulkIndexRequest().
Index("search_suggest_index"). Index("search_suggest_index").
...@@ -162,17 +184,25 @@ func loadErpDB() { ...@@ -162,17 +184,25 @@ func loadErpDB() {
if err != nil { log.Print(err.Error()) } if err != nil { log.Print(err.Error()) }
defer db.Close() defer db.Close()
var brandQuery = fmt.Sprintf("select id,en_name,ch_name from secooErpDB.t_product_brand where is_del = 0 and enabled = 1") var brandQuery = fmt.Sprintf("select id,en_name,ch_name,short_name,nickname from secooErpDB.t_product_brand where is_del = 0 and enabled = 1")
brandResults, err := db.Query(brandQuery) brandResults, err := db.Query(brandQuery)
if err != nil { panic(err.Error()) } if err != nil { panic(err.Error()) }
for brandResults.Next() { for brandResults.Next() {
var id int var id int
var enName string var enName string
var chName string var chName string
err = brandResults.Scan(&id, &enName, &chName) var shortName sql.NullString
var nickName sql.NullString
err = brandResults.Scan(&id, &enName, &chName,&shortName,&nickName)
if err != nil { panic(err.Error()) } if err != nil { panic(err.Error()) }
brandMap[cleanKeyword(enName)] = id brandMap[cleanKeyword(enName)] = id
brandMap[cleanKeyword(chName)] = id brandMap[cleanKeyword(chName)] = id
if _,exist := brandMap[cleanKeyword(shortName.String)]; !exist {
brandMap[cleanKeyword(shortName.String)] = id
}
if _,exist := brandMap[cleanKeyword(nickName.String)]; !exist {
brandMap[cleanKeyword(nickName.String)] = id
}
} }
fmt.Println("brandMap size is :", len(brandMap), ", brandMap is ", brandMap) fmt.Println("brandMap size is :", len(brandMap), ", brandMap is ", brandMap)
...@@ -254,7 +284,7 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg ...@@ -254,7 +284,7 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
var sqlStr = fmt.Sprintf("select id, keyword, year_pv, year_product_click_count, year_add_cart_count, " + var sqlStr = fmt.Sprintf("select id, keyword, year_pv, year_product_click_count, year_add_cart_count, " +
"week_pv, week_product_click_count, week_add_cart_count, p_day, " + "week_pv, week_product_click_count, week_add_cart_count, p_day, " +
"week_uv, week_product_click_uv, week_add_cart_uv, " + "week_uv, week_product_click_uv, week_add_cart_uv, " +
"month_pv, month_product_click_count, month_add_cart_count, month_uv, month_product_click_uv, month_add_cart_uv " + "month_pv, month_product_click_count, month_add_cart_count, month_uv, month_product_click_uv, month_add_cart_uv, prepare_tags " +
"from app_search_keyword_year_week_p_day where id >= %d and id < %d", idFlag, idFlag + TABLE_SPLIT_STEP_SIZE) "from app_search_keyword_year_week_p_day where id >= %d and id < %d", idFlag, idFlag + TABLE_SPLIT_STEP_SIZE)
results, err := db.Query(sqlStr) results, err := db.Query(sqlStr)
if err != nil { log.Print(err.Error()) } if err != nil { log.Print(err.Error()) }
...@@ -278,6 +308,7 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg ...@@ -278,6 +308,7 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
var monthUv sql.NullInt64 var monthUv sql.NullInt64
var monthProductClickUv sql.NullInt64 var monthProductClickUv sql.NullInt64
var monthAddCartUv sql.NullInt64 var monthAddCartUv sql.NullInt64
var prepareTags sql.NullString
err = results.Scan(&id, err = results.Scan(&id,
&keyword, &keyword,
...@@ -296,13 +327,16 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg ...@@ -296,13 +327,16 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
&monthAddCartCount, &monthAddCartCount,
&monthUv, &monthUv,
&monthProductClickUv, &monthProductClickUv,
&monthAddCartUv) &monthAddCartUv,
&prepareTags)
if err != nil { log.Print(err.Error()) } if err != nil { log.Print(err.Error()) }
if keyword.Valid && len(keyword.String) > 0 && keyword.String != "" { if keyword.Valid && len(keyword.String) > 0 && keyword.String != "" {
key := cleanKeyword(keyword.String) key := cleanKeyword(keyword.String)
var w = &Word{ var w = &Word{
Keyword:key, Keyword:key,
YearCount: int32(yearPv.Int64), YearCount: int32(yearPv.Int64),
...@@ -320,7 +354,8 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg ...@@ -320,7 +354,8 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
MonthAddCartCount: int32(monthAddCartCount.Int64), MonthAddCartCount: int32(monthAddCartCount.Int64),
MonthUv: int32(monthUv.Int64), MonthUv: int32(monthUv.Int64),
MonthProductClickUv: int32(monthProductClickUv.Int64), MonthProductClickUv: int32(monthProductClickUv.Int64),
MonthAddCartUv: int32(monthAddCartUv.Int64) } MonthAddCartUv: int32(monthAddCartUv.Int64) ,
SuggestTags: prepareTags.String}
if v, isExist := wordMap.Load(key); isExist { if v, isExist := wordMap.Load(key); isExist {
merge(w,v) merge(w,v)
...@@ -358,6 +393,9 @@ func merge(word *Word, v interface{}) { ...@@ -358,6 +393,9 @@ func merge(word *Word, v interface{}) {
word.MonthUv += t.MonthUv word.MonthUv += t.MonthUv
word.MonthProductClickUv += t.MonthProductClickUv word.MonthProductClickUv += t.MonthProductClickUv
word.MonthAddCartUv += t.MonthAddCartUv word.MonthAddCartUv += t.MonthAddCartUv
if len(word.SuggestTags) == 0 || "null" == word.SuggestTags || "NULL" == word.SuggestTags {
word.SuggestTags = t.SuggestTags
}
} }
func after(executionId int64, requests []elastic.BulkableRequest, response *elastic.BulkResponse, err error) { func after(executionId int64, requests []elastic.BulkableRequest, response *elastic.BulkResponse, err error) {
......
package main package main
import ( import (
"fmt"
"math" "math"
"strings"
"fmt"
) )
type B struct { type B struct {
...@@ -13,13 +14,19 @@ type B struct { ...@@ -13,13 +14,19 @@ type B struct {
YearCartCount int32 `json:"yearCartCount"` YearCartCount int32 `json:"yearCartCount"`
ZhaoCount int32 `json:"-"` ZhaoCount int32 `json:"-"`
} }
func main() { func main() {
fmt.Println(calculateRatioFactor2(0,0)) var arr = strings.Split("",",")
var s = ""
for i,leng := 0, len(arr); i< MAX_TAG_SIZE && i < leng; i++ {
if i == MAX_TAG_SIZE-1 || i == leng -1 {
s = s + arr[i]
} else {
s = s + arr[i] + ","
}
}
fmt.Print(s)
fmt.Println(calculateRatioFactor2(0.2,1))
fmt.Println(calculateRatioFactor2(0.2,10))
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment