Unverified Commit 6abf6e24 by David Star Committed by GitHub

Merge pull request #2 from yanchaosb123/rank_opt

敏感词放后
parents bca1a519 af4c8ed7
......@@ -70,9 +70,9 @@ var t2s, _ = gocc.New("t2s")
// prefixFilterArr lists keyword prefixes used for filtering: isFilterWord
// tests each entry against a word's Keyword with strings.HasPrefix and
// reports the word as filtered on the first match.
var prefixFilterArr = []string{"https://", "http://", "dg", "d & g", "dolce&gabbana",
"dolce & gabbana", "杜嘉班纳", "避孕", "情趣", "cucci", "乒乓球", "cuccl", "gucii","tod's","iwc7" }
const TABLE_SPLIT_STEP_SIZE = 10000
const LEVEL_SIZE = 1
const TABLE_SPLIT_STEP_SIZE = 10000
const MAX_TAG_SIZE = 5
......@@ -80,6 +80,7 @@ var UPDATE_TIME = time.Now().UnixNano() / 1e6
func main() {
startTime := time.Now()
log.SetFlags(log.Lshortfile | log.LstdFlags)
datawareDB, err := sql.Open("mysql", RUN_ENV.DataWareDB)
......@@ -117,7 +118,7 @@ func main() {
count := arr[1] / TABLE_SPLIT_STEP_SIZE
log.Printf("maxId/10000=%d\n", count)
if arr[1] < 2800000 {
if arr[1] < 1000000 {
log.Printf("data is too little ,return")
sendSuggestNotify()
return
......@@ -182,6 +183,7 @@ func cleanForEs(w *Word) {
func addWord(w *Word, processor *elastic.BulkProcessor) {
processWord(w)
if !isFilterWord(w) {
wordMap.Store(w.Keyword,w)
cleanForEs(w)
......@@ -307,7 +309,6 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
results, err := db.Query(sqlStr)
if err != nil { log.Print(err.Error()) }
log.Print("read database success ")
for results.Next() {
......@@ -467,30 +468,48 @@ func processWord(w *Word) {
calculateWordRank(w)
calculateWordABRank(w)
addNewScoreIfNewHotWord(w)
}
func isFilterWord(w *Word) bool {
// 品牌词 类目词 人工干预词 不做过滤
if w.IsBrand || w.IsCategory || w.IsManual {
w.IsSensitive = false
return false
}
// 敏感词过滤
if w.IsSensitive { return true }
if w.IsSensitive {
return true
}
// 过滤掉太长的词 每个中文字占3个byte
if utf8.RuneCountInString(w.Keyword) <= 1 || len(w.Keyword) > 50 { return true }
if utf8.RuneCountInString(w.Keyword) <= 1 || len(w.Keyword) > 50 {
return true
}
// 过滤掉商品id,商品id是有7位数字组成
if len(w.Keyword) > 6 && isAllDigit(w.Keyword) { return true }
if len(w.Keyword) > 6 && isAllDigit(w.Keyword) {
return true
}
// 前缀过滤
for _, v := range prefixFilterArr {
if strings.HasPrefix(w.Keyword, v) { return true }
if strings.HasPrefix(w.Keyword, v) {
return true
}
}
// 品牌词 类目词 人工干预词 不做过滤
if w.IsBrand || w.IsCategory || w.IsManual { return false }
// 年数据过滤
if w.YearCount < 2 || w.YearClickCount < 2 { return true }
if w.YearCount < 2 || w.YearClickCount < 2 {
return true
}
// 判断是否是热搜词 一年内搜索次数大于50或者一周内搜索次数大于5
if isHotSearchWord(w) {
......@@ -502,6 +521,29 @@ func isFilterWord(w *Word) bool {
}
}
// addNewScoreIfNewHotWord boosts w.WordABRank when the word's weekly search
// count makes up a large share of its yearly search count.
//
// The word is left untouched when any of the following holds:
//   - w is nil;
//   - WeekCount is below 20 or YearCount is zero;
//   - WeekCount*10/YearCount (integer division) is 5 or less, which for
//     positive counts means the weekly count is under 60% of the yearly count;
//   - WeekClickCount is below 3 or WeekUv is below 5.
//
// Otherwise WordABRank is multiplied by sqrt(5) and one line naming the
// keyword is printed to standard output.
func addNewScoreIfNewHotWord(w *Word) {
	if w == nil {
		return
	}
	// The ratio below is only meaningful with enough weekly data. The
	// WeekCount < 20 test also covers WeekCount == 0; YearCount == 0 must be
	// rejected because it is the divisor.
	if w.WeekCount < 20 || w.YearCount == 0 {
		return
	}
	// Integer division truncates, so the quotient exceeds 5 only once the
	// weekly count reaches 60% of the yearly count.
	// NOTE(review): the previous comment stated a 40% threshold; the code
	// enforces 60%. Confirm which one is intended.
	if w.WeekCount*10/w.YearCount <= 5 {
		return
	}
	if w.WeekClickCount < 3 || w.WeekUv < 5 {
		return
	}
	// The boost for a new hot word plays a role similar to a manual
	// intervention value.
	w.WordABRank *= math.Sqrt(5.0)
	// Terminate the message with a newline so successive words do not run
	// together on one output line.
	fmt.Printf("最新热词添加分数,新词: %s\n", w.Keyword)
}
func isAllDigit(str string) bool {
for _, x := range str {
// x 的类型是 rune 其实就是对应字符的 utf8 编码
......
package main
import (
"math"
"strings"
"fmt"
)
type B struct {
Keyword string `json:"keyword"`
KeywordPinYin string `json:"keywordPinYin"`
YearCount int32 `json:"yearCount"`
YearClickCount int32 `json:"yearClickCount"`
YearCartCount int32 `json:"yearCartCount"`
ZhaoCount int32 `json:"-"`
import "fmt"
// Phone is satisfied by any type that provides a call() method.
type Phone interface {
call()
}
func main() {
prefix := strings.HasPrefix("tod's", "tod's")
fmt.Print(prefix)
// NokiaPhone carries a Name; through its pointer receiver method call it
// satisfies the Phone interface.
type NokiaPhone struct {
	Name string
}

// call writes the receiver's Name to standard output without a trailing
// newline.
func (p *NokiaPhone) call() {
	fmt.Print(p.Name)
}
//
//func (nokiaPhone *NokiaPhone) call() {
// fmt.Print(nokiaPhone.Name)
//}
func main() {
// calculateRatioFactor2 turns a search conversion ratio into a heat factor,
// scaled by a multiplier picked from the bucket that count falls into.
//
// The multiplier is 1.0 when count <= 1 and rises through 1.2, 1.4, 1.6, 1.8,
// 2.0 and 2.2 to 2.5 for count >= 500. The returned value is
// log10(sqrt(ratio + 10)) * multiplier.
func calculateRatioFactor2(ratio float64, count int32) float64 {
var rank float64
switch {
case count > 1 && count < 10 : rank = 1.2
case count >= 10 && count < 20 : rank = 1.4
case count >= 20 && count < 50 : rank = 1.6
case count >= 50 && count < 100 : rank = 1.8
case count >= 100 && count < 200 : rank = 2.0
case count >= 200 && count < 500 : rank = 2.2
case count >= 500 : rank = 2.5
// Reached only when count <= 1.
default:rank = 1.0
}
// Convert the search conversion ratio into a heat factor.
return math.Log10(math.Sqrt(ratio + 10)) * rank
// NOTE(review): the two statements below follow the return, so they never
// execute. They reference NokiaPhone and look like lines of the removed main()
// that this diff view interleaves with the new function; confirm and drop them.
var phone = NokiaPhone{Name:"zhangsan"}
phone.call()
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment