Commit c07c067c by zhaoyanchao

使用上新字段, 添加另一算分字段

parent a59ff1f5
......@@ -3,8 +3,7 @@ package main
import (
"bufio"
"context"
"crypto/md5"
"database/sql"
"database/sql"
"fmt"
_ "github.com/go-sql-driver/mysql"
"github.com/liuzl/gocc"
......@@ -19,7 +18,8 @@ import (
"time"
"unicode"
"unicode/utf8"
)
"crypto/md5"
)
type Word struct {
Keyword string `json:"keyword"`
......@@ -42,6 +42,16 @@ type Word struct {
WordRank float64 `json:"wordRank"`
WordABRank float64 `json:"wordABRank"`
KeywordVersion string `json:"keywordVersion"`
WeekUv int32 `json:"-"`
WeekClickUv int32 `json:"-"`
WeekAddCartUv int32 `json:"-"`
MonthPv int32 `json:"-"`
MonthClickCount int32 `json:"-"`
MonthAddCartCount int32 `json:"-"`
MonthUv int32 `json:"-"`
MonthProductClickUv int32 `json:"-"`
MonthAddCartUv int32 `json:"-"`
}
var wordMap sync.Map
......@@ -95,11 +105,14 @@ func main() {
log.Printf("maxId/10000=%d\n", count)
for i := 0; i <= count; i++ {
wg.Add(1)
go queryIndex(i * TABLE_SPLIT_STEP_SIZE, datawareDB, bulkProcessor, &wg)
}
wg.Wait()
fmt.Println("all thread has read maps")
checkUnusedData(bulkProcessor)
err = bulkProcessor.Flush()
......@@ -236,10 +249,13 @@ func queryInfo(db *sql.DB) []int {
}
func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg *sync.WaitGroup) {
wg.Add(1)
// 循环时可能查询到重复数据,应该以id 的上下界来查询
var sqlStr = fmt.Sprintf("select id, keyword, year_pv, year_product_click_count, year_add_cart_count, " +
"week_pv, week_product_click_count, week_add_cart_count, p_day from app_search_keyword_year_week_p_day where id >= %d and id < %d", idFlag, idFlag + TABLE_SPLIT_STEP_SIZE)
"week_pv, week_product_click_count, week_add_cart_count, p_day, " +
"week_uv, week_product_click_uv, week_add_cart_uv, " +
"month_pv, month_product_click_count, month_add_cart_count, month_uv, month_product_click_uv, month_add_cart_uv " +
"from app_search_keyword_year_week_p_day where id >= %d and id < %d", idFlag, idFlag + TABLE_SPLIT_STEP_SIZE)
results, err := db.Query(sqlStr)
if err != nil { log.Print(err.Error()) }
......@@ -253,8 +269,35 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
var weekProductClickCount sql.NullInt64
var weekAddCartCount sql.NullInt64
var pDay string
var weekUv sql.NullInt64
var weekClickUv sql.NullInt64
var weekAddCartUv sql.NullInt64
var monthPv sql.NullInt64
var monthClickCount sql.NullInt64
var monthAddCartCount sql.NullInt64
var monthUv sql.NullInt64
var monthProductClickUv sql.NullInt64
var monthAddCartUv sql.NullInt64
err = results.Scan(&id,
&keyword,
&yearPv,
&yearProductClickCount,
&yearAddCartCount,
&weekPv,
&weekProductClickCount,
&weekAddCartCount,
&pDay,
&weekUv,
&weekClickUv,
&weekAddCartUv,
&monthPv,
&monthClickCount,
&monthAddCartCount,
&monthUv,
&monthProductClickUv,
&monthAddCartUv)
err = results.Scan(&id, &keyword, &yearPv, &yearProductClickCount, &yearAddCartCount, &weekPv, &weekProductClickCount, &weekAddCartCount, &pDay)
if err != nil { log.Print(err.Error()) }
if keyword.Valid && len(keyword.String) > 0 && keyword.String != "" {
......@@ -268,7 +311,16 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
WeekCount: int32(weekPv.Int64),
WeekClickCount: int32(weekProductClickCount.Int64),
WeekCartCount: int32(weekAddCartCount.Int64),
KeywordVersion:pDay}
KeywordVersion:pDay,
WeekUv: int32(weekUv.Int64),
WeekClickUv: int32(weekClickUv.Int64),
WeekAddCartUv: int32(weekAddCartUv.Int64),
MonthPv: int32(monthPv.Int64),
MonthClickCount: int32(monthClickCount.Int64),
MonthAddCartCount: int32(monthAddCartCount.Int64),
MonthUv: int32(monthUv.Int64),
MonthProductClickUv: int32(monthProductClickUv.Int64),
MonthAddCartUv: int32(monthAddCartUv.Int64) }
if v, isExist := wordMap.Load(key); isExist {
merge(w,v)
......@@ -296,6 +348,16 @@ func merge(word *Word, v interface{}) {
word.WeekCount += t.WeekCount
word.WeekCartCount += t.WeekCartCount
word.WeekClickCount += t.WeekClickCount
word.WeekUv += t.WeekUv
word.WeekClickUv += t.WeekClickUv
word.WeekAddCartUv += t.WeekAddCartUv
word.MonthPv += t.MonthPv
word.MonthClickCount += t.MonthClickCount
word.MonthAddCartCount += t.MonthAddCartCount
word.MonthUv += t.MonthUv
word.MonthProductClickUv += t.MonthProductClickUv
word.MonthAddCartUv += t.MonthAddCartUv
}
func after(executionId int64, requests []elastic.BulkableRequest, response *elastic.BulkResponse, err error) {
......@@ -421,19 +483,53 @@ func calculateWordRank(w *Word) {
}
func calculateWordABRank(w *Word) {
monthClickRatio := calculateRatio(w.MonthProductClickUv, w.MonthUv)
monthCartRatio := calculateRatio(w.MonthAddCartUv, w.MonthUv)
weekClickRatioNew := calculateRatio(w.WeekClickUv, w.WeekUv)
weekCartRatioNew := calculateRatio(w.WeekAddCartUv, w.WeekUv)
// 月点击
if w.MonthProductClickUv != 0 && w.MonthUv != 0 {
monthClickRatio *= 1.5
}
// 月加购,加权
if w.MonthAddCartUv != 0 && w.MonthUv != 0 {
monthCartRatio *= 3
}
// 周点击,加权
if w.WeekClickUv != 0 && w.WeekUv != 0 {
weekClickRatioNew *= 2
}
// 周加购,加权
if w.WeekAddCartUv != 0 && w.WeekUv != 0 {
weekCartRatioNew *= 3
}
wordABRank := 10000.0
wordABRank += 3000 * calculateLengthFactor(len(w.Keyword))
wordABRank += 2000 * calculateCountFactor(w.YearCount, 1)
wordABRank += 2000 * calculateCountFactor(w.WeekCount, 52)
wordABRank += 2000 * calculateCountFactor(w.MonthUv, 1)
wordABRank += 2000 * calculateCountFactor(w.WeekUv, 4)
// 点击
// 年点击改为 2000
wordABRank += 2000 * calculateRatioFactor(w.YearClickRatio, w.YearClickCount)
wordABRank += 3000 * calculateRatioFactor(w.WeekClickRatio, w.WeekClickCount)
// 年
wordABRank += 2000 * calculateCountFactor(w.YearCount, 1)
// 月
wordABRank += 3000 * calculateRatioFactor(monthClickRatio, w.MonthProductClickUv)
// 周
wordABRank += 3000 * calculateRatioFactor(weekClickRatioNew, w.WeekUv)
// 加购
// 年
wordABRank += 3000 * calculateRatioFactor(w.YearCartRatio, w.YearCartCount)
wordABRank += 3000 * calculateRatioFactor(w.WeekCartRatio, w.WeekCartCount)
// 月
wordABRank += 3000 * calculateRatioFactor(monthCartRatio, w.MonthUv)
// 周
wordABRank += 3000 * calculateRatioFactor(weekCartRatioNew, w.WeekUv)
if w.IsBrand { wordABRank *= 1.8 }
if w.IsCategory { wordABRank *= 1.2 }
......
package main
import (
"encoding/json"
"fmt"
"fmt"
"math"
)
type B struct {
......@@ -15,18 +15,29 @@ type B struct {
}
func main() {
b := B{
Keyword: "赵延超",
KeywordPinYin: "zhaoyanchao",
YearCount: 1000,
YearCartCount: 100,
YearClickCount: 10,
ZhaoCount: 2}
if jsonBytes,errs := json.Marshal(b); errs == nil {
fmt.Print(string(jsonBytes))
fmt.Println(calculateRatioFactor2(0,0))
fmt.Println(calculateRatioFactor2(0.2,1))
fmt.Println(calculateRatioFactor2(0.2,10))
}
// calculateRatioFactor2 converts a search conversion ratio into a heat factor.
//
// The base value is log10(sqrt(ratio + 10)); it is then multiplied by a
// weight selected from count:
//
//	count <= 1   -> 1.0
//	2   .. 9     -> 1.2
//	10  .. 19    -> 1.4
//	20  .. 49    -> 1.6
//	50  .. 99    -> 1.8
//	100 .. 199   -> 2.0
//	200 .. 499   -> 2.2
//	>= 500       -> 2.5
func calculateRatioFactor2(ratio float64, count int32) float64 {
	// Tiers are listed from the highest floor down; the first floor that
	// count reaches decides the weight.
	tiers := []struct {
		floor  int32
		weight float64
	}{
		{500, 2.5},
		{200, 2.2},
		{100, 2.0},
		{50, 1.8},
		{20, 1.6},
		{10, 1.4},
		{2, 1.2},
	}

	// Counts below every floor (including 0, 1 and negatives) keep the
	// neutral weight.
	weight := 1.0
	for _, tier := range tiers {
		if count >= tier.floor {
			weight = tier.weight
			break
		}
	}

	// Turn the conversion ratio into the heat factor.
	base := math.Log10(math.Sqrt(ratio + 10))
	return base * weight
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment