Commit b406349b by zhaoyanchao

1. 空格分隔后的词是相同的集合的词, 在存入es 时进行合并。 如:“nike 鞋 男” 和 “nike 男 鞋” 合并为同一记录

2. 效率考虑,对于空格分隔后长度大于5的集合不做处理:5个词的排列数为 5! = 120 个,而6个词的排列数为 6! = 720 个,效率较低,出现概率也较低
parent 3b53306e
package main
import (
"container/list"
"strings"
"github.com/mozillazg/go-pinyin"
"fmt"
"strconv"
)
type ENV struct {
DataWareDB string
ErpDB string
......@@ -28,4 +36,59 @@ var prod_env = &ENV {
SensitiveFolder: "/data/pssmaster/corpus_set/suggest_corpus/sensitive"}
var RUN_ENV = prod_env
\ No newline at end of file
var RUN_ENV = test_env
/************************* 下面是 util 方法 *****************************/
// factorial returns n! (the product 1 * 2 * ... * n).
//
// Any n of 1 or less, including 0 and negative values, yields 1, so the
// function always terminates; a recursive form whose only base case is
// n == 1 never stops for n < 1. The result overflows int64 once n exceeds 20.
func factorial(n int64) int64 {
	result := int64(1)
	for i := int64(2); i <= n; i++ {
		result *= i
	}
	return result
}
// permutation enumerates the orderings of arr[begin:] while keeping
// arr[:begin] fixed. For each ordering it appends the whole slice, joined by
// single spaces, to the back of lst.
//
// arr is rearranged in place during the recursion and is back in its original
// order when the call returns.
func permutation(arr []string, begin int, lst *list.List) {
	if begin == len(arr) {
		// Every position has been fixed: record this ordering.
		lst.PushBack(strings.Join(arr, " "))
		return
	}
	for pos := begin; pos < len(arr); pos++ {
		// Move the candidate element into the slot being fixed.
		arr[begin], arr[pos] = arr[pos], arr[begin]
		permutation(arr, begin+1, lst)
		// Undo the swap so the next candidate starts from the same layout.
		arr[begin], arr[pos] = arr[pos], arr[begin]
	}
}
// convertToPinyin walks str rune by rune and replaces each rune with the
// concatenation of the strings pinyin.LazyPinyin returns for it. A rune for
// which LazyPinyin yields nothing is copied through unchanged.
//
// The output is assembled in a strings.Builder rather than by repeated string
// concatenation, and the pinyin.Args value is created once and reused for
// every rune.
func convertToPinyin(str string) string {
	var b strings.Builder
	args := pinyin.NewArgs()
	for _, r := range str {
		ch := string(r)
		if py := strings.Join(pinyin.LazyPinyin(ch, args), ""); len(py) > 0 {
			b.WriteString(py)
		} else {
			// No conversion result for this rune: keep it as-is.
			b.WriteString(ch)
		}
	}
	return b.String()
}
// cleanKeyword normalizes a keyword in three steps:
//   - runs it through t2s.Convert (traditional Chinese to simplified Chinese),
//   - lower-cases the result,
//   - drops leading/trailing whitespace and collapses every run of redundant
//     whitespace between words into a single space.
//
// A conversion error is printed and processing continues with whatever
// Convert returned.
func cleanKeyword(keyword string) string {
	converted, err := t2s.Convert(keyword)
	if err != nil {
		fmt.Println(err)
	}
	// strings.Fields ignores leading and trailing whitespace, so no separate
	// trim step is needed before splitting.
	words := strings.Fields(strings.ToLower(converted))
	return strings.Join(words, " ")
}
// strToInt parses str as a base-10 integer and returns it as an int32.
//
// The empty string and the literal `\N` (presumably the NULL marker of a
// database export; confirm against the data source) are returned as 0
// without attempting a parse.
//
// Parse errors are printed and do not abort: malformed input yields 0, and a
// value outside the int32 range yields the nearest int32 limit. Parsing with
// bitSize 32 is what provides the range check; parsing as 64-bit and casting
// afterwards would silently wrap large values (e.g. "4294967297" -> 1).
func strToInt(str string) int32 {
	if str == "" || str == "\\N" {
		return 0
	}
	v, err := strconv.ParseInt(str, 10, 32)
	if err != nil {
		fmt.Println(err)
	}
	// With bitSize 32 the returned value always fits in int32, so this
	// conversion cannot truncate.
	return int32(v)
}
\ No newline at end of file
......@@ -63,9 +63,7 @@ func main() {
startTime := time.Now()
datawareDB, err := sql.Open("mysql", RUN_ENV.DataWareDB)
if err != nil {
log.Print(err.Error())
}
if err != nil { log.Print(err.Error()) }
var client *elastic.Client
if RUN_ENV.EsUser != "" {
......@@ -142,7 +140,6 @@ func addWord(keyword string, processor *elastic.BulkProcessor) {
func loadErpDB() {
//db, err := sql.Open("mysql", "so_Erp_R:5RgzudyyFlApTmve@tcp(192.168.50.40:3306)/secooErpDB")
db, err := sql.Open("mysql", RUN_ENV.ErpDB)
if err != nil { log.Print(err.Error()) }
defer db.Close()
......@@ -247,19 +244,19 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
for results.Next() {
var id int
var keyword sql.NullString
var year_pv sql.NullInt64
var year_product_click_count sql.NullInt64
var year_add_cart_count sql.NullInt64
var week_pv sql.NullInt64
var week_product_click_count sql.NullInt64
var week_add_cart_count sql.NullInt64
var p_day string
err = results.Scan(&id, &keyword, &year_pv, &year_product_click_count, &year_add_cart_count, &week_pv, &week_product_click_count, &week_add_cart_count, &p_day)
var yearPv sql.NullInt64
var yearProductClickCount sql.NullInt64
var yearAddCartCount sql.NullInt64
var weekPv sql.NullInt64
var weekProductClickCount sql.NullInt64
var weekAddCartCount sql.NullInt64
var pDay string
err = results.Scan(&id, &keyword, &yearPv, &yearProductClickCount, &yearAddCartCount, &weekPv, &weekProductClickCount, &weekAddCartCount, &pDay)
if err != nil { log.Print(err.Error()) }
if keyword.Valid && len(keyword.String) > 0 && keyword.String != "" {
key := cleanKeyword(keyword.String)
if v, isExist := m.Load(key); (!isExist && len(key) > 0) || (isExist && int32(year_pv.Int64) > v.(int32)) {
if v, isExist := m.Load(key); (!isExist && len(key) > 0) || (isExist && int32(yearPv.Int64) > v.(int32)) {
var w = new(Word)
w.Keyword = key
w.YearCount = 0
......@@ -268,14 +265,14 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
w.WeekCount = 0
w.WeekClickCount = 0
w.WeekCartCount = 0
w.KeywordVersion = p_day
w.KeywordVersion = pDay
if year_pv.Valid { w.YearCount = int32(year_pv.Int64) }
if year_product_click_count.Valid { w.YearClickCount = int32(year_product_click_count.Int64) }
if year_add_cart_count.Valid { w.YearCartCount = int32(year_add_cart_count.Int64) }
if week_pv.Valid { w.WeekCount = int32(week_pv.Int64) }
if week_product_click_count.Valid { w.WeekClickCount = int32(week_product_click_count.Int64) }
if week_add_cart_count.Valid { w.WeekCartCount = int32(week_add_cart_count.Int64) }
if yearPv.Valid { w.YearCount = int32(yearPv.Int64) }
if yearProductClickCount.Valid { w.YearClickCount = int32(yearProductClickCount.Int64) }
if yearAddCartCount.Valid { w.YearCartCount = int32(yearAddCartCount.Int64) }
if weekPv.Valid { w.WeekCount = int32(weekPv.Int64) }
if weekProductClickCount.Valid { w.WeekClickCount = int32(weekProductClickCount.Int64) }
if weekAddCartCount.Valid { w.WeekCartCount = int32(weekAddCartCount.Int64) }
processWord(w)
if !isFilterWord(w) {
......@@ -340,9 +337,10 @@ func processWord(w *Word) {
calculateWordRank(w)
}
// traditional chinese to simple chinese
// chinese trim
// english remove redundant blank char
func cleanKeyword(keyword string) string {
fmt.Println(keyword)
out, err := t2s.Convert(keyword)
if err != nil { fmt.Println(err) }
keyword = strings.TrimSpace(strings.ToLower(out))
......@@ -467,4 +465,4 @@ func calculateCountFactor(count int32, rank int32) float64 {
//根据搜索次数,转换为热度因子
count = count * rank + 10
return math.Log10(math.Sqrt(float64(count)))
}
\ No newline at end of file
}
package main
import (
"sync"
"fmt"
"github.com/liuzl/gocc"
"strings"
)
var tmap sync.Map
// main is a scratch driver: it builds a gocc "t2s" converter and converts a
// sample string, prints a sample keyword with all whitespace removed, prints
// H, then calls add and prints the value stored in tmap under key "a".
// NOTE(review): this text was taken from a rendered diff, so removed and added
// lines may both appear here — confirm against the actual file.
func main() {
// The error returned by gocc.New is discarded.
var t2s, _ = gocc.New("t2s")
var _, err = t2s.Convert("中國")
// NOTE(review): "succ" is printed only when Convert returned an error — the
// condition looks inverted; confirm the intent.
if err != nil { fmt.Println("succ")}
//var t2s, _ = gocc.New("t2s")
//var _, err = t2s.Convert("中國")
//if err != nil { fmt.Println("succ")}
var s = "意尔康 男 鞋"
//var s = "意尔康 男 鞋"
//var re, _ = regexp.Compile("\\s+")
//var st = re.ReplaceAllLiteralString(s," ")
// Fields splits on whitespace and Join uses an empty separator, so the words
// are printed concatenated with no spaces.
fmt.Println(strings.Join(strings.Fields(s),""))
//fields := strings.Fields(s)
//t := time.Now()
//fmt.Println(factorial(5))
//fmt.Print(time.Now().Unix() -t.Unix() )
// H is not declared in the visible source; presumably it is defined elsewhere
// in package main — verify.
fmt.Println(H)
//var lst = new(list.List)
//for i := 1; i < 10 ; i++ {
// lst.PushBack(i)
//}
//for p := lst.Front(); p != nil ; p = p.Next() {
// fmt.Println(p.Value)
//}
// add stores "b" under key "a" in tmap; the load below reads that same key.
add()
var val,_ = tmap.Load("a")
fmt.Print(val)
}
// add stores the value "b" under key "a" in the package-level tmap, then
// loads that key back and prints whatever was found.
func add() {
	tmap.Store("a", "b")
	stored, _ := tmap.Load("a")
	fmt.Print(stored)
}
//// 求阶乘
//func factorial(n int64) int64 {
// if n == 1 { return 1}
// return n * factorial(n-1)
//}
//
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment