Commit 535c8654 by zhaoyanchao

是否欧洲商品写入索引

parent 6abf6e24
package main package main
import ( import (
"net/http" "net/http"
"time" "time"
"encoding/json" "encoding/json"
"bytes" "bytes"
...@@ -21,6 +21,7 @@ type ENV struct { ...@@ -21,6 +21,7 @@ type ENV struct {
EsPassword string EsPassword string
ManualFolder string ManualFolder string
SensitiveFolder string SensitiveFolder string
EuropeWordFolder string
} }
type Message struct { type Message struct {
...@@ -38,7 +39,8 @@ var test_env = &ENV{ ...@@ -38,7 +39,8 @@ var test_env = &ENV{
//ManualFolder: "D:\\DataFiles\\suggest_corpus-20180801\\manual", //ManualFolder: "D:\\DataFiles\\suggest_corpus-20180801\\manual",
//SensitiveFolder: "D:\\DataFiles\\suggest_corpus-20180801\\sensitive" //SensitiveFolder: "D:\\DataFiles\\suggest_corpus-20180801\\sensitive"
ManualFolder: "/data/pssmaster/corpus_set/suggest_corpus/manual", ManualFolder: "/data/pssmaster/corpus_set/suggest_corpus/manual",
SensitiveFolder: "/data/pssmaster/corpus_set/suggest_corpus/sensitive"} SensitiveFolder: "/data/pssmaster/corpus_set/suggest_corpus/sensitive",
EuropeWordFolder: "/data/pssmaster/corpus_set/suggest_corpus/europe_word"}
var prod_env = &ENV { var prod_env = &ENV {
DataWareDB: "Search_DataWar_R:pY1P9zUj9x1M65ot5szo@tcp(secooDataWarehouse.slave.com:3306)/secooDataWarehouse", DataWareDB: "Search_DataWar_R:pY1P9zUj9x1M65ot5szo@tcp(secooDataWarehouse.slave.com:3306)/secooDataWarehouse",
...@@ -47,7 +49,8 @@ var prod_env = &ENV { ...@@ -47,7 +49,8 @@ var prod_env = &ENV {
EsUser: "search", EsUser: "search",
EsPassword: "search5z0NvEn1D", EsPassword: "search5z0NvEn1D",
ManualFolder: "/data/pssmaster/corpus_set/suggest_corpus/manual", ManualFolder: "/data/pssmaster/corpus_set/suggest_corpus/manual",
SensitiveFolder: "/data/pssmaster/corpus_set/suggest_corpus/sensitive"} SensitiveFolder: "/data/pssmaster/corpus_set/suggest_corpus/sensitive",
EuropeWordFolder: "/data/pssmaster/corpus_set/suggest_corpus/europe_word"}
// 重要,该参数 确定是 正式还是 测试环境 // 重要,该参数 确定是 正式还是 测试环境
var RUN_ENV = prod_env var RUN_ENV = prod_env
......
...@@ -3,7 +3,7 @@ package main ...@@ -3,7 +3,7 @@ package main
import ( import (
"bufio" "bufio"
"context" "context"
"database/sql" "database/sql"
"fmt" "fmt"
_ "github.com/go-sql-driver/mysql" _ "github.com/go-sql-driver/mysql"
"github.com/liuzl/gocc" "github.com/liuzl/gocc"
...@@ -19,7 +19,7 @@ import ( ...@@ -19,7 +19,7 @@ import (
"unicode" "unicode"
"unicode/utf8" "unicode/utf8"
"crypto/md5" "crypto/md5"
) )
type Word struct { type Word struct {
...@@ -43,6 +43,7 @@ type Word struct { ...@@ -43,6 +43,7 @@ type Word struct {
WordRank float64 `json:"wordRank"` WordRank float64 `json:"wordRank"`
WordABRank float64 `json:"wordABRank"` WordABRank float64 `json:"wordABRank"`
KeywordVersion string `json:"keywordVersion"` KeywordVersion string `json:"keywordVersion"`
IsEuropeWord bool `json:"isEuropeWord"`
WeekUv int32 `json:"-"` WeekUv int32 `json:"-"`
WeekClickUv int32 `json:"-"` WeekClickUv int32 `json:"-"`
...@@ -62,6 +63,7 @@ var brandMap = make(map[string]int) ...@@ -62,6 +63,7 @@ var brandMap = make(map[string]int)
var categoryMap = make(map[string]int) var categoryMap = make(map[string]int)
var manualMap = make(map[string]int32) var manualMap = make(map[string]int32)
var sensitiveMap = make(map[string]bool) var sensitiveMap = make(map[string]bool)
var europeWordMap = make(map[string]bool)
var now = time.Now() var now = time.Now()
var dateStr = fmt.Sprintf("%d-%02d-%02d",now.Year(),now.Month(),now.Day()) var dateStr = fmt.Sprintf("%d-%02d-%02d",now.Year(),now.Month(),now.Day())
...@@ -111,6 +113,7 @@ func main() { ...@@ -111,6 +113,7 @@ func main() {
loadErpDB() loadErpDB()
loadManual(RUN_ENV.ManualFolder) loadManual(RUN_ENV.ManualFolder)
loadSensitive(RUN_ENV.SensitiveFolder) loadSensitive(RUN_ENV.SensitiveFolder)
loadEuropeWord(RUN_ENV.EuropeWordFolder)
var wg sync.WaitGroup var wg sync.WaitGroup
arr := queryInfo(datawareDB) arr := queryInfo(datawareDB)
...@@ -142,6 +145,29 @@ func main() { ...@@ -142,6 +145,29 @@ func main() {
fmt.Printf(" %s task finish Cost %d ms\n", dateStr, time.Since(startTime).Nanoseconds()/1e6) fmt.Printf(" %s task finish Cost %d ms\n", dateStr, time.Since(startTime).Nanoseconds()/1e6)
} }
func loadEuropeWord(folder string) {
files, _ := ioutil.ReadDir(folder)
for _,file := range files {
if !file.IsDir() {
fi, err := os.Open(folder + "/" + file.Name())
if err != nil {
fmt.Print(err)
}
br := bufio.NewReader(fi)
for {
bytes, _, e := br.ReadLine()
if e == io.EOF { break }
line := string(bytes)
key := cleanKeyword(line)
europeWordMap[cleanKeyword(key)] = true
}
}
}
fmt.Println("europe Word Map:", europeWordMap)
}
func checkUnusedData(bulkProcessor *elastic.BulkProcessor) { func checkUnusedData(bulkProcessor *elastic.BulkProcessor) {
var tmpMap = make(map[string]bool) var tmpMap = make(map[string]bool)
for brand := range brandMap { for brand := range brandMap {
...@@ -192,7 +218,8 @@ func addWord(w *Word, processor *elastic.BulkProcessor) { ...@@ -192,7 +218,8 @@ func addWord(w *Word, processor *elastic.BulkProcessor) {
Index("search_suggest_index"). Index("search_suggest_index").
Type("search_suggest_type").Id(id).Doc(w) Type("search_suggest_type").Id(id).Doc(w)
processor.Add(req) processor.Add(req)
fmt.Println("add to es: " , w)
fmt.Println("add to es: ", w)
} }
} }
...@@ -466,6 +493,10 @@ func processWord(w *Word) { ...@@ -466,6 +493,10 @@ func processWord(w *Word) {
w.IsSensitive = true w.IsSensitive = true
} }
if _, isExist := europeWordMap[w.Keyword]; isExist {
w.IsEuropeWord = true
}
calculateWordRank(w) calculateWordRank(w)
calculateWordABRank(w) calculateWordABRank(w)
addNewScoreIfNewHotWord(w) addNewScoreIfNewHotWord(w)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment