Commit 535c8654 by zhaoyanchao

是否欧洲商品写入索引

parent 6abf6e24
package main
import (
"net/http"
"net/http"
"time"
"encoding/json"
"bytes"
......@@ -21,6 +21,7 @@ type ENV struct {
EsPassword string
ManualFolder string
SensitiveFolder string
EuropeWordFolder string
}
type Message struct {
......@@ -38,7 +39,8 @@ var test_env = &ENV{
//ManualFolder: "D:\\DataFiles\\suggest_corpus-20180801\\manual",
//SensitiveFolder: "D:\\DataFiles\\suggest_corpus-20180801\\sensitive"
ManualFolder: "/data/pssmaster/corpus_set/suggest_corpus/manual",
SensitiveFolder: "/data/pssmaster/corpus_set/suggest_corpus/sensitive"}
SensitiveFolder: "/data/pssmaster/corpus_set/suggest_corpus/sensitive",
EuropeWordFolder: "/data/pssmaster/corpus_set/suggest_corpus/europe_word"}
var prod_env = &ENV {
DataWareDB: "Search_DataWar_R:pY1P9zUj9x1M65ot5szo@tcp(secooDataWarehouse.slave.com:3306)/secooDataWarehouse",
......@@ -47,7 +49,8 @@ var prod_env = &ENV {
EsUser: "search",
EsPassword: "search5z0NvEn1D",
ManualFolder: "/data/pssmaster/corpus_set/suggest_corpus/manual",
SensitiveFolder: "/data/pssmaster/corpus_set/suggest_corpus/sensitive"}
SensitiveFolder: "/data/pssmaster/corpus_set/suggest_corpus/sensitive",
EuropeWordFolder: "/data/pssmaster/corpus_set/suggest_corpus/europe_word"}
// Important: this variable determines whether the production or the test environment is used.
var RUN_ENV = prod_env
......
......@@ -3,7 +3,7 @@ package main
import (
"bufio"
"context"
"database/sql"
"database/sql"
"fmt"
_ "github.com/go-sql-driver/mysql"
"github.com/liuzl/gocc"
......@@ -19,7 +19,7 @@ import (
"unicode"
"unicode/utf8"
"crypto/md5"
)
)
type Word struct {
......@@ -43,6 +43,7 @@ type Word struct {
WordRank float64 `json:"wordRank"`
WordABRank float64 `json:"wordABRank"`
KeywordVersion string `json:"keywordVersion"`
IsEuropeWord bool `json:"isEuropeWord"`
WeekUv int32 `json:"-"`
WeekClickUv int32 `json:"-"`
......@@ -62,6 +63,7 @@ var brandMap = make(map[string]int)
// Package-level lookup tables and the run timestamp shared across the job.
var (
	// categoryMap is a string-keyed table of int values.
	// NOTE(review): presumably keyed by category name — confirm against the loader.
	categoryMap = make(map[string]int)

	// manualMap is a string-keyed table of int32 values.
	// NOTE(review): presumably filled by loadManual — confirm.
	manualMap = make(map[string]int32)

	// sensitiveMap is a string-keyed set.
	// NOTE(review): presumably filled by loadSensitive — confirm.
	sensitiveMap = make(map[string]bool)

	// europeWordMap is the set of cleaned keywords read by loadEuropeWord;
	// processWord marks a word as a Europe word when its Keyword is a key here.
	europeWordMap = make(map[string]bool)

	// now is captured once, when the package is initialised.
	now = time.Now()

	// dateStr is now rendered as zero-padded year-month-day.
	dateStr = fmt.Sprintf("%d-%02d-%02d", now.Year(), now.Month(), now.Day())
)
......@@ -111,6 +113,7 @@ func main() {
loadErpDB()
loadManual(RUN_ENV.ManualFolder)
loadSensitive(RUN_ENV.SensitiveFolder)
loadEuropeWord(RUN_ENV.EuropeWordFolder)
var wg sync.WaitGroup
arr := queryInfo(datawareDB)
......@@ -142,6 +145,29 @@ func main() {
fmt.Printf(" %s task finish Cost %d ms\n", dateStr, time.Since(startTime).Nanoseconds()/1e6)
}
func loadEuropeWord(folder string) {
files, _ := ioutil.ReadDir(folder)
for _,file := range files {
if !file.IsDir() {
fi, err := os.Open(folder + "/" + file.Name())
if err != nil {
fmt.Print(err)
}
br := bufio.NewReader(fi)
for {
bytes, _, e := br.ReadLine()
if e == io.EOF { break }
line := string(bytes)
key := cleanKeyword(line)
europeWordMap[cleanKeyword(key)] = true
}
}
}
fmt.Println("europe Word Map:", europeWordMap)
}
func checkUnusedData(bulkProcessor *elastic.BulkProcessor) {
var tmpMap = make(map[string]bool)
for brand := range brandMap {
......@@ -192,7 +218,8 @@ func addWord(w *Word, processor *elastic.BulkProcessor) {
Index("search_suggest_index").
Type("search_suggest_type").Id(id).Doc(w)
processor.Add(req)
fmt.Println("add to es: " , w)
fmt.Println("add to es: ", w)
}
}
......@@ -466,6 +493,10 @@ func processWord(w *Word) {
w.IsSensitive = true
}
if _, isExist := europeWordMap[w.Keyword]; isExist {
w.IsEuropeWord = true
}
calculateWordRank(w)
calculateWordABRank(w)
addNewScoreIfNewHotWord(w)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment