Commit af4c8ed7 by zhaoyanchao

更改品牌品类优先,避免误识别为无结果词

parent ed50608a
......@@ -70,6 +70,8 @@ var t2s, _ = gocc.New("t2s")
// prefixFilterArr lists the keyword prefixes checked by isFilterWord via
// strings.HasPrefix; a keyword starting with any entry is reported as filtered.
// Entries are matched exactly as written (case-sensitive, byte-for-byte).
var prefixFilterArr = []string{
	"https://",
	"http://",
	"dg",
	"d & g",
	"dolce&gabbana",
	"dolce & gabbana",
	"杜嘉班纳",
	"避孕",
	"情趣",
	"cucci",
	"乒乓球",
	"cuccl",
	"gucii",
	"tod's",
	"iwc7",
}
// Package-level size constants.
// NOTE(review): the use sites are outside this excerpt; the names suggest a
// row step used when splitting table reads and a cap on tags per word — confirm.
const (
	TABLE_SPLIT_STEP_SIZE = 10000
	MAX_TAG_SIZE          = 5
)
......@@ -78,6 +80,7 @@ var UPDATE_TIME = time.Now().UnixNano() / 1e6
func main() {
startTime := time.Now()
log.SetFlags(log.Lshortfile | log.LstdFlags)
datawareDB, err := sql.Open("mysql", RUN_ENV.DataWareDB)
......@@ -180,6 +183,7 @@ func cleanForEs(w *Word) {
func addWord(w *Word, processor *elastic.BulkProcessor) {
processWord(w)
if !isFilterWord(w) {
wordMap.Store(w.Keyword,w)
cleanForEs(w)
......@@ -305,7 +309,6 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
results, err := db.Query(sqlStr)
if err != nil { log.Print(err.Error()) }
log.Print("read database success ")
for results.Next() {
......@@ -472,25 +475,41 @@ func processWord(w *Word) {
func isFilterWord(w *Word) bool {
// 品牌词 类目词 人工干预词 不做过滤
if w.IsBrand || w.IsCategory || w.IsManual {
w.IsSensitive = false
return false
}
// 敏感词过滤
if w.IsSensitive { return true }
if w.IsSensitive {
return true
}
// 过滤掉太长的词 每个中文字占3个byte
if utf8.RuneCountInString(w.Keyword) <= 1 || len(w.Keyword) > 50 { return true }
if utf8.RuneCountInString(w.Keyword) <= 1 || len(w.Keyword) > 50 {
return true
}
// 过滤掉商品id,商品id是有7位数字组成
if len(w.Keyword) > 6 && isAllDigit(w.Keyword) { return true }
if len(w.Keyword) > 6 && isAllDigit(w.Keyword) {
return true
}
// 前缀过滤
for _, v := range prefixFilterArr {
if strings.HasPrefix(w.Keyword, v) { return true }
if strings.HasPrefix(w.Keyword, v) {
return true
}
}
// 品牌词 类目词 人工干预词 不做过滤
if w.IsBrand || w.IsCategory || w.IsManual { return false }
// 年数据过滤
if w.YearCount < 2 || w.YearClickCount < 2 { return true }
if w.YearCount < 2 || w.YearClickCount < 2 {
return true
}
// 判断是否是热搜词 一年内搜索次数大于50或者一周内搜索次数大于5
if isHotSearchWord(w) {
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment