Commit d1972867 by xupeng

add script

parent 1fa5e35f
#! /bin/bash
# zyc
# 删除 suggest 服务 7天前的 access_log 日志
# 因为 today 取的当前时间, fileDate 取得是文件创建当天0:00 的时间,所以不确保是整整 24*7 个小时
. /etc/profile
today=`date +%s`
basepath="/data/secoo_tomcat/javaapp/so-suggest-rest/logs"
for file in `ls $basepath`
do
if [[ $file == access* ]]; then
echo $file
fileDate=`date +%s -d ${file:0-14:10}`
echo $fileDate
timePast=$(($today -$fileDate))
if [[ $timePast -gt 3600*24*7 ]] ; then
rm -f $basepath"/"$file
fi
fi
done
basePath="/data/crontab/suggest/logs"
cd $basePath
size=`ls -l| grep "service.log"|awk '{ print $5}'`
if [[ "$size" -gt $((80*1024*1024)) ]] ; then
cp service.log service-`date +%Y-%m-%d`.log
# 删除日志
rm -f service.log
# 删除七天前的备份日志
rm -f service-`date +%Y-%m-%d -d "-7 days"`.log
fi
import requests
# HTTP headers attached to every Elasticsearch request made by this script.
# NOTE(review): the Authorization entry is a Basic credential committed in
# source; consider loading it from the environment or a secrets store.
DEFAULT_HEADERS = {
    "platform-type": "0",
    "device-id": "c79d1dfeb84c4500_38864DF8:9009:4FB9:83F3:B3FAAE91606C",
    "app-ver": "6.0.18",
    "User-Agent": "Secoo-iPhone/6.0.12 (iPhone; iOS 11.4.1; Scale/2.00)",
    "Authorization": "Basic c2VhcmNoOnNlYXJjaDV6ME52RW4xRA==",
    "Content-Type": "application/json",
}
def getMostDocTimestamp():
    """Return the ``updateTime`` value shared by the most documents.

    Runs a ``terms`` aggregation on ``updateTime`` against the suggest index
    and picks the bucket with the highest ``doc_count``.

    Returns:
        The ``key`` of the fullest bucket, or ``-1`` when the aggregation
        yields no bucket with a positive document count.

    Raises:
        requests.HTTPError: if Elasticsearch answers with an error status.
    """
    body = {
        "size": 0,
        "aggs": {
            "updateTimeAgg": {
                "terms": {
                    "field": "updateTime"
                }
            }
        }
    }
    resp = requests.post(
        'http://yunhead.siku.cn/search_suggest_index/search_suggest_type/_search',
        headers=DEFAULT_HEADERS,
        json=body,
        timeout=60)  # never let a stalled connection hang the cron job
    # Surface 4xx/5xx directly instead of a KeyError on the error payload.
    resp.raise_for_status()
    buckets = resp.json()['aggregations']['updateTimeAgg']['buckets']
    if not buckets:
        return -1
    # max() keeps the first bucket on ties, matching a strict ">" scan.
    fullest = max(buckets, key=lambda bucket: bucket['doc_count'])
    if fullest['doc_count'] <= 0:
        return -1
    return fullest['key']
def delNoneMostDocs(timestamp):
    """Delete every suggest document whose ``updateTime`` is not ``timestamp``.

    As a safety net the deletion is skipped when fewer than 300000 documents
    carry ``timestamp``.

    Args:
        timestamp: the ``updateTime`` value of the documents to keep.

    Raises:
        requests.HTTPError: if the delete-by-query request is rejected.
    """
    most_doc_count = checkMostDocCountByUpdateTime(timestamp)
    print('check most doc count ' + str(most_doc_count))
    if most_doc_count < 300000:
        return
    body = {
        "query": {
            "bool": {
                "must_not": {
                    "term": {
                        "updateTime": timestamp
                    }
                }
            }
        }
    }
    resp = requests.post(
        'http://yunhead.siku.cn/search_suggest_index/search_suggest_type/_delete_by_query',
        headers=DEFAULT_HEADERS,
        json=body)
    print(resp.text)
    # Print first so the response is logged, then fail loudly: a rejected
    # delete used to pass silently with exit status 0.
    resp.raise_for_status()
def checkMostDocCountByUpdateTime(timestamp):
    """Return how many suggest documents have ``updateTime == timestamp``.

    Args:
        timestamp: the ``updateTime`` value to count.

    Returns:
        The total hit count reported by Elasticsearch, as an integer.

    Raises:
        requests.HTTPError: if Elasticsearch answers with an error status.
    """
    body = {
        "query": {
            "bool": {
                "must": {
                    "term": {
                        "updateTime": timestamp
                    }
                }
            }
        }
    }
    resp = requests.post(
        'http://yunhead.siku.cn/search_suggest_index/search_suggest_type/_search',
        headers=DEFAULT_HEADERS,
        json=body,
        timeout=60)  # never let a stalled connection hang the cron job
    # Surface 4xx/5xx directly instead of a KeyError on the error payload.
    resp.raise_for_status()
    total = resp.json()['hits']['total']
    # Elasticsearch 7+ reports hits.total as {"value": N, "relation": ...};
    # earlier versions return a plain integer.
    if isinstance(total, dict):
        total = total['value']
    return total
def start():
    """Find the dominant ``updateTime`` and purge documents from other versions."""
    delNoneMostDocs(getMostDocTimestamp())


if __name__ == "__main__":
    start()
# -*- coding:utf-8 -*-
import json
import requests
import os
import sys
import datetime
from optparse import OptionParser
def secooSendFeiShu(phones, title, content):
    """Send a notification through the matrix-inform ``sendToUser`` endpoint.

    Args:
        phones: comma-separated phone numbers of the recipients.
        title: message title.
        content: message body; the current local time is appended on a new line.

    Returns:
        The raw response body returned by the endpoint.
    """
    # Build a real list: on Python 3 map() returns an iterator, which
    # json.dumps cannot serialize. Empty entries (e.g. from a trailing comma)
    # are dropped.
    phone_list = [phone.strip() for phone in phones.strip().split(',') if phone.strip()]
    date_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    content += ('\n' + date_str)
    data = {
        "phones": phone_list,
        "title": title,
        "body": [content.strip()]
    }
    # host = 'http://apims.siku.cn/mock/303/user/sendToUser'
    host = 'http://matrix-inform.secoolocal.com/user/sendToUser'
    headers = {"Content-Type": "application/json"}
    print('req:', data)
    # Bounded wait so a stuck notification cannot block the calling cron job.
    rep = requests.post(host, data=json.dumps(data), headers=headers, timeout=10)
    print('rep:', rep.content)
    return rep.content
def get_option_parser():
usage = "usage: %prog [options] arg1 arg2"
parser = OptionParser(usage=usage)
parser.add_option("-p", "--phones", dest="phones", action="store", type="string")
parser.add_option("-t", "--title", dest="title", action="store", type="string")
parser.add_option("-c", "--content", dest="content", action="store", type="string")
return parser
if __name__ == '__main__':
    # reload(sys)
    # sys.setdefaultencoding('utf-8')
    optParser = get_option_parser()
    options, args = optParser.parse_args(sys.argv[1:])
    print(options)
    # All three options are mandatory; show the usage text and exit otherwise.
    if any(value is None for value in (options.phones, options.title, options.content)):
        optParser.print_help()
        sys.exit(1)
    secooSendFeiShu(options.phones, options.title, options.content)
    print('Done.')
# Suggest-word refresh: fetch the latest search service logs, rebuild the
# "no result" word list, run the suggest task and send a notification with
# the outcome.

# Everything below uses relative paths (rm, scp target, helper scripts), so
# refuse to continue from the wrong directory.
cd /data/crontab/suggest/ || {
  echo "cannot cd to /data/crontab/suggest/" >&2
  exit 1
}
source /etc/profile
today=$(date "+%Y-%m-%d") # only referenced by the disabled java extractors below
no_result_file=/data/pssmaster/corpus_set/suggest_corpus/sensitive/no_result_all.txt

# Fetch the new logs
rm -f service.log service.log.1
scp client1.secoo-inc.com:/data/hdfs/check_no_result/logs/service.log . ||
  echo "warning: could not fetch service.log" >&2
scp client1.secoo-inc.com:/data/hdfs/check_no_result/logs/service.log.1 . ||
  echo "warning: could not fetch service.log.1" >&2
if [[ -f ./service.log.1 ]]; then
  cat ./service.log.1 >>service.log
fi
#java -cp $CLASSPATH:./get_no_result/* com.secoo.so.searchword.task.LogExtractor ./service.log $today no_result_word > /data/pssmaster/corpus_set/suggest_corpus/sensitive/no_result_all.txt
#java -cp $CLASSPATH:./get_no_result/* com.secoo.so.searchword.task.LogExtractor ./service.log $today europe_word > /data/pssmaster/corpus_set/suggest_corpus/europe_word/europe_word.txt
# NOTE(review): lines without the marker yield an empty $2, so the list can
# contain one blank entry — confirm whether the consumer tolerates that.
awk -F'no result word:::' '{print $2}' ./service.log | sort -u >"$no_result_file"
# Read via stdin so the variable holds only the number, not "N <path>".
no_result_all_count=$(wc -l <"$no_result_file")
echo "no result all count $no_result_all_count"
#go script
#./suggest-task >> /data/crontab/suggest/logs/message.log 2>&1
#java
if ! ./suggest-task.sh; then
  echo "error happened"
  python notify_util.py -p "13426233960" -t "失败:es提示词更新" -c ""
else
  python notify_util.py -p "13426233960" -t "es提示词更新成功" -c ""
  python delete_old_version.py
fi
#!/usr/bin/env bash
# Launch SuggestTask from the jar in the current directory with a 2 GiB
# initial / 4 GiB maximum heap and the suggest.saveToFile property set to true.
jvm_opts=(-Xms2048m -Xmx4096m -Dsuggest.saveToFile=true)
java "${jvm_opts[@]}" \
  -cp so-suggest-task-1.0-SNAPSHOT.jar \
  com.secoo.so.suggest.task.SuggestTask
#!/usr/bin/env bash
# Runs com.secoo.so.suggest.task.SuggestTask from so-suggest-task-1.0-SNAPSHOT.jar,
# which is resolved relative to the current working directory.
# NOTE(review): the two invocations below differ only in the -Xms/-Xmx heap
# flags and would run back to back; this looks like the before/after pair of
# a diff — confirm whether only the second line is meant to remain.
java -cp so-suggest-task-1.0-SNAPSHOT.jar com.secoo.so.suggest.task.SuggestTask
java -Xms2048m -Xmx4096m -cp so-suggest-task-1.0-SNAPSHOT.jar com.secoo.so.suggest.task.SuggestTask
package com.secoo.so.suggest;
import com.alibaba.fastjson.JSON;
import com.secoo.so.suggest.entity.EsSuggestKeywordInfo;
import com.secoo.so.suggest.es.EsClient;
import com.secoo.so.suggest.es.EsObject;
import com.secoo.so.suggest.task.SuggestTask;
import com.secoo.so.suggest.util.FileUtils;
import com.secoo.so.suggest.util.PinYinUtils;
import com.secoo.so.suggest.util.StringUtils;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import java.io.File;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Scratch class holding ad-hoc manual checks for the suggest task. Only
 * {@link #main(String[])} is a JVM entry point; the numbered {@code mainN}
 * methods are not invoked from within this class.
 *
 * @author xupeng
 * @date: 2022/2/8
 */
public class TestCode {

    /** Prints the result of two string-normalisation helpers for a sample word. */
    public static void main7(String[] args) throws Exception {
        System.out.println(StringUtils.dbc2Sbc("模糊字母"));
        System.out.println(PinYinUtils.convertToSimplifiedChinese("模糊字母"));
    }

    /**
     * Compares the keyword dump taken from Elasticsearch with a freshly generated keyword file
     * and prints the lines that exist on one side only.
     */
    public static void main(String[] args) throws Exception {
        List<String> esLines = FileUtils.readLines(new File("d:\\suggest-es.json"));
        List<String> newLines = FileUtils.readLines(new File("d:\\suggest-index-keyword-20220209142219.txt"));
        printMissing("onlyEs", esLines, newLines);
        printMissing("onlyNew", newLines, esLines);
    }

    /**
     * Prints, with a running counter, every line of {@code source} that is absent from
     * {@code other}. Membership is checked against a hash set, so the comparison is linear
     * instead of scanning {@code other} once per line.
     */
    private static void printMissing(String label, List<String> source, List<String> other) {
        Set<String> known = new HashSet<>(other);
        int count = 0;
        for (String line : source) {
            if (!known.contains(line)) {
                System.out.println(++count + "\t" + label + ": " + line);
            }
        }
    }

    /** Dumps the keyword of every document returned for an empty bool query to a local file. */
    public static void main5(String[] args) throws Exception {
        // NOTE(review): credentials are hard-coded here; consider moving them out of source.
        EsClient esClient = EsClient.buildEsClient("http://yunhead.siku.cn", "search", "search5z0NvEn1D");
        try {
            BoolQueryBuilder queryBuilder = QueryBuilders.boolQuery();
            List<EsSuggestKeywordInfo> list = esClient.search("search_suggest_index", queryBuilder, EsSuggestKeywordInfo.class);
            StringBuilder keywordBuilder = new StringBuilder();
            for (EsSuggestKeywordInfo suggestKeywordInfo : list) {
                keywordBuilder.append(suggestKeywordInfo.getKeyword()).append("\n");
            }
            FileUtils.saveToFile(keywordBuilder.toString(), "d:\\suggest-es.json", false);
        } finally {
            // Release the client even when the search or the file write fails.
            esClient.close();
        }
    }

    /**
     * Searches one keyword, writes copies with a suffix appended to the keyword back through
     * the batch API, then repeats the same search and prints both results.
     */
    public static void main4(String[] args) throws Exception {
        // NOTE(review): credentials are hard-coded here; consider moving them out of source.
        EsClient esClient = EsClient.buildEsClient("http://yunhead.siku.cn", "search", "search5z0NvEn1D");
        try {
            BoolQueryBuilder queryBuilder = QueryBuilders.boolQuery()
                    .must(QueryBuilders.termQuery("keyword", "高跟鞋女秋冬新款"));
            List<EsSuggestKeywordInfo> list = esClient.search("search_suggest_index", queryBuilder, EsSuggestKeywordInfo.class);
            System.out.println(JSON.toJSONString(list));
            List<EsObject> esList = new ArrayList<>();
            for (EsSuggestKeywordInfo esSuggestKeywordInfo : list) {
                esSuggestKeywordInfo.setKeyword(esSuggestKeywordInfo.getKeyword() + "红色");
                esList.add(new EsObject(StringUtils.md5(esSuggestKeywordInfo.getKeyword()), esSuggestKeywordInfo));
            }
            esClient.batch("search_suggest_index", "search_suggest_type", esList);
            list = esClient.search("search_suggest_index", queryBuilder, EsSuggestKeywordInfo.class);
            System.out.println(JSON.toJSONString(list));
        } finally {
            // Release the client even when a search or the batch write fails.
            esClient.close();
        }
    }

    /** Parses a sample keyword document from JSON and prints its calculated word rank. */
    public static void main3(String[] args) {
        String json = "{\"isBrand\":false,\"isCategory\":false,\"isEuropeWord\":false,\"isManual\":false,\"isSensitive\":false,\"keyword\":\"高跟鞋女秋冬新款\",\"keywordPinYin\":\"gaogenxienvqiudongxinkuan\",\"keywordVersion\":\"2022-02-07\",\"manualValue\":0,\"suggestTags\":\"\",\"updateTime\":1644287409631,\"weekCartCount\":0,\"weekCartRatio\":0.0,\"weekClickCount\":0,\"weekClickRatio\":0.0,\"weekCount\":0,\"wordABRank\":21251.870636624943,\"wordRank\":19772.059911048906,\"yearCartCount\":1,\"yearCartRatio\":0.1111111111111111,\"yearClickCount\":33,\"yearClickRatio\":1.2222222222222223,\"yearCount\":27}";
        EsSuggestKeywordInfo suggestKeywordInfo = JSON.parseObject(json, EsSuggestKeywordInfo.class);
        System.out.println(SuggestTask.calculateWordRank(suggestKeywordInfo));
        suggestKeywordInfo.setKeyword(suggestKeywordInfo.getKeyword() + "红色");
    }

    /** Prints a sample string after dbc2Sbc conversion and punctuation-to-space replacement. */
    public static void main2(String[] args) {
        String word = StringUtils.dbc2Sbc("我爱。.·").replaceAll("\ufffc|,|,|\\.", " ");
        System.out.println(word);
    }

    /** Prints the byte length of a mixed Chinese/ASCII string under several encodings. */
    public static void main1(String[] args) {
        try {
            System.out.println("中文a".getBytes("UTF-8").length); // 7 (3 + 3 + 1 bytes)
            System.out.println("中文a".getBytes("GBK").length); // 5 (2 + 2 + 1 bytes)
            // The next three results depend on StringUtils.getByteLength, which is not shown here.
            System.out.println(StringUtils.getByteLength("中文a", "utf-8"));
            System.out.println(StringUtils.getByteLength("中文a", "gBK"));
            System.out.println(StringUtils.getByteLength("中文a"));
            System.out.println("中文a".length()); // 3 (UTF-16 code units)
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment