Commit 9bc3ae0b by 徐鹏

Merge branch 'suggest-task-java' into 'master'

Suggest task java

See merge request !1
parents 60d8f5e5 611e1909
......@@ -2,4 +2,47 @@
main/info.log
main/test.go
main/suggest-task
main/suggest-task.exe
\ No newline at end of file
main/suggest-task.exe
# Compiled class file
**/target
**/*.class
**/.classpath
**/.settings
**/.project
**/.idea
**/.vscode
**/*.iml
**/.DS_Store
**/node_modules
**/dist
*.suo
*.ntvs*
*.njsproj
*.sln
*.sublime-project
*.sublime-workspace
**/coverage
**/test/unit/coverage/
**/test/e2e/reports/
# Log file
*.log
**/*.log
# BlueJ files
*.ctxt
# Mobile Tools for Java (J2ME)
.mtj.tmp/
# Package Files #
*.jar
*.war
*.ear
*.zip
*.tar.gz
*.rar
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
**/hs_err_pid*
\ No newline at end of file
<assembly>
<id>dependency</id>
<formats>
<format>jar</format>
</formats>
<!-- 压缩包下是否生成和项目名相同的根目录-->
<includeBaseDirectory>false</includeBaseDirectory>
<dependencySets>
<dependencySet>
<!-- 是否把当前项目的输出jar包并使用,true则会把当前项目输出为jar包到输出目录,false不输出 -->
<useProjectArtifact>false</useProjectArtifact>
<!-- 是否解压依赖包 -->
<unpack>true</unpack>
<scope>system</scope>
</dependencySet>
<dependencySet>
<!-- 是否解压依赖包 -->
<unpack>true</unpack>
<!-- 将scope为runtime的依赖包打包 -->
<scope>runtime</scope>
<excludes>
</excludes>
<includes>
</includes>
</dependencySet>
</dependencySets>
<fileSets>
<fileSet>
<directory>${project.build.outputDirectory}</directory>
<outputDirectory>/</outputDirectory>
</fileSet>
</fileSets>
</assembly>
\ No newline at end of file
#! /bin/bash
# zyc
# Delete access_log files of the suggest service that are older than 7 days.
# "today" is the current time while "fileDate" is midnight of the date taken
# from the file name, so the cutoff is not exactly 24*7 hours.
. /etc/profile
today=$(date +%s)
basepath="/data/secoo_tomcat/javaapp/so-suggest-rest/logs"
max_age=$((3600 * 24 * 7))
# Glob instead of parsing `ls` so unusual file names cannot be word-split.
for path in "$basepath"/access*
do
    # When nothing matches, the pattern is left unexpanded; skip it.
    [[ -e $path ]] || continue
    file=${path##*/}
    echo "$file"
    # The date is the 10 characters that start 14 characters before the end
    # of the name (presumably "YYYY-MM-DD" followed by a 4-character suffix).
    if ! fileDate=$(date +%s -d "${file:0-14:10}"); then
        # An unparsable date would otherwise break the arithmetic below.
        echo "skip $file: cannot parse date from file name" >&2
        continue
    fi
    echo "$fileDate"
    timePast=$((today - fileDate))
    if [[ $timePast -gt $max_age ]] ; then
        rm -f -- "$path"
    fi
done
#!/bin/bash
# Rotate service.log once it grows beyond 80 MiB and drop the backup that is
# exactly seven days old.
basePath="/data/crontab/suggest/logs"
# All commands below use relative paths; never run them in the wrong directory.
cd "$basePath" || exit 1
# Ask for the size of service.log only. Listing and grepping could also match
# files such as service.log.1 and produce a multi-line value.
size=$(stat -c %s service.log 2>/dev/null)
if [[ "${size:-0}" -gt $((80*1024*1024)) ]] ; then
    # Remove the live log only after the backup copy succeeded.
    cp service.log "service-$(date +%Y-%m-%d).log" && rm -f service.log
    # Delete the backup taken seven days ago
    rm -f "service-$(date +%Y-%m-%d -d '-7 days').log"
fi
# 清理离线任务的日志
#48 9 * * * /data/crontab/suggest/clear_offline_log.sh >> /data/crontab/suggest/logs/clear.log 2>&1
# 清理suggest 服务的 access_log 日志
#0 0 */2 * * /data/crontab/suggest/clear_access_log.sh >> /data/crontab/suggest/logs/clear_access.log 2>&1
# suggest 离线索引任务
32 6,9 * * * /data/crontab/suggest/start_suggest_task.sh >> /data/crontab/suggest/logs/service.log 2>&1
import requests
# HTTP headers passed with every request this script sends.
# NOTE(review): the Authorization value is a hard-coded HTTP Basic credential
# committed to source control; consider loading it from the environment or a
# config file instead.
DEFAULT_HEADERS = {
"platform-type": "0",
"device-id": "c79d1dfeb84c4500_38864DF8:9009:4FB9:83F3:B3FAAE91606C",
"app-ver": "6.0.18",
'User-Agent': 'Secoo-iPhone/6.0.12 (iPhone; iOS 11.4.1; Scale/2.00)',
'Authorization': 'Basic c2VhcmNoOnNlYXJjaDV6ME52RW4xRA==',
'Content-Type': 'application/json'
}
def getMostDocTimestamp():
    """Return the ``updateTime`` value shared by the most documents.

    Runs a terms aggregation on ``updateTime`` and picks the bucket with the
    highest ``doc_count``; on ties the bucket listed first in the response
    wins. Returns ``-1`` when no bucket has a positive document count.
    """
    query = {
        "size": 0,
        "aggs": {"updateTimeAgg": {"terms": {"field": "updateTime"}}},
    }
    response = requests.post(
        'http://yunhead.siku.cn/search_suggest_index/search_suggest_type/_search',
        headers=DEFAULT_HEADERS,
        json=query)
    candidates = response.json()['aggregations']['updateTimeAgg']['buckets']
    if not candidates:
        return -1
    # max() keeps the first of several equally large buckets.
    top = max(candidates, key=lambda bucket: bucket['doc_count'])
    return top['key'] if top['doc_count'] > 0 else -1
def delNoneMostDocs(timestamp):
    """Delete every document whose ``updateTime`` differs from *timestamp*.

    Nothing is deleted when fewer than 300000 documents carry *timestamp*.
    The response body of the delete request is printed.
    """
    kept_count = checkMostDocCountByUpdateTime(timestamp)
    print('check most doc count ' + str(kept_count))
    if kept_count >= 300000:
        current_version = {"term": {"updateTime": timestamp}}
        payload = {"query": {"bool": {"must_not": current_version}}}
        outcome = requests.post(
            'http://yunhead.siku.cn/search_suggest_index/search_suggest_type/_delete_by_query',
            headers=DEFAULT_HEADERS,
            json=payload)
        print(outcome.text)
def checkMostDocCountByUpdateTime(timestamp):
    """Return ``hits.total`` for documents whose ``updateTime`` equals *timestamp*."""
    term_filter = {"term": {"updateTime": timestamp}}
    search_body = {"query": {"bool": {"must": term_filter}}}
    reply = requests.post(
        'http://yunhead.siku.cn/search_suggest_index/search_suggest_type/_search',
        headers=DEFAULT_HEADERS,
        json=search_body)
    payload = reply.json()
    return payload['hits']['total']
def start():
    """Find the dominant ``updateTime`` and delete documents that carry any other value."""
    delNoneMostDocs(getMostDocTimestamp())


if __name__ == "__main__":
    start()
# -*- coding:utf-8 -*-
import json
import requests
import os
import sys
import datetime
from optparse import OptionParser
def secooSendFeiShu(phones, title, content):
    """Send a Feishu notification through the inform service.

    Args:
        phones: Comma-separated phone numbers of the recipients.
        title: Message title.
        content: Message text; the current local time is appended on a new
            line before sending.

    Returns:
        The raw response body returned by the service.
    """
    # Build a real list: on Python 3 ``map`` returns an iterator, which
    # ``json.dumps`` cannot serialize.
    phone_list = [phone.strip() for phone in phones.strip().split(',')]
    date_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    content += ('\n' + date_str)
    data = {
        "phones": phone_list,
        "title": title,
        "body": [content.strip()]
    }
    # Mock endpoint kept for reference:
    # host = 'http://apims.siku.cn/mock/303/user/sendToUser'
    host = 'http://matrix-inform.secoolocal.com/user/sendToUser'
    headers = {"Content-Type": "application/json"}
    print('req:', data)
    rep = requests.post(host, json.dumps(data), headers=headers)
    print('rep:', rep.content)
    return rep.content
def get_option_parser():
    """Build the command-line parser with string options -p/--phones, -t/--title and -c/--content."""
    parser = OptionParser(usage="usage: %prog [options] arg1 arg2")
    for short_flag, name in (("-p", "phones"), ("-t", "title"), ("-c", "content")):
        parser.add_option(short_flag, "--" + name, dest=name, action="store", type="string")
    return parser
if __name__ == '__main__':
    # reload(sys)
    # sys.setdefaultencoding('utf-8')
    optParser = get_option_parser()
    options, args = optParser.parse_args(sys.argv[1:])
    print(options)
    # All three options are mandatory; show the help text and fail otherwise.
    required = (options.phones, options.title, options.content)
    if any(value is None for value in required):
        optParser.print_help()
        sys.exit(1)
    secooSendFeiShu(options.phones, options.title, options.content)
    print('Done.')
#!/usr/bin/env bash
# Rebuild the no-result word corpus from the remote search logs, run the
# suggest index task, and report the outcome through notify_util.py.
# The shebang is required because the script relies on bash-only syntax
# ([[ ]] and source).

# Everything below uses relative paths; stop if the directory is unavailable.
cd /data/crontab/suggest/ || exit 1
source /etc/profile
today=$(date "+%Y-%m-%d")
# Fetch the fresh logs (-f: do not complain when the old copies are absent)
rm -f service.log
rm -f service.log.1
scp client1.secoo-inc.com:/data/hdfs/check_no_result/logs/service.log .
scp client1.secoo-inc.com:/data/hdfs/check_no_result/logs/service.log.1 .
cat ./service.log.1 >> service.log
#java -cp $CLASSPATH:./get_no_result/* com.secoo.so.searchword.task.LogExtractor ./service.log $today no_result_word > /data/pssmaster/corpus_set/suggest_corpus/sensitive/no_result_all.txt
#java -cp $CLASSPATH:./get_no_result/* com.secoo.so.searchword.task.LogExtractor ./service.log $today europe_word > /data/pssmaster/corpus_set/suggest_corpus/europe_word/europe_word.txt
# NOTE(review): lines without the marker yield an empty $2, so the output
# contains one blank line after sort | uniq - confirm the consumer ignores it.
awk -F'no result word:::' '{print $2}' ./service.log | sort | uniq > /data/pssmaster/corpus_set/suggest_corpus/sensitive/no_result_all.txt
no_result_all_count=$(wc -l /data/pssmaster/corpus_set/suggest_corpus/sensitive/no_result_all.txt)
echo "no result all count $no_result_all_count"
#go script
#./suggest-task >> /data/crontab/suggest/logs/message.log 2>&1
#java
if ! ./suggest-task.sh ; then
    echo "error happened"
    python notify_util.py -p "13426233960" -t "失败:es提示词更新" -c ""
else
    python notify_util.py -p "13426233960" -t "es提示词更新成功" -c ""
    python delete_old_version.py
fi
#!/usr/bin/env bash
# Launch SuggestTask with a 2048 MiB initial / 4096 MiB maximum heap and the
# suggest.saveToFile system property set to true.
java \
    -Xms2048m \
    -Xmx4096m \
    -Dsuggest.saveToFile=true \
    -cp so-suggest-task-1.0-SNAPSHOT.jar \
    com.secoo.so.suggest.task.SuggestTask
#!/usr/bin/env bash
# Launch SuggestTask with a 2048 MiB initial / 4096 MiB maximum heap.
java \
    -Xms2048m \
    -Xmx4096m \
    -cp so-suggest-task-1.0-SNAPSHOT.jar \
    com.secoo.so.suggest.task.SuggestTask
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.secoo.so</groupId>
<artifactId>so-suggest-task</artifactId>
<version>1.0-SNAPSHOT</version>
<name>so-suggest-task</name>
<profiles>
<profile>
<id>prod</id>
<build>
<resources>
<resource>
<directory>src/main/profiles/prod</directory>
</resource>
</resources>
</build>
</profile>
<profile>
<id>test</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<build>
<resources>
<resource>
<directory>src/main/profiles/test</directory>
</resource>
</resources>
</build>
</profile>
</profiles>
<dependencies>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.36</version>
</dependency>
<dependency>
<groupId>commons-dbcp</groupId>
<artifactId>commons-dbcp</artifactId>
<version>1.4</version>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.16.6</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.12</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.12</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.5</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.58</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpasyncclient</artifactId>
<version>4.1.1</version>
</dependency>
<dependency>
<groupId>com.secoo.search.third-patry</groupId>
<artifactId>third-patry-jpinyin</artifactId>
<version>1.1.8</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/third-patry-jpinyin-1.1.8.jar</systemPath>
</dependency>
<!-- es -->
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
<version>6.4.2</version>
</dependency>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>transport</artifactId>
<version>6.4.2</version>
</dependency>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>6.4.2</version>
</dependency>
</dependencies>
<build>
<resources>
<resource>
<directory>src/main/resources</directory>
</resource>
</resources>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.6.0</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
<encoding>UTF-8</encoding>
</configuration>
</plugin>
<!-- 打成 withDependencies jar 包-->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.6</version>
<configuration>
<!-- not append assembly id in release file name -->
<appendAssemblyId>false</appendAssemblyId>
<descriptors>
<descriptor>assembly.xml</descriptor>
</descriptors>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
package com.secoo.so.suggest.config;
import lombok.extern.slf4j.Slf4j;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Map;
import java.util.Properties;
@Slf4j
public class ConfigUtil {
private static final String CONFIG_FILE = "config.properties";
private static ConfigUtil INST;
private Properties prop;
private ConfigUtil() {
prop = load();
}
public synchronized static void init() {
ConfigUtil configUtil = new ConfigUtil();
INST = configUtil;
}
private Properties load() {
Properties tmp = new Properties();
try {
log.info(this.getClass().getClassLoader().getResource(CONFIG_FILE).getPath());
tmp.load(new InputStreamReader(this.getClass().getClassLoader().getResourceAsStream(CONFIG_FILE), "UTF-8"));
} catch (IOException e) {
log.error(e.getMessage(), e);
}
return tmp;
}
public static void printAll() {
if (INST == null) {
init();
}
log.info("[" + CONFIG_FILE + "] =============== start print config properties ===============");
if (INST != null && INST.prop != null) {
for (Map.Entry<Object, Object> entry : INST.prop.entrySet()) {
log.info("[" + CONFIG_FILE + "] " + entry.getKey() + "=" + entry.getValue());
}
}
log.info("[" + CONFIG_FILE + "] =============== end print config properties ===============");
}
public static String getString(String key) {
if (INST == null) {
init();
}
return INST.prop.getProperty(key);
}
public static String getString(String key, String defaultValue) {
String val = getString(key);
if (val == null) {
return defaultValue;
}
return val;
}
public static int getInt(String key, int defaultValue) {
String val = getString(key);
if (val != null) {
try {
return Integer.parseInt(val);
} catch (Exception e) {
log.error(e.getMessage(), e);
}
}
return defaultValue;
}
public static long getLong(String key, long defaultValue) {
String val = getString(key);
if (val != null) {
try {
return Long.parseLong(val);
} catch (Exception e) {
log.error(e.getMessage(), e);
}
}
return defaultValue;
}
}
package com.secoo.so.suggest.db;
import com.secoo.so.suggest.entity.SearchKeywordInfo;
import com.secoo.so.suggest.util.ObjectUtils;
import com.secoo.so.suggest.util.StringUtils;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.dbcp.BasicDataSource;
import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.*;
@Slf4j
public class DwDataSource {
private DwDataSource() {
}
private static class DataSourceHolder {
private static BasicDataSource dataSource = new BasicDataSource();
static {
Properties prop = new Properties();
try {
prop.load(DwDataSource.class.getClassLoader().getResourceAsStream("db.properties"));
} catch (IOException e) {
log.error("init db config error", e);
}
dataSource.setDriverClassName("com.mysql.jdbc.Driver");
dataSource.setUrl(prop.getProperty("dw.read.url"));
dataSource.setUsername(prop.getProperty("dw.read.user"));
dataSource.setPassword(prop.getProperty("dw.read.password"));
dataSource.setTestWhileIdle(true);
dataSource.setTestOnReturn(true);
dataSource.setTimeBetweenEvictionRunsMillis(30000);
dataSource.setMaxActive(30);
dataSource.setTestOnBorrow(true);
dataSource.setValidationQuery("select 1 from dual");
}
}
public static Connection getConnection() {
try {
return DataSourceHolder.dataSource.getConnection();
} catch (SQLException e) {
log.error("获取链接异常", e);
throw new RuntimeException("获取链接异常:" + e.getMessage(), e);
}
}
public static void close() {
try {
DataSourceHolder.dataSource.close();
} catch (SQLException e) {
log.error("close error", e);
}
}
public static Map<String, Long> querySearchWordCountAndMaxId() {
Map<String, Long> result = new HashMap<>();
Connection conn = DwDataSource.getConnection();
PreparedStatement stmt = null;
ResultSet rs = null;
try {
String sql = "select count(*) as cnt, max(id) as max_id, min(id) as min_id from app_search_keyword_year_week_p_day";
stmt = conn.prepareStatement(sql);
rs = stmt.executeQuery();
while (rs.next()) {
Long count = rs.getLong("cnt");
Long maxId = rs.getLong("max_id");
Long minId = rs.getLong("min_id");
result.put("count", count);
result.put("maxId", maxId);
result.put("minId", minId);
}
} catch (Exception e) {
log.error("querySearchWordCountAndMaxId error", e);
} finally {
ObjectUtils.safeClose(conn, stmt, rs);
}
return result;
}
/**
* 查询品牌信息
*/
public static List<SearchKeywordInfo> querySearchKeywordInfoList(long startId, long endId) {
List<SearchKeywordInfo> searchKeywordInfoList = new ArrayList<>();
Connection conn = DwDataSource.getConnection();
PreparedStatement stmt = null;
ResultSet rs = null;
try {
String sql = "select id, keyword, year_pv, year_product_click_count, year_add_cart_count, "
+ " week_pv, week_product_click_count, week_add_cart_count, p_day, "
+ " week_uv, week_product_click_uv, week_add_cart_uv, "
+ " month_pv, month_product_click_count, month_add_cart_count, month_uv,"
+ " month_product_click_uv, month_add_cart_uv, prepare_tags "
+ " from app_search_keyword_year_week_p_day where id >= ? and id < ?";
stmt = conn.prepareStatement(sql);
stmt.setLong(1, startId);
stmt.setLong(2, endId);
rs = stmt.executeQuery();
while (rs.next()) {
Long id = rs.getLong("id");
String keyword = rs.getString("keyword");
if (StringUtils.isBlank(keyword)) {
continue;
}
String prepareTags = rs.getString("prepare_tags");
Integer yearPv = rs.getInt("year_pv");
Integer yearProductClickCount = rs.getInt("year_product_click_count");
Integer yearAddCartCount = rs.getInt("year_add_cart_count");
Long weekPv = rs.getLong("week_pv");
Long weekProductClickCount = rs.getLong("week_product_click_count");
Long weekAddCartCount = rs.getLong("week_add_cart_count");
Long weekUv = rs.getLong("week_uv");
Long weekProductClickUv = rs.getLong("week_product_click_uv");
Long weekAddCartUv = rs.getLong("week_add_cart_uv");
Long monthPv = rs.getLong("month_pv");
Long monthProductClickCount = rs.getLong("month_product_click_count");
Long monthAddCartCount = rs.getLong("month_add_cart_count");
Long monthUv = rs.getLong("month_uv");
Long monthProductClickUv = rs.getLong("month_product_click_uv");
Long monthAddCartUv = rs.getLong("month_add_cart_uv");
String pDay = rs.getString("p_day");
SearchKeywordInfo searchKeywordInfo = new SearchKeywordInfo();
searchKeywordInfo.setId(id);
searchKeywordInfo.setKeyword(keyword);
searchKeywordInfo.setPrepareTags(prepareTags);
searchKeywordInfo.setYearPv(yearPv);
searchKeywordInfo.setYearProductClickCount(yearProductClickCount);
searchKeywordInfo.setYearAddCartCount(yearAddCartCount);
searchKeywordInfo.setWeekPv(weekPv);
searchKeywordInfo.setWeekProductClickCount(weekProductClickCount);
searchKeywordInfo.setWeekAddCartCount(weekAddCartCount);
searchKeywordInfo.setWeekUv(weekUv);
searchKeywordInfo.setWeekProductClickUv(weekProductClickUv);
searchKeywordInfo.setWeekAddCartUv(weekAddCartUv);
searchKeywordInfo.setMonthPv(monthPv);
searchKeywordInfo.setMonthProductClickCount(monthProductClickCount);
searchKeywordInfo.setMonthAddCartCount(monthAddCartCount);
searchKeywordInfo.setMonthUv(monthUv);
searchKeywordInfo.setMonthProductClickUv(monthProductClickUv);
searchKeywordInfo.setMonthAddCartUv(monthAddCartUv);
searchKeywordInfo.setPDay(pDay);
searchKeywordInfoList.add(searchKeywordInfo);
}
} catch (Exception e) {
log.error("querySearchKeywordInfoList error", e);
} finally {
ObjectUtils.safeClose(conn, stmt, rs);
}
return searchKeywordInfoList;
}
}
package com.secoo.so.suggest.db;
import com.secoo.so.suggest.entity.BrandInfo;
import com.secoo.so.suggest.entity.CategoryInfo;
import com.secoo.so.suggest.util.ObjectUtils;
import com.secoo.so.suggest.util.StringUtils;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.dbcp.BasicDataSource;
import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
@Slf4j
public class ErpDataSource {
private ErpDataSource() {
}
private static class DataSourceHolder {
private static BasicDataSource dataSource = new BasicDataSource();
static {
Properties prop = new Properties();
try {
prop.load(ErpDataSource.class.getClassLoader().getResourceAsStream("db.properties"));
} catch (IOException e) {
log.error("init config error", e);
}
dataSource.setDriverClassName("com.mysql.jdbc.Driver");
dataSource.setUrl(prop.getProperty("erp.read.url"));
dataSource.setUsername(prop.getProperty("erp.read.user"));
dataSource.setPassword(prop.getProperty("erp.read.password"));
dataSource.setTestWhileIdle(true);
dataSource.setTestOnReturn(true);
dataSource.setTimeBetweenEvictionRunsMillis(30000);
dataSource.setMaxActive(30);
dataSource.setTestOnBorrow(true);
dataSource.setValidationQuery("select 1 from dual");
}
}
public static Connection getConnection() {
try {
return DataSourceHolder.dataSource.getConnection();
} catch (SQLException e) {
log.error("获取链接异常", e);
throw new RuntimeException("获取链接异常:" + e.getMessage(), e);
}
}
public static void close() {
try {
DataSourceHolder.dataSource.close();
} catch (SQLException e) {
log.error("close error", e);
}
}
/**
* 查询品牌信息
*/
public static List<BrandInfo> queryBrandInfoList() {
List<BrandInfo> brandInfoList = new ArrayList<>();
Connection conn = ErpDataSource.getConnection();
PreparedStatement stmt = null;
ResultSet rs = null;
try {
String sql = "select id,en_name,ch_name,short_name,nickname from secooErpDB.t_product_brand where is_del = 0 and enabled = 1";
stmt = conn.prepareStatement(sql);
rs = stmt.executeQuery();
while (rs.next()) {
Long id = rs.getLong("id");
if (id == null || id <= 0) {
continue;
}
String enName = rs.getString("en_name");
String chName = rs.getString("ch_name");
String shortName = rs.getString("short_name");
String nickName = rs.getString("nickname");
BrandInfo brandInfo = new BrandInfo();
brandInfo.setId(id);
brandInfo.setEnName(enName);
brandInfo.setChName(chName);
brandInfo.setShortName(shortName);
brandInfo.setNickName(nickName);
brandInfoList.add(brandInfo);
}
} catch (Exception e) {
log.error("queryBrandInfoList error", e);
} finally {
ObjectUtils.safeClose(conn, stmt, rs);
}
return brandInfoList;
}
/**
* 查询品牌信息
*/
public static List<CategoryInfo> queryCategoryInfoList() {
List<CategoryInfo> categoryInfoList = new ArrayList<>();
Connection conn = ErpDataSource.getConnection();
PreparedStatement stmt = null;
ResultSet rs = null;
try {
String sql = "select id,name from secooErpDB.t_product_category where is_del = 0 and enabled = 1";
stmt = conn.prepareStatement(sql);
rs = stmt.executeQuery();
while (rs.next()) {
Long id = rs.getLong("id");
String name = rs.getString("name");
if (id == null || id <= 0 || StringUtils.isBlank(name)) {
continue;
}
CategoryInfo categoryInfo = new CategoryInfo();
categoryInfo.setId(id);
categoryInfo.setName(name);
categoryInfoList.add(categoryInfo);
}
} catch (Exception e) {
log.error("queryCategoryInfoList error", e);
} finally {
ObjectUtils.safeClose(conn, stmt, rs);
}
return categoryInfoList;
}
}
package com.secoo.so.suggest.db;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.dbcp.BasicDataSource;
import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.Properties;
@Slf4j
public class SeoDataSource {
private SeoDataSource() {
}
private static class DataSourceHolder {
private static BasicDataSource dataSource = new BasicDataSource();
static {
Properties prop = new Properties();
try {
prop.load(SeoDataSource.class.getClassLoader().getResourceAsStream("db.properties"));
} catch (IOException e) {
log.error("init config error", e);
}
dataSource.setDriverClassName("com.mysql.jdbc.Driver");
dataSource.setUrl(prop.getProperty("seo.read.url"));
dataSource.setUsername(prop.getProperty("seo.read.user"));
dataSource.setPassword(prop.getProperty("seo.read.password"));
dataSource.setTestWhileIdle(true);
dataSource.setTestOnReturn(true);
dataSource.setTimeBetweenEvictionRunsMillis(30000);
dataSource.setMaxActive(30);
dataSource.setTestOnBorrow(true);
dataSource.setValidationQuery("select 1 from dual");
}
}
public static Connection getConnection() {
try {
return DataSourceHolder.dataSource.getConnection();
} catch (SQLException e) {
log.error("获取链接异常", e);
throw new RuntimeException("获取链接异常:" + e.getMessage(), e);
}
}
public static void close() {
try {
DataSourceHolder.dataSource.close();
} catch (SQLException e) {
log.error("close error", e);
}
}
}
package com.secoo.so.suggest.entity;
import lombok.Data;
import java.io.Serializable;
/**
* Brand information.
* <p>
* ErpDataSource.queryBrandInfoList fills the fields from the t_product_brand
* columns id, en_name, ch_name, short_name and nickname.
*/
@Data
public class BrandInfo implements Serializable {
private static final long serialVersionUID = -6388347520294644169L;
// Brand id (column id).
private Long id;
// Column en_name; presumably the English brand name - confirm.
private String enName;
// Column ch_name; presumably the Chinese brand name - confirm.
private String chName;
// Column short_name.
private String shortName;
// Column nickname.
private String nickName;
}
package com.secoo.so.suggest.entity;
import lombok.Data;
import java.io.Serializable;
/**
* Category information.
* <p>
* ErpDataSource.queryCategoryInfoList fills the fields from the
* t_product_category columns id and name.
*/
@Data
public class CategoryInfo implements Serializable {
private static final long serialVersionUID = -12528308204568143L;
// Category id (column id).
private Long id;
// Category name (column name).
private String name;
}
package com.secoo.so.suggest.entity;
import lombok.Data;
import java.io.Serializable;
/**
* Data holder for one suggest keyword; getters and setters are generated by Lombok.
* <p>
* NOTE(review): the code that fills and indexes this object is not shown here;
* the groupings below are inferred from the field names only - confirm against
* the index mapping.
*/
@Data
public class EsSuggestKeywordInfo implements Serializable {
private static final long serialVersionUID = -2891215162084524117L;
// Keyword text and, presumably, its pinyin form.
private String keyword;
private String keywordPinYin;
// Yearly counters (Integer) and weekly counters (Long).
private Integer yearCount;
private Integer yearClickCount;
private Integer yearCartCount;
private Long weekCount;
private Long weekClickCount;
private Long weekCartCount;
// Ratios; presumably derived from the counters above - TODO confirm.
private Double yearClickRatio;
private Double yearCartRatio;
private Double weekClickRatio;
private Double weekCartRatio;
// Classification flags.
private Boolean isBrand;
private Boolean isCategory;
private Boolean isManual;
private Boolean isSensitive;
private Integer manualValue;
// Ranking scores.
private Double wordRank;
private Double wordABRank;
private String keywordVersion;
private Boolean isEuropeWord;
private String suggestTags;
// Presumably the value stored in the index field "updateTime" - confirm.
private Long updateTime;
}
package com.secoo.so.suggest.entity;
import lombok.Data;
import java.io.Serializable;
/**
* Search keyword information.
* <p>
* One row of the table app_search_keyword_year_week_p_day, as read by
* DwDataSource.querySearchKeywordInfoList. Each field holds the column with
* the matching snake_case name.
* NOTE(review): "pv"/"uv" presumably mean page views / unique visitors - confirm.
*/
@Data
public class SearchKeywordInfo implements Serializable {
private static final long serialVersionUID = 5479160854636000122L;
private Long id;
private String keyword;
// Column prepare_tags.
private String prepareTags;
// Yearly counters.
private Integer yearPv;
private Integer yearProductClickCount;
private Integer yearAddCartCount;
// Weekly counters.
private Long weekPv;
private Long weekProductClickCount;
private Long weekAddCartCount;
private Long weekUv;
private Long weekProductClickUv;
private Long weekAddCartUv;
// Monthly counters.
private Long monthPv;
private Long monthProductClickCount;
private Long monthAddCartCount;
private Long monthUv;
private Long monthProductClickUv;
private Long monthAddCartUv;
// Column p_day, read as a string.
private String pDay;
}
package com.secoo.so.suggest.es;
/**
 * Checked exception that carries a message and a numeric code (500 unless
 * another code is given).
 */
public class ESException extends Exception {

    private static final long serialVersionUID = -4947060289056203488L;

    /** Code used by the constructors that do not take one. */
    private static final int DEFAULT_CODE = 500;

    private String msg;

    private int code;

    /** Creates an exception with the default code and no cause. */
    public ESException(String msg) {
        this(msg, DEFAULT_CODE);
    }

    /** Creates an exception with the default code and the given cause. */
    public ESException(String msg, Throwable e) {
        this(msg, DEFAULT_CODE, e);
    }

    /** Creates an exception with the given code and no cause. */
    public ESException(String msg, int code) {
        super(msg);
        this.msg = msg;
        this.code = code;
    }

    /** Creates an exception with the given code and cause. */
    public ESException(String msg, int code, Throwable e) {
        super(msg, e);
        this.msg = msg;
        this.code = code;
    }

    public String getMsg() {
        return this.msg;
    }

    public void setMsg(String msg) {
        this.msg = msg;
    }

    public int getCode() {
        return this.code;
    }

    public void setCode(int code) {
        this.code = code;
    }
}
package com.secoo.so.suggest.es;
import org.elasticsearch.action.DocWriteRequest;
import java.io.Serializable;
/**
 * A document payload paired with its id and the write operation to apply.
 * The operation defaults to {@link #INDEX}.
 */
public class EsObject implements Serializable {

    private static final long serialVersionUID = -3593470306368703625L;

    // Shortcuts for the DocWriteRequest operation types.
    public static final DocWriteRequest.OpType INDEX = DocWriteRequest.OpType.INDEX;
    public static final DocWriteRequest.OpType CREATE = DocWriteRequest.OpType.CREATE;
    public static final DocWriteRequest.OpType UPDATE = DocWriteRequest.OpType.UPDATE;
    public static final DocWriteRequest.OpType DELETE = DocWriteRequest.OpType.DELETE;

    private String id;

    private Object object;

    private DocWriteRequest.OpType opType = INDEX;

    public EsObject() {
    }

    /**
     * Creates an object that is written with the default INDEX operation.
     */
    public EsObject(String id, Object object) {
        this(id, object, INDEX);
    }

    /**
     * DocWriteRequest.OpType.INDEX : overwrite the whole document
     * DocWriteRequest.OpType.UPDATE : update only the fields that are sent
     *
     * @author xupeng
     * @date: 2019-01-23
     */
    public EsObject(String id, Object object, DocWriteRequest.OpType opType) {
        this.id = id;
        this.object = object;
        this.opType = opType;
    }

    public String getId() {
        return this.id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public Object getObject() {
        return this.object;
    }

    public void setObject(Object object) {
        this.object = object;
    }

    public DocWriteRequest.OpType getOpType() {
        return this.opType;
    }

    public void setOpType(DocWriteRequest.OpType opType) {
        this.opType = opType;
    }
}
package com.secoo.so.suggest.es;
import java.util.List;
import java.util.Map;
/**
*
* 分页查询时可以使用的回调接口
**/
public interface EsSearchCallback {
/**
* 分页查询时的callback,如果有该实现,search接口最终将返回空数据
* @param results
*/
public void callback(List<Map<String, Object>> results);
}
package com.secoo.so.suggest.helper;
import com.secoo.so.suggest.db.ErpDataSource;
import com.secoo.so.suggest.util.ObjectUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.HashSet;
import java.util.Set;
/**
 * Loads the sensitive words.
 */
public class AdvWordsHelper {

    private static final Logger LOG = LoggerFactory.getLogger(AdvWordsHelper.class);

    /**
     * Key for the sensitive words.
     */
    public static final String ADV_WORDS = "so.advWords";

    /**
     * Matches a single Chinese character.
     */
    public static final String regEx1 = "[\\u4e00-\\u9fa5]";

    private static final String SENSITIVE_SQL = "SELECT sensitive_words FROM t_sensitive_info WHERE status=1 and del_flag=0";

    /**
     * Reads the sensitive words through the ERP data source.
     *
     * @return the distinct words joined with commas, or {@code null} when none
     *         were read; a query failure is only logged, so the result may be
     *         incomplete
     */
    public static String getAdvWords() {
        Connection conn = ErpDataSource.getConnection();
        Statement stmt = null;
        ResultSet rs = null;
        Set<String> advWordSet = new HashSet<>();
        try {
            stmt = conn.createStatement();
            rs = stmt.executeQuery(SENSITIVE_SQL);
            while (rs.next()) {
                String sensitiveWord = rs.getString(1);
                // A NULL column used to throw inside trim(), which aborted the
                // loop and dropped every remaining row.
                if (sensitiveWord != null && StringUtils.isNotEmpty(sensitiveWord.trim())) {
                    advWordSet.add(sensitiveWord);
                }
            }
        } catch (Exception e) {
            LOG.error("获取敏感词异常: " + e.getMessage(), e);
        } finally {
            ObjectUtils.safeClose(conn, stmt, rs);
        }
        if (advWordSet.isEmpty()) {
            return null;
        }
        return StringUtils.join(advWordSet, ",");
    }
}
package com.secoo.so.suggest.util;
/**
 * Ranking helpers that turn counts and ratios into weighting factors.
 *
 * @author xupeng
 * @date: 2022/1/27
 */
public class CalculateUtils {

    /**
     * Divides {@code numerator} by {@code denominator}.
     *
     * @return the quotient, or 0 when either argument is {@code null} or zero
     */
    public static Double calculateRatio(Integer numerator, Integer denominator) {
        // The denominator must be checked for zero as well; otherwise the
        // division yields Infinity.
        if (numerator == null || numerator == 0 || denominator == null || denominator == 0) {
            return 0D;
        }
        return numerator.doubleValue() / denominator.doubleValue();
    }

    /**
     * Converts a text length into a length factor: {@code 1 / (2 * length + 1)}.
     *
     * @param length text length; must not be {@code null}
     */
    public static Double calculateLengthFactor(Integer length) {
        return 1.0 / (double) (2 * length + 1);
    }

    /**
     * Converts a conversion ratio into a heat factor:
     * {@code log10(sqrt(ratio + 10))} multiplied by a weight chosen from {@code count}.
     *
     * @param ratio conversion ratio; must not be {@code null}
     * @param count sample count that selects the weight (1.0 up to 2.5); must not be {@code null}
     */
    public static Double calculateRatioFactor(Double ratio, Integer count) {
        double rank;
        if (count >= 500) {
            rank = 2.5;
        } else if (count >= 200) {
            rank = 2.2;
        } else if (count >= 100) {
            rank = 2.0;
        } else if (count >= 50) {
            rank = 1.8;
        } else if (count >= 20) {
            rank = 1.6;
        } else if (count >= 10) {
            rank = 1.4;
        } else if (count > 1) {
            rank = 1.2;
        } else {
            rank = 1.0;
        }
        return Math.log10(Math.sqrt(ratio + 10)) * rank;
    }

    /**
     * Converts a search count into a heat factor:
     * {@code log10(sqrt(count * rank + 10))}.
     *
     * @param count search count; must not be {@code null}
     * @param rank  multiplier applied to the count; must not be {@code null}
     */
    public static Double calculateCountFactor(Integer count, Integer rank) {
        // Multiply as double so a large count * rank cannot overflow int and
        // turn the result into NaN.
        double weighted = count.doubleValue() * rank + 10;
        return Math.log10(Math.sqrt(weighted));
    }
}
package com.secoo.so.suggest.util;
import java.text.SimpleDateFormat;
import java.util.Date;
public class DateUtils {
public static final String DEFAULT_DATE_FORMAT = "yyyy-MM-dd";
public static final String DEFAULT_DATETIME_FORMAT = "yyyy-MM-dd HH:mm:ss";
public static String currentDate(String format) {
return new SimpleDateFormat(format).format(new Date());
}
public static String currentDate() {
return currentDate(DEFAULT_DATE_FORMAT);
}
public static String currentDatetime(String format) {
return new SimpleDateFormat(format).format(new Date());
}
public static String currentDatetime() {
return currentDatetime(DEFAULT_DATETIME_FORMAT);
}
public static String formatDate(long ms) {
return formatDate(new Date(ms));
}
public static String formatDate(long ms, String format) {
return formatDate(new Date(ms), format);
}
public static String formatDate(Date date) {
return new SimpleDateFormat(DEFAULT_DATETIME_FORMAT).format(date);
}
public static String formatDate(Date date, String format) {
return new SimpleDateFormat(format).format(date);
}
}
package com.secoo.so.suggest.util;
import com.alibaba.fastjson.JSON;
import com.secoo.so.suggest.config.ConfigUtil;
import org.apache.http.HttpStatus;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.URI;
import java.util.*;
import java.util.concurrent.*;
/**
 * Sends notification messages by POSTing a JSON payload to {@link #FEI_SHU_URL}.
 * Requests are executed on a single background thread so callers are not blocked.
 */
public class FeiShuUtil {
    private static final Logger LOGGER = LoggerFactory.getLogger(FeiShuUtil.class);
    private static CloseableHttpClient client = HttpClientBuilder.create().build();
    private static final URI FEI_SHU_URL = URI.create("http://matrix-inform.secoolocal.com/user/sendToUser");
    /**
     * Single-threaded pool used to send messages so the caller's thread is not blocked.
     * The queue holds at most 1024 pending messages; with DiscardPolicy any message
     * submitted while the queue is full is dropped silently.
     */
    private static ExecutorService executor = new ThreadPoolExecutor(1, 1, 0L, TimeUnit.MILLISECONDS,
            new LinkedBlockingQueue<>(1024), Executors.defaultThreadFactory(), new ThreadPoolExecutor.DiscardPolicy());

    /**
     * Sends a message to the default recipients configured under
     * {@code suggestTask.warningPhones}.
     *
     * @param title   message title; a default title is used when blank
     * @param message message body; nothing is sent when blank
     */
    public static void sendMessage(String title, String message) {
        sendMessage(title, message, null);
    }

    /**
     * Queues a message for asynchronous delivery.
     *
     * @param title   message title; a default title is used when blank
     * @param message message body; nothing is sent when blank
     * @param phones  recipient phone numbers; when null or empty the list configured
     *                under {@code suggestTask.warningPhones} is used instead
     */
    public static void sendMessage(String title, String message, List<String> phones) {
        if (StringUtils.isBlank(message)) {
            return;
        }
        if (StringUtils.isBlank(title)) {
            title = "异常通知";
        }
        // Resolve the default recipients BEFORE building the payload; previously the
        // payload captured the caller's null/empty list and the fallback was only logged.
        if (CollectionUtils.isEmpty(phones)) {
            phones = StringUtils.splitToList(ConfigUtil.getString("suggestTask.warningPhones"), ",");
        }
        final Map<String, Object> params = new HashMap<>(16);
        params.put("title", title);
        params.put("body", Collections.singletonList(message));
        params.put("phones", phones);
        final String fTitle = title;
        final String fMessage = message;
        final String fPhones = StringUtils.join(phones, ",");
        Runnable runnable = new Runnable() {
            @Override
            public void run() {
                HttpPost post = new HttpPost();
                post.setURI(FEI_SHU_URL);
                // try-with-resources closes the response so the pooled connection is
                // released; otherwise later sends can stall waiting for a free connection.
                try {
                    post.setEntity(new StringEntity(JSON.toJSON(params).toString(), ContentType.APPLICATION_JSON));
                    try (CloseableHttpResponse res = client.execute(post)) {
                        if (res == null || res.getEntity() == null) {
                            LOGGER.error("发送飞书消息失败: title:{}, message:{}, phones:{}", fTitle, fMessage, fPhones);
                            return;
                        }
                        if (res.getStatusLine() == null || res.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
                            LOGGER.error("发送飞书消息失败,title:{}, message:{}, phones:{}, res:{}", fTitle, fMessage, fPhones, EntityUtils.toString(res.getEntity()));
                            return;
                        }
                    }
                } catch (IOException | RuntimeException e) {
                    // Runtime failures are logged too: the Future returned by submit() is
                    // discarded, so an uncaught exception here would vanish without a trace.
                    LOGGER.error("发送飞书消息失败: title:{}, message:{}, phones:{}", fTitle, fMessage, fPhones, e);
                }
            }
        };
        executor.submit(runnable);
    }

    /**
     * Shuts the executor down and waits up to five minutes for queued messages to be sent.
     * No further messages can be submitted after this call.
     *
     * @throws InterruptedException if the waiting thread is interrupted
     */
    public static void waitForFinish() throws InterruptedException {
        executor.shutdown();
        executor.awaitTermination(5, TimeUnit.MINUTES);
    }

    /** Manual smoke test: sends one message and waits for delivery. */
    public static void main(String[] args) throws InterruptedException {
        FeiShuUtil.sendMessage("测试", "hello wolrd", Arrays.asList("13426233960"));
        FeiShuUtil.waitForFinish();
    }
}
\ No newline at end of file
package com.secoo.so.suggest.util;
import com.github.stuxuhai.jpinyin.ChineseHelper;
import com.github.stuxuhai.jpinyin.PinyinFormat;
import com.github.stuxuhai.jpinyin.PinyinHelper;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
/**
 * Static wrappers around jpinyin's {@code ChineseHelper} and {@code PinyinHelper},
 * plus a few ASCII-letter checks. Conversion failures are logged instead of thrown.
 */
@Slf4j
public class PinYinUtils {
    /**
     * Converts simplified Chinese to traditional Chinese.
     *
     * @param str text to convert
     * @return the converted text, or {@code str} itself if the conversion throws
     */
    public static String convertToTraditionalChinese(String str) {
        try {
            return ChineseHelper.convertToTraditionalChinese(str);
        } catch (Exception e) {
            log.error("convertToTraditionalChinese error", e);
            return str;
        }
    }

    /**
     * Converts traditional Chinese to simplified Chinese.
     *
     * @param str text to convert
     * @return the converted text, or {@code str} itself if the conversion throws
     */
    public static String convertToSimplifiedChinese(String str) {
        try {
            return ChineseHelper.convertToSimplifiedChinese(str);
        } catch (Exception e) {
            log.error("convertToSimplifiedChinese error", e);
            return str;
        }
    }

    /**
     * Converts Chinese text to space-separated pinyin with tone marks.
     *
     * @param str Chinese text
     * @return the pinyin string, or {@code null} if the conversion throws
     */
    public static String changeToToneMarkPinYin(String str) {
        try {
            return PinyinHelper.convertToPinyinString(str, " ", PinyinFormat.WITH_TONE_MARK);
        } catch (Exception e) {
            log.error("changeToToneMarkPinYin error", e);
            return null;
        }
    }

    /**
     * Converts Chinese text to space-separated pinyin with tone numbers.
     *
     * @param str Chinese text
     * @return the pinyin string, or {@code null} if the conversion throws
     */
    public static String changeToToneNumberPinYin(String str) {
        try {
            return PinyinHelper.convertToPinyinString(str, " ", PinyinFormat.WITH_TONE_NUMBER);
        } catch (Exception e) {
            log.error("changeToToneNumberPinYin error", e);
            return null;
        }
    }

    /**
     * Converts Chinese text to toneless pinyin, syllables separated by a single space.
     *
     * @param str Chinese text
     * @return the pinyin string, or {@code null} if the conversion throws
     */
    public static String changeToWithoutTonePinYin(String str) {
        return changeToWithoutTonePinYin(str, " ");
    }

    /**
     * Converts Chinese text to toneless pinyin with no separator between syllables.
     *
     * @param str Chinese text
     * @return the pinyin string, or {@code null} if the conversion throws
     */
    public static String changeToWithoutTonePinYinNoSeparator(String str) {
        return changeToWithoutTonePinYin(str, "");
    }

    /**
     * Converts Chinese text to toneless pinyin using the given separator.
     *
     * @param str       Chinese text
     * @param separator text placed between syllables
     * @return the pinyin string, or {@code null} if the conversion throws
     */
    public static String changeToWithoutTonePinYin(String str, String separator) {
        try {
            return PinyinHelper.convertToPinyinString(str, separator, PinyinFormat.WITHOUT_TONE);
        } catch (Exception e) {
            log.error("changeToWithoutTonePinYin error: str:{}", str, e);
            return null;
        }
    }

    /**
     * Converts Chinese text to the string of pinyin initials of its characters.
     *
     * @param str Chinese text
     * @return the initials, or {@code null} if the conversion throws
     */
    public static String changeToGetShortPinYin(String str) {
        try {
            return PinyinHelper.getShortPinyin(str);
        } catch (Exception e) {
            log.error("changeToGetShortPinYin error", e);
            return null;
        }
    }

    /**
     * Checks whether a Chinese character has more than one pronunciation.
     *
     * @param str the character to check
     * @return {@code true} if it has multiple pinyin readings; {@code false} otherwise
     *         or if the lookup throws
     */
    public static boolean checkHasMultiPinyin(char str) {
        try {
            return PinyinHelper.hasMultiPinyin(str);
        } catch (Exception e) {
            log.error("checkHasMultiPinyin error", e);
            return false;
        }
    }

    /**
     * Tells whether the text consists solely of ASCII letters.
     *
     * @param str text to check
     * @return {@code false} for blank input or if any character is not a-z / A-Z
     */
    public static boolean isPinYin(String str) {
        if (StringUtils.isBlank(str)) {
            return false;
        }
        for (int i = 0; i < str.length(); i++) {
            if (!isAsciiLetter(str.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether the first character of the text is an ASCII letter.
     *
     * @param str text to check
     * @return {@code false} for blank input or a non-letter first character
     */
    public static boolean isFirstPinYin(String str) {
        return !StringUtils.isBlank(str) && isAsciiLetter(str.charAt(0));
    }

    /**
     * Tells whether the text consists solely of ASCII letters and whitespace.
     *
     * @param str text to check
     * @return {@code false} for blank input or if any other character is present
     */
    public static boolean isEnglishWord(String str) {
        if (StringUtils.isBlank(str)) {
            return false;
        }
        for (int i = 0; i < str.length(); i++) {
            char current = str.charAt(i);
            if (!isAsciiLetter(current) && !Character.isWhitespace(current)) {
                return false;
            }
        }
        return true;
    }

    /** Returns whether {@code c} lies in a-z or A-Z. */
    private static boolean isAsciiLetter(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    /** Manual demo that prints a few sample conversions. */
    public static void main(String[] args) {
        System.out.println(PinYinUtils.convertToTraditionalChinese("中国人"));
        System.out.println(PinYinUtils.convertToSimplifiedChinese("中国人"));
        System.out.println(PinYinUtils.changeToWithoutTonePinYin("中国人"));
        System.out.println(PinYinUtils.changeToWithoutTonePinYinNoSeparator("博柏利 运动鞋"));
        System.out.println(PinYinUtils.changeToWithoutTonePinYinNoSeparator("silk in"));
        System.out.println(PinYinUtils.changeToWithoutTonePinYinNoSeparator("化妆品bb霜遮瑕"));
    }
}
# suggestTask
# NOTE(review): this file carries plaintext credentials; consider supplying them
# from the deployment environment instead of version control.
suggestTask.prefixFilterList=["https://", "http://", "dg", "d & g", "dolce&gabbana","dolce & gabbana", "杜嘉班纳", "避孕", "情趣", "cucci", "乒乓球", "cuccl", "gucii","tod's","iwc7"]
suggestTask.ManualFolder=/data/pssmaster/corpus_set/suggest_corpus/manual
suggestTask.SensitiveFolder=/data/pssmaster/corpus_set/suggest_corpus/sensitive
suggestTask.EuropeWordFolder=/data/pssmaster/corpus_set/suggest_corpus/europe_word
suggestTask.batchSize=10000
suggestTask.threadPoolSize=10
suggestTask.searchWordWarningCount=1000000
suggestTask.suggestTagMaxSize=5
# Comma-separated phone numbers; FeiShuUtil.sendMessage uses them as the
# recipients when the caller passes none.
suggestTask.warningPhones=13426233960
suggestTask.es.url=http://bigdataescluster.secoolocal.com:9200
suggestTask.es.user=search
suggestTask.es.password=search5z0NvEn1D
suggestTask.es.index=search_suggest_index
suggestTask.es.type=search_suggest_type
suggestTask.es.batchSize=2000
# JDBC URL parameters are separated by a plain '&'. The XML escape '&amp;' is not
# decoded in a .properties file, so every parameter after the first would reach
# the driver under the name "amp;..." and never take effect.
erp.read.url=jdbc:mysql://192.168.50.40:3306/secooErpDB?useUnicode=true&characterEncoding=utf8&noAccessToProcedureBodies=true&zeroDateTimeBehavior=convertToNull&allowMultiQueries=true
erp.read.user=so_Erp_R
erp.read.password=5RgzudyyFlApTmve
seo.read.url=jdbc:mysql://secooSeoDB.master.com:3307/secooSeoDB?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull
seo.read.user=sem_Seo_W
seo.read.password=C2IiHfNKYpT1onsR
dw.read.url=jdbc:mysql://secooDataWarehouse.slave.com:3306/secooDataWarehouse?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull
dw.read.user=Search_DataWar_R
dw.read.password=pY1P9zUj9x1M65ot5szo
\ No newline at end of file
# suggestTask
# Settings for the suggest task; the Elasticsearch endpoint below is a raw IP
# address (presumably a non-production cluster - TODO confirm).
# Prefixes listed here are presumably filtered out of suggestions - verify against the task code.
suggestTask.prefixFilterList=["https://", "http://", "dg", "d & g", "dolce&gabbana","dolce & gabbana", "杜嘉班纳", "避孕", "情趣", "cucci", "乒乓球", "cuccl", "gucii","tod's","iwc7"]
# Corpus folders on the local file system.
suggestTask.ManualFolder=/data/pssmaster/corpus_set/suggest_corpus/manual
suggestTask.SensitiveFolder=/data/pssmaster/corpus_set/suggest_corpus/sensitive
suggestTask.EuropeWordFolder=/data/pssmaster/corpus_set/suggest_corpus/europe_word
suggestTask.batchSize=10000
suggestTask.threadPoolSize=10
suggestTask.suggestTagMaxSize=5
suggestTask.searchWordWarningCount=1000000
# NOTE(review): suggestTask.warningPhones is not defined in this file, yet
# FeiShuUtil.sendMessage reads it when no recipients are passed - confirm the
# lookup tolerates a missing key.
# Elasticsearch connection, target index/type and bulk batch size.
suggestTask.es.url=http://10.0.254.139:9200
suggestTask.es.user=suggest
suggestTask.es.password=suggest456
suggestTask.es.index=search_suggest_index
suggestTask.es.type=search_suggest_type
suggestTask.es.batchSize=2000
\ No newline at end of file
# JDBC URL parameters are separated by a plain '&'. The XML escape '&amp;' is not
# decoded in a .properties file, so every parameter after the first would reach
# the driver under the name "amp;..." and never take effect.
erp.read.url=jdbc:mysql://10.4.3.223:3306/secooErpDB?useUnicode=true&characterEncoding=utf8&noAccessToProcedureBodies=true&zeroDateTimeBehavior=convertToNull&allowMultiQueries=true
erp.read.user=3306_test
erp.read.password=iS6CXpYqgZ8Mhjui
seo.read.url=jdbc:mysql://10.4.3.223:3306/secooSeoDB?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull
seo.read.user=SeoDB_test
seo.read.password=Cxkfq57huej0fTpK
\ No newline at end of file
# Root logger: INFO and above, routed to the FILE appender defined below.
log4j.rootLogger=INFO, FILE
# logger
# Per-package level overrides.
# NOTE(review): com.secoo.so.mysql.binlog does not match the com.secoo.so.suggest
# packages used by this project - possibly copied from another service; confirm.
log4j.logger.com.secoo.so.mysql.binlog=INFO
log4j.logger.org.apache.zookeeper=WARN
log4j.logger.com.alibaba.dubbo=WARN
# Size-based rolling file: rolls over at 100MB and keeps up to 5 backup files.
# The path is relative, so it resolves against the process working directory.
log4j.appender.FILE=org.apache.log4j.RollingFileAppender
log4j.appender.FILE.File=logs/service.log
log4j.appender.FILE.MaxFileSize=100MB
log4j.appender.FILE.MaxBackupIndex=5
log4j.appender.FILE.Threshold=INFO
log4j.appender.FILE.layout=org.apache.log4j.PatternLayout
# Line layout: [timestamp]thread (at most 8 chars) level (padded to 5) logger:line message
log4j.appender.FILE.layout.ConversionPattern=[%d{yyyy-MM-dd HH:mm:ss}]%.8t %-5p %c:%L %m%n
{
"state": "open",
"settings": {
"index": {
"number_of_shards": "1",
"provided_name": "search_suggest_index",
"creation_date": "1551702662623",
"analysis": {
"analyzer": {
"suggest_analyzer": {
"tokenizer": "suggest_tokenizer"
}
},
"tokenizer": {
"suggest_tokenizer": {
"type": "edge_ngram",
"min_gram": "1",
"max_gram": "20"
}
}
},
"number_of_replicas": "2",
"uuid": "GdxvBgzsSICrpSddf6bqIQ",
"version": {
"created": "6040099"
}
}
},
"mappings": {
"search_suggest_type": {
"properties": {
"isEuropeWord": {
"type": "boolean"
},
"yearCount": {
"type": "integer"
},
"yearCartRatio": {
"type": "double"
},
"weekClickRatio": {
"type": "double"
},
"weekCount": {
"type": "integer"
},
"wordABRank": {
"type": "float"
},
"IsEuropeWord": {
"type": "boolean"
},
"analyzer": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
},
"weekClickCount": {
"type": "integer"
},
"text": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
},
"keyword": {
"analyzer": "suggest_analyzer",
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
},
"isManual": {
"type": "boolean"
},
"keywordPinYin": {
"analyzer": "suggest_analyzer",
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
},
"query": {
"properties": {
"bool": {
"properties": {
"must": {
"properties": {
"term": {
"properties": {
"keywordVersion": {
"type": "date"
}
}
}
}
}
}
}
}
},
"weekCartRatio": {
"type": "double"
},
"yearClickCount": {
"type": "integer"
},
"updateTime": {
"type": "long"
},
"yearCartCount": {
"type": "integer"
},
"keywordVersion": {
"type": "keyword"
},
"yearClickRatio": {
"type": "double"
},
"isCategory": {
"type": "boolean"
},
"field": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
},
"isSensitive": {
"type": "boolean"
},
"suggestTags": {
"type": "keyword"
},
"weekCartCount": {
"type": "integer"
},
"doc": {
"properties": {
"isManual": {
"type": "boolean"
},
"keywordPinYin": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
},
"weekCartRatio": {
"type": "float"
},
"yearClickCount": {
"type": "long"
},
"updateTime": {
"type": "long"
},
"yearCartCount": {
"type": "long"
},
"yearCount": {
"type": "long"
},
"yearCartRatio": {
"type": "float"
},
"weekClickRatio": {
"type": "float"
},
"weekCount": {
"type": "long"
},
"wordABRank": {
"type": "float"
},
"keywordVersion": {
"type": "date"
},
"yearClickRatio": {
"type": "float"
},
"isCategory": {
"type": "boolean"
},
"isSensitive": {
"type": "boolean"
},
"suggestTags": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
},
"weekClickCount": {
"type": "long"
},
"weekCartCount": {
"type": "long"
},
"wordRank": {
"type": "float"
},
"keyword": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
},
"manualValue": {
"type": "long"
},
"isBrand": {
"type": "boolean"
}
}
},
"wordRank": {
"type": "double"
},
"manualValue": {
"type": "integer"
},
"isBrand": {
"type": "boolean"
}
}
}
},
"aliases": [
],
"primary_terms": {
"0": 3
},
"in_sync_allocations": {
"0": [
"rmaxShfDRkCpdv91Iz4nkQ",
"tWXAarrcTQmXvB07MuWLYg",
"lcW9Sv9MTgSkKb9XmmUzOQ"
]
}
}
\ No newline at end of file
package com.secoo.so.suggest;
import com.alibaba.fastjson.JSON;
import com.secoo.so.suggest.entity.EsSuggestKeywordInfo;
import com.secoo.so.suggest.es.EsClient;
import com.secoo.so.suggest.es.EsObject;
import com.secoo.so.suggest.task.SuggestTask;
import com.secoo.so.suggest.util.FileUtils;
import com.secoo.so.suggest.util.PinYinUtils;
import com.secoo.so.suggest.util.StringUtils;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import java.io.File;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
/**
 * Scratch entry points for manually exercising the suggest-task utilities.
 * Only {@link #main(String[])} is picked up by the JVM launcher; the numbered
 * {@code mainN} methods are alternative experiments kept for reference.
 * File paths and Elasticsearch endpoints are hard-coded for a developer machine.
 *
 * @author xupeng
 * @date: 2022/2/8
 */
public class TestCode {
    /** Prints a sample string after dbc2Sbc conversion and after simplified-Chinese conversion. */
    public static void main7(String[] args) throws Exception {
        System.out.println(StringUtils.dbc2Sbc("模糊字母"));
        System.out.println(PinYinUtils.convertToSimplifiedChinese("模糊字母"));
    }

    /**
     * Diffs two keyword dumps line by line and prints the lines that occur in
     * only one of them, numbered per side.
     */
    public static void main(String[] args) throws Exception {
        List<String> esLines = FileUtils.readLines(new File("d:\\suggest-es.json"));
        List<String> newLines = FileUtils.readLines(new File("d:\\suggest-index-keyword-20220209142219.txt"));
        // Hash sets give O(1) membership probes; List.contains inside the loops
        // made the comparison quadratic in the number of lines.
        java.util.Set<String> esLineSet = new java.util.HashSet<>(esLines);
        java.util.Set<String> newLineSet = new java.util.HashSet<>(newLines);
        int count = 0;
        for (String esLine : esLines) {
            if (!newLineSet.contains(esLine)) {
                System.out.println(++count + "\tonlyEs: " + esLine);
            }
        }
        count = 0;
        for (String newLine : newLines) {
            if (!esLineSet.contains(newLine)) {
                System.out.println(++count + "\tonlyNew: " + newLine);
            }
        }
    }

    /** Dumps the keyword of every document returned for an empty bool query into a local file. */
    public static void main5(String[] args) throws Exception {
        EsClient esClient = EsClient.buildEsClient("http://yunhead.siku.cn", "search", "search5z0NvEn1D");
        try {
            BoolQueryBuilder queryBuilder = QueryBuilders.boolQuery();
            List<EsSuggestKeywordInfo> list = esClient.search("search_suggest_index", queryBuilder, EsSuggestKeywordInfo.class);
            StringBuilder keywordBuilder = new StringBuilder();
            for (EsSuggestKeywordInfo suggestKeywordInfo : list) {
                keywordBuilder.append(suggestKeywordInfo.getKeyword()).append("\n");
            }
            FileUtils.saveToFile(keywordBuilder.toString(), "d:\\suggest-es.json", false);
        } finally {
            // Release the client even when the search or the file write fails.
            esClient.close();
        }
    }

    /**
     * Looks up one keyword, writes modified copies back under new ids via a batch
     * request, then repeats the lookup and prints both results.
     */
    public static void main4(String[] args) throws Exception {
        EsClient esClient = EsClient.buildEsClient("http://yunhead.siku.cn", "search", "search5z0NvEn1D");
        try {
            BoolQueryBuilder queryBuilder = QueryBuilders.boolQuery()
                    .must(QueryBuilders.termQuery("keyword", "高跟鞋女秋冬新款"));
            List<EsSuggestKeywordInfo> list = esClient.search("search_suggest_index", queryBuilder, EsSuggestKeywordInfo.class);
            System.out.println(JSON.toJSONString(list));
            List<EsObject> esList = new ArrayList<>();
            for (EsSuggestKeywordInfo esSuggestKeywordInfo : list) {
                esSuggestKeywordInfo.setKeyword(esSuggestKeywordInfo.getKeyword() + "红色");
                // The document id is the md5 of the (modified) keyword.
                esList.add(new EsObject(StringUtils.md5(esSuggestKeywordInfo.getKeyword()), esSuggestKeywordInfo));
            }
            esClient.batch("search_suggest_index", "search_suggest_type", esList);
            list = esClient.search("search_suggest_index", queryBuilder, EsSuggestKeywordInfo.class);
            System.out.println(JSON.toJSONString(list));
        } finally {
            // Release the client even when a request fails.
            esClient.close();
        }
    }

    /** Parses a sample document from JSON and prints the rank computed for it. */
    public static void main3(String[] args) {
        String json = "{\"isBrand\":false,\"isCategory\":false,\"isEuropeWord\":false,\"isManual\":false,\"isSensitive\":false,\"keyword\":\"高跟鞋女秋冬新款\",\"keywordPinYin\":\"gaogenxienvqiudongxinkuan\",\"keywordVersion\":\"2022-02-07\",\"manualValue\":0,\"suggestTags\":\"\",\"updateTime\":1644287409631,\"weekCartCount\":0,\"weekCartRatio\":0.0,\"weekClickCount\":0,\"weekClickRatio\":0.0,\"weekCount\":0,\"wordABRank\":21251.870636624943,\"wordRank\":19772.059911048906,\"yearCartCount\":1,\"yearCartRatio\":0.1111111111111111,\"yearClickCount\":33,\"yearClickRatio\":1.2222222222222223,\"yearCount\":27}";
        EsSuggestKeywordInfo suggestKeywordInfo = JSON.parseObject(json, EsSuggestKeywordInfo.class);
        System.out.println(SuggestTask.calculateWordRank(suggestKeywordInfo));
        suggestKeywordInfo.setKeyword(suggestKeywordInfo.getKeyword() + "红色");
    }

    /** Prints a sample string after dbc2Sbc conversion with selected punctuation replaced by spaces. */
    public static void main2(String[] args) {
        String word = StringUtils.dbc2Sbc("我爱。.·").replaceAll("\ufffc|,|,|\\.", " ");
        System.out.println(word);
    }

    /** Prints byte and character lengths of a mixed Chinese/ASCII string under different charsets. */
    public static void main1(String[] args) {
        try {
            System.out.println("中文a".getBytes("UTF-8").length); // 7: 3 + 3 + 1 bytes
            System.out.println("中文a".getBytes("GBK").length); // 5: 2 + 2 + 1 bytes
            System.out.println(StringUtils.getByteLength("中文a", "utf-8")); // presumably 7 - verify against StringUtils
            System.out.println(StringUtils.getByteLength("中文a", "gBK")); // presumably 5 - verify against StringUtils
            System.out.println(StringUtils.getByteLength("中文a")); // depends on the helper's default charset
            System.out.println("中文a".length()); // 3
        } catch (UnsupportedEncodingException e) {
            // Scratch code: just surface the failure on stderr.
            e.printStackTrace();
        }
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment