Commit 5c1d537f by 王玉龙

Merge branch 'FEATURE-SEARCH-903-queryFeature' into 'master'

1.query特征数据优化

See merge request tianchuan/search-model-data!6
parents d37803c9 2a9c67b8
......@@ -58,7 +58,7 @@ public class KeywordFeatureExtractJob extends Configured implements Tool {
job.setReducerClass(KeywordFeatureExtractReduce.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileOutputFormat.setOutputPath(job, new Path("hdfs://tesla-cluster/apps/hive/warehouse/secoo_search.db/search_data_query_original_feature"));
FileOutputFormat.setOutputPath(job, new Path("hdfs://tesla-cluster/apps/hive/warehouse/secoo_search.db/search_data_query_semantic_feature"));
boolean success = job.waitForCompletion(true);
if (!success) {
......
......@@ -2,21 +2,43 @@ create external table if not exists secoo_search.search_data_query_original_fea
(
keyword string comment 'query词',
query_cat_1 bigint comment '识别类目1',
query_cat_2 bigint comment '识别类目2',
query_cat_3 bigint comment '识别类目3',
query_cat_4 bigint comment '识别类目4',
query_cat_5 bigint comment '识别类目5',
query_cat_1 bigint comment '识别类目1',
query_cat_2 bigint comment '识别类目2',
query_cat_3 bigint comment '识别类目3',
query_cat_4 bigint comment '识别类目4',
query_cat_5 bigint comment '识别类目5',
query_brand_1 bigint comment '识别品牌1',
query_brand_2 bigint comment '识别品牌2',
query_brand_3 bigint comment '识别品牌3',
query_brand_1 bigint comment '识别品牌1',
query_brand_2 bigint comment '识别品牌2',
query_brand_3 bigint comment '识别品牌3',
query_gender tinyint comment '识别性别,1是男,2是女, 0是没有',
query_gender tinyint comment '识别性别,1是男,2是女, 0是没有',
query_contains_other_word tinyint comment '是否含其他词',
query_word_size tinyint comment 'query分词个数',
query_search_pv bigint comment 'query搜索次数',
query_search_uv bigint comment 'query搜索人数'
query_search_uv bigint comment 'query搜索人数',
cart_brand_1 bigint comment 'query最近加购的品牌Top5',
cart_brand_2 bigint comment 'query最近加购的品牌Top5',
cart_brand_3 bigint comment 'query最近加购的品牌Top5',
cart_brand_4 bigint comment 'query最近加购的品牌Top5',
cart_brand_5 bigint comment 'query最近加购的品牌Top5',
cart_category_1 bigint comment 'query最近加购的品类Top5',
cart_category_2 bigint comment 'query最近加购的品类Top5',
cart_category_3 bigint comment 'query最近加购的品类Top5',
cart_category_4 bigint comment 'query最近加购的品类Top5',
cart_category_5 bigint comment 'query最近加购的品类Top5',
pay_brand_1 bigint comment 'query最近购买的品牌Top5',
pay_brand_2 bigint comment 'query最近购买的品牌Top5',
pay_brand_3 bigint comment 'query最近购买的品牌Top5',
pay_brand_4 bigint comment 'query最近购买的品牌Top5',
pay_brand_5 bigint comment 'query最近购买的品牌Top5',
pay_category_1 bigint comment 'query最近购买的品类Top5',
pay_category_2 bigint comment 'query最近购买的品类Top5',
pay_category_3 bigint comment 'query最近购买的品类Top5',
pay_category_4 bigint comment 'query最近购买的品类Top5',
pay_category_5 bigint comment 'query最近购买的品类Top5'
) comment 'query原始特征'
row format delimited fields terminated by '\t'
stored as textfile;
\ No newline at end of file
-- Semantic (query-understanding) features, one row per search keyword.
-- Rows are tab-separated text. The table name matches the HDFS output
-- directory of KeywordFeatureExtractJob (…/secoo_search.db/search_data_query_semantic_feature).
-- NOTE(review): no LOCATION is given, so this presumably relies on the default
-- warehouse path being that same directory — confirm.
create external table if not exists secoo_search.search_data_query_semantic_feature
(
    keyword                   string  comment 'query词',
    query_cat_1               bigint  comment '识别类目1',
    query_cat_2               bigint  comment '识别类目2',
    query_cat_3               bigint  comment '识别类目3',
    query_cat_4               bigint  comment '识别类目4',
    query_cat_5               bigint  comment '识别类目5',
    query_brand_1             bigint  comment '识别品牌1',
    query_brand_2             bigint  comment '识别品牌2',
    query_brand_3             bigint  comment '识别品牌3',
    query_gender              tinyint comment '识别性别,1是男,2是女, 0是没有',
    query_contains_other_word tinyint comment '是否含其他词',
    query_word_size           tinyint comment 'query分词个数',
    query_search_pv           bigint  comment 'query搜索次数',
    query_search_uv           bigint  comment 'query搜索人数'
) comment 'query语义特征' -- was 'query原始特征', copied from the original-feature table
row format delimited fields terminated by '\t'
stored as textfile;
\ No newline at end of file
-- Rebuilds secoo_search.search_data_query_original_feature from scratch.
-- Each keyword row of the semantic feature table is extended with the
-- cart/pay brand and category columns of search_query_brand_category for
-- the p_day given by ${yesterday}. The driver script passes that variable
-- already single-quoted through --hivevar.
-- The day filter sits in the join condition, so keywords without a
-- matching row are kept and get NULL cart_*/pay_* columns.
insert overwrite table secoo_search.search_data_query_original_feature
select
    sem.keyword,
    -- semantic features
    sem.query_cat_1,
    sem.query_cat_2,
    sem.query_cat_3,
    sem.query_cat_4,
    sem.query_cat_5,
    sem.query_brand_1,
    sem.query_brand_2,
    sem.query_brand_3,
    sem.query_gender,
    sem.query_contains_other_word,
    sem.query_word_size,
    sem.query_search_pv,
    sem.query_search_uv,
    -- add-to-cart brands and categories
    bc.cart_brand_1,
    bc.cart_brand_2,
    bc.cart_brand_3,
    bc.cart_brand_4,
    bc.cart_brand_5,
    bc.cart_category_1,
    bc.cart_category_2,
    bc.cart_category_3,
    bc.cart_category_4,
    bc.cart_category_5,
    -- purchased brands and categories
    bc.pay_brand_1,
    bc.pay_brand_2,
    bc.pay_brand_3,
    bc.pay_brand_4,
    bc.pay_brand_5,
    bc.pay_category_1,
    bc.pay_category_2,
    bc.pay_category_3,
    bc.pay_category_4,
    bc.pay_category_5
from secoo_search.search_data_query_semantic_feature sem
left join secoo_search.search_query_brand_category bc
    on  bc.key_word = sem.keyword
    and bc.p_day = ${yesterday}
\ No newline at end of file
# Query feature pipeline.
#   $1  reference date in any format `date -d` accepts; the day before it is
#       handed to the Hive scripts as the hivevar `yesterday`.
# Steps: recreate the source and feature tables, load the source table, run
# KeywordFeatureExtractJob once, then build the final feature table from the
# job output.
work_dir="/data/zhaoyanchao/java/shell/query_feature/"
work_jar_dir="/data/soft/data-warehouse_jar/"
warehouse_dir="hdfs://tesla-cluster/apps/hive/warehouse/secoo_search.db"

# Recreate the raw query source table and the final feature table.
hive -e "drop table secoo_search.search_data_original_query_last_year;"
hive -f "${work_dir}create_query_original_table.sql"
hive -e "drop table secoo_search.search_data_query_original_feature;"
hive -f "${work_dir}create_query_feature_table.sql"

# Load the raw query source table.
today_param=$1
delta_day=1
yesterday=$(date -d "${today_param} -${delta_day} day" "+%Y-%m-%d")
echo "${yesterday}"
hive --hivevar yesterday="'${yesterday}'" -f "${work_dir}insert_query_original_table.sql"

# Remove the previous outputs. The extraction job writes to
# search_data_query_semantic_feature, and a MapReduce job refuses to start when
# its output directory already exists, so that directory must be cleared too.
# -f keeps a missing directory (e.g. on the first run) from being an error.
hdfs dfs -rm -r -f "${warehouse_dir}/search_data_query_original_feature"
hdfs dfs -rm -r -f "${warehouse_dir}/search_data_query_semantic_feature"

# Extract the semantic features. The job is run exactly once: a second run would
# hit the output directory the first run just created.
if ! yarn jar "${work_jar_dir}search-model-data-1.0-SNAPSHOT-jar-with-dependencies.jar" com.secoo.search.job.keyword.KeywordFeatureExtractJob; then
    # Do not rebuild the feature table from a missing or partial job output.
    echo "KeywordFeatureExtractJob failed; search_data_query_original_feature was not rebuilt" >&2
    exit 1
fi

# Join the job output with the brand/category statistics into the feature table.
hive --hivevar yesterday="'${yesterday}'" -f "${work_dir}insert_query_feature_table.sql"
# Semantic feature pipeline: recreates and loads the raw query source table,
# then runs KeywordFeatureExtractJob.
#   $1  reference date; the date delta_day days before it is passed to Hive
#       as the single-quoted hivevar `yesterday`.

# Locations of the Hive scripts and of the job jar.
work_dir="/data/zhaoyanchao/java/shell/query_feature/"
work_jar_dir="/data/soft/data-warehouse_jar/"

today_param=$1
delta_day=1

# Recreate the raw query source table.
hive -e "drop table secoo_search.search_data_original_query_last_year;"
hive -f "${work_dir}create_query_original_table.sql"

# Load the raw query source table.
yesterday=$(date -d "${today_param} -${delta_day} day" "+%Y-%m-%d")
echo "${yesterday}"
hive --hivevar yesterday="'${yesterday}'" -f "${work_dir}insert_query_original_table.sql"

# Delete the previous job output.
hdfs dfs -rm -r hdfs://tesla-cluster/apps/hive/warehouse/secoo_search.db/search_data_query_semantic_feature

# Extract the features into the output table's directory.
yarn jar "${work_jar_dir}search-model-data-1.0-SNAPSHOT-jar-with-dependencies.jar" com.secoo.search.job.keyword.KeywordFeatureExtractJob
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment