Commit 881a4ed1 by wangyulong

1.query特征与query-user交叉特征Redis同步

parent 0bc95b5b
-- Output table for the search query/user cross features.
-- Each row holds the concatenated query/user key and its encoded
-- cross-feature string. Data is partitioned by day (p_day).
CREATE EXTERNAL TABLE IF NOT EXISTS secoo_search.search_query_user_cross_ext (
    key_word_user     STRING COMMENT 'query_user拼接串',
    userqueryfeatures STRING COMMENT 'query和用户交叉特征字符串'
)
COMMENT '搜索用户query交叉特征'
PARTITIONED BY (p_day DATE COMMENT '分区日期');
-- Rebuilds one daily partition of search_query_user_cross_ext from
-- search_query_user_cross. The partition is selected by the "yesterday"
-- hivevar, which the caller passes as a quoted date literal.
--
-- Output columns (matched to the target table by position):
--   key_word_user     : key_word and device_id joined with '_'
--   userqueryfeatures : twelve cross flags, each written as a two-slot
--                       pair: '0,1' when the rounded flag equals 1,
--                       otherwise '1,0' (NULL is treated as 0)
--
-- Rows whose key_word or device_id is NULL are skipped. Hive concat_ws
-- drops NULL arguments, so such rows would otherwise produce a key with
-- only one part, which can collide with a genuine query_device key.
INSERT OVERWRITE TABLE secoo_search.search_query_user_cross_ext PARTITION (p_day = ${yesterday})
SELECT
    concat_ws('_', key_word, device_id) AS key_word_user,
    concat_ws(',',
        -- browse-based category / brand cross flags
        CASE WHEN round(nvl(user_query_first_category_cross, 0), 0) = 1 THEN '0,1' ELSE '1,0' END,
        CASE WHEN round(nvl(user_query_second_category_cross, 0), 0) = 1 THEN '0,1' ELSE '1,0' END,
        CASE WHEN round(nvl(user_query_thrid_category_cross, 0), 0) = 1 THEN '0,1' ELSE '1,0' END,
        CASE WHEN round(nvl(user_query_brand_cross, 0), 0) = 1 THEN '0,1' ELSE '1,0' END,
        -- cart-based cross flags
        CASE WHEN round(nvl(user_query_cart_first_category_cross, 0), 0) = 1 THEN '0,1' ELSE '1,0' END,
        CASE WHEN round(nvl(user_query_cart_second_category_cross, 0), 0) = 1 THEN '0,1' ELSE '1,0' END,
        CASE WHEN round(nvl(user_query_cart_thrid_category_cross, 0), 0) = 1 THEN '0,1' ELSE '1,0' END,
        CASE WHEN round(nvl(user_query_cart_brand_cross, 0), 0) = 1 THEN '0,1' ELSE '1,0' END,
        -- pay-based cross flags
        CASE WHEN round(nvl(user_query_pay_first_category_cross, 0), 0) = 1 THEN '0,1' ELSE '1,0' END,
        CASE WHEN round(nvl(user_query_pay_second_category_cross, 0), 0) = 1 THEN '0,1' ELSE '1,0' END,
        CASE WHEN round(nvl(user_query_pay_thrid_category_cross, 0), 0) = 1 THEN '0,1' ELSE '1,0' END,
        CASE WHEN round(nvl(user_query_pay_brand_cross, 0), 0) = 1 THEN '0,1' ELSE '1,0' END
    ) AS userqueryfeatures
FROM secoo_search.search_query_user_cross
WHERE p_day = ${yesterday}
  AND key_word IS NOT NULL
  AND device_id IS NOT NULL;
# Driver for the query/user cross-feature export.
# Usage: <script> [today]
#   today - reference date understood by `date -d`. The partition that gets
#           written is (today - delta_day). When omitted, `date` resolves
#           the offset relative to the current day.
work_dir="/data/zhaoyanchao/java/shell/cross_feature/query_user/"
delta_day=1
today_param=$1

# Stop if the date cannot be parsed: an empty value would otherwise be
# handed to Hive and INSERT OVERWRITE would target the partition p_day=''.
yesterday=$(date -d "${today_param} -${delta_day} day" "+%Y-%m-%d") || {
    echo "cannot derive partition date from argument '${today_param}'" >&2
    exit 1
}

# Write data into the search user-query cross-feature table.
# The DDL step must succeed before any data is loaded.
hive -f "${work_dir}create_cross_feature_query_user_ext.sql" || exit 1
echo "yesterday is '${yesterday}'"
# The date is passed with surrounding single quotes so that it is
# substituted into the SQL as a string literal.
hive --hivevar yesterday="'${yesterday}'" -f "${work_dir}insert_cross_feature_query_user_ext.sql"
\ No newline at end of file
-- Output table for per-query features.
-- Keeps the raw per-query columns and adds two derived comma-separated
-- strings (queryfeatures, querylabels). Data is partitioned by day (p_day).
CREATE EXTERNAL TABLE IF NOT EXISTS secoo_search.search_data_query_feature_ext (
    keyword                   STRING  COMMENT 'query词',
    -- recognised categories
    query_cat_1               BIGINT  COMMENT '识别类目1',
    query_cat_2               BIGINT  COMMENT '识别类目2',
    query_cat_3               BIGINT  COMMENT '识别类目3',
    query_cat_4               BIGINT  COMMENT '识别类目4',
    query_cat_5               BIGINT  COMMENT '识别类目5',
    -- recognised brands
    query_brand_1             BIGINT  COMMENT '识别品牌1',
    query_brand_2             BIGINT  COMMENT '识别品牌2',
    query_brand_3             BIGINT  COMMENT '识别品牌3',
    -- top-5 brands recently added to cart for the query
    cart_brand_1              BIGINT  COMMENT 'query最近加购的品牌Top5',
    cart_brand_2              BIGINT  COMMENT 'query最近加购的品牌Top5',
    cart_brand_3              BIGINT  COMMENT 'query最近加购的品牌Top5',
    cart_brand_4              BIGINT  COMMENT 'query最近加购的品牌Top5',
    cart_brand_5              BIGINT  COMMENT 'query最近加购的品牌Top5',
    -- top-5 categories recently added to cart for the query
    cart_category_1           BIGINT  COMMENT 'query最近加购的品类Top5',
    cart_category_2           BIGINT  COMMENT 'query最近加购的品类Top5',
    cart_category_3           BIGINT  COMMENT 'query最近加购的品类Top5',
    cart_category_4           BIGINT  COMMENT 'query最近加购的品类Top5',
    cart_category_5           BIGINT  COMMENT 'query最近加购的品类Top5',
    -- top-5 brands recently purchased for the query
    pay_brand_1               BIGINT  COMMENT 'query最近购买的品牌Top5',
    pay_brand_2               BIGINT  COMMENT 'query最近购买的品牌Top5',
    pay_brand_3               BIGINT  COMMENT 'query最近购买的品牌Top5',
    pay_brand_4               BIGINT  COMMENT 'query最近购买的品牌Top5',
    pay_brand_5               BIGINT  COMMENT 'query最近购买的品牌Top5',
    -- top-5 categories recently purchased for the query
    pay_category_1            BIGINT  COMMENT 'query最近购买的品类Top5',
    pay_category_2            BIGINT  COMMENT 'query最近购买的品类Top5',
    pay_category_3            BIGINT  COMMENT 'query最近购买的品类Top5',
    pay_category_4            BIGINT  COMMENT 'query最近购买的品类Top5',
    pay_category_5            BIGINT  COMMENT 'query最近购买的品类Top5',
    -- scalar query attributes
    query_gender              TINYINT COMMENT '识别性别,1是男,2是女, 0是没有',
    query_contains_other_word TINYINT COMMENT '是否含其他词',
    query_word_size           TINYINT COMMENT 'query分词个数',
    query_search_pv           BIGINT  COMMENT 'query搜索次数',
    query_search_uv           BIGINT  COMMENT 'query搜索人数',
    -- derived, comma-separated strings
    queryfeatures             STRING  COMMENT 'query特征',
    querylabels               STRING  COMMENT 'query词labelEncoding标签'
)
COMMENT 'query缓存特征'
PARTITIONED BY (p_day DATE COMMENT '分区日期');
\ No newline at end of file
-- Rebuilds one daily partition of search_data_query_feature_ext from
-- search_data_query_original_feature. The partition is selected by the
-- "yesterday" hivevar. Rows with a NULL keyword are skipped.
--
-- The raw columns are copied through unchanged and two strings are derived:
--   queryfeatures : gender pair, contains-other-word pair, word count,
--                   search pv, search uv
--   querylabels   : the 28 category / brand ids, NULL replaced by 0
--
-- NOTE(review): the source table is read without any partition filter even
-- though a single p_day partition is written - confirm the source holds
-- only one snapshot.
INSERT OVERWRITE TABLE secoo_search.search_data_query_feature_ext PARTITION (p_day = ${yesterday})
SELECT
    keyword,
    query_cat_1,
    query_cat_2,
    query_cat_3,
    query_cat_4,
    query_cat_5,
    query_brand_1,
    query_brand_2,
    query_brand_3,
    cart_brand_1,
    cart_brand_2,
    cart_brand_3,
    cart_brand_4,
    cart_brand_5,
    cart_category_1,
    cart_category_2,
    cart_category_3,
    cart_category_4,
    cart_category_5,
    pay_brand_1,
    pay_brand_2,
    pay_brand_3,
    pay_brand_4,
    pay_brand_5,
    pay_category_1,
    pay_category_2,
    pay_category_3,
    pay_category_4,
    pay_category_5,
    query_gender,
    query_contains_other_word,
    query_word_size,
    query_search_pv,
    query_search_uv,
    concat_ws(',',
        -- Two-slot pair: '0,1' only when the value equals 1, otherwise '1,0'.
        -- NOTE(review): the target table documents query_gender as 1=male,
        -- 2=female, 0=none, so 2 is encoded the same as 0 here - confirm
        -- that this is intended.
        CASE WHEN round(nvl(query_gender, 0), 0) = 1 THEN '0,1' ELSE '1,0' END,
        CASE WHEN round(nvl(query_contains_other_word, 0), 0) = 1 THEN '0,1' ELSE '1,0' END,
        -- numeric features, NULL replaced by 0
        cast(round(nvl(query_word_size, 0), 4) AS string),
        cast(round(nvl(query_search_pv, 0), 4) AS string),
        cast(round(nvl(query_search_uv, 0), 4) AS string)
    ) AS queryfeatures,
    concat_ws(',',
        -- recognised categories
        cast(round(nvl(query_cat_1, 0), 0) AS string),
        cast(round(nvl(query_cat_2, 0), 0) AS string),
        cast(round(nvl(query_cat_3, 0), 0) AS string),
        cast(round(nvl(query_cat_4, 0), 0) AS string),
        cast(round(nvl(query_cat_5, 0), 0) AS string),
        -- recognised brands
        cast(round(nvl(query_brand_1, 0), 0) AS string),
        cast(round(nvl(query_brand_2, 0), 0) AS string),
        cast(round(nvl(query_brand_3, 0), 0) AS string),
        -- cart brands
        cast(round(nvl(cart_brand_1, 0), 0) AS string),
        cast(round(nvl(cart_brand_2, 0), 0) AS string),
        cast(round(nvl(cart_brand_3, 0), 0) AS string),
        cast(round(nvl(cart_brand_4, 0), 0) AS string),
        cast(round(nvl(cart_brand_5, 0), 0) AS string),
        -- cart categories
        cast(round(nvl(cart_category_1, 0), 0) AS string),
        cast(round(nvl(cart_category_2, 0), 0) AS string),
        cast(round(nvl(cart_category_3, 0), 0) AS string),
        cast(round(nvl(cart_category_4, 0), 0) AS string),
        cast(round(nvl(cart_category_5, 0), 0) AS string),
        -- pay brands
        cast(round(nvl(pay_brand_1, 0), 0) AS string),
        cast(round(nvl(pay_brand_2, 0), 0) AS string),
        cast(round(nvl(pay_brand_3, 0), 0) AS string),
        cast(round(nvl(pay_brand_4, 0), 0) AS string),
        cast(round(nvl(pay_brand_5, 0), 0) AS string),
        -- pay categories
        cast(round(nvl(pay_category_1, 0), 0) AS string),
        cast(round(nvl(pay_category_2, 0), 0) AS string),
        cast(round(nvl(pay_category_3, 0), 0) AS string),
        cast(round(nvl(pay_category_4, 0), 0) AS string),
        cast(round(nvl(pay_category_5, 0), 0) AS string)
    ) AS querylabels
FROM secoo_search.search_data_query_original_feature
WHERE keyword IS NOT NULL
\ No newline at end of file
# Driver for the per-query feature export.
# Usage: <script> [today]
#   today - reference date understood by `date -d`. The partition that gets
#           written is (today - delta_day). When omitted, `date` resolves
#           the offset relative to the current day.
work_dir="/data/zhaoyanchao/java/shell/query_feature/"
delta_day=1
today_param=$1

# Stop if the date cannot be parsed: an empty value would otherwise be
# handed to Hive and INSERT OVERWRITE would target the partition p_day=''.
yesterday=$(date -d "${today_param} -${delta_day} day" "+%Y-%m-%d") || {
    echo "cannot derive partition date from argument '${today_param}'" >&2
    exit 1
}

# Write data into the search query source-data table.
# The DDL step must succeed before any data is loaded.
hive -f "${work_dir}create_query_feature_ext_table.sql" || exit 1
# The date is passed with surrounding single quotes so that it is
# substituted into the SQL as a string literal.
hive --hivevar yesterday="'${yesterday}'" -f "${work_dir}insert_query_feature_ext_table.sql"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment