Commit bdf04c7b by lishihang

'ADD:feature'

parent 80ff1d09
-- Hive DDL for the user x query cross-feature table.
-- Each row carries a key_word, a device_id and twelve bigint cross-feature
-- columns, grouped below as recognized / add-to-cart / purchase features, each
-- with first, second and third level category plus brand.
-- The table is external, partitioned by day (p_day) and stored as Parquet.
-- It is created only when it does not exist yet.
-- NOTE(review): the spelling "thrid" in three column names is kept as is,
-- because other scripts may already reference these names.
CREATE EXTERNAL TABLE IF NOT EXISTS secoo_search.search_query_user_cross (
    key_word                              STRING COMMENT 'query',
    device_id                             STRING COMMENT 'device_id',
    user_query_first_category_cross       BIGINT COMMENT '用户一级品类偏好和Query的识别品类的交叉',
    user_query_second_category_cross      BIGINT COMMENT '用户二级品类偏好和Query的识别品类的交叉',
    user_query_thrid_category_cross       BIGINT COMMENT '用户三级品类偏好和Query的识别品类的交叉',
    user_query_brand_cross                BIGINT COMMENT '用户品牌偏好和Query的识别品牌的交叉',
    user_query_cart_first_category_cross  BIGINT COMMENT '用户一级品类偏好和Query的加购品类的交叉',
    user_query_cart_second_category_cross BIGINT COMMENT '用户二级品类偏好和Query的加购品类的交叉',
    user_query_cart_thrid_category_cross  BIGINT COMMENT '用户三级品类偏好和Query的加购品类的交叉',
    user_query_cart_brand_cross           BIGINT COMMENT '用户品牌偏好和Query的加购品牌的交叉',
    user_query_pay_first_category_cross   BIGINT COMMENT '用户一级品类偏好和Query的购买品类的交叉',
    user_query_pay_second_category_cross  BIGINT COMMENT '用户二级品类偏好和Query的购买品类的交叉',
    user_query_pay_thrid_category_cross   BIGINT COMMENT '用户三级品类偏好和Query的购买品类的交叉',
    user_query_pay_brand_cross            BIGINT COMMENT '用户品牌偏好和Query的购买品牌的交叉'
)
COMMENT '搜索用户query交叉特征'
PARTITIONED BY (p_day DATE COMMENT '分区日期')
STORED AS PARQUET;
# Daily driver for the user x query cross-feature job.
#
# Argument 1 (today_param): a date string understood by GNU `date -d`.
# The script derives the day before today_param, logs the last partition that
# SHOW PARTITIONS lists for the target table, and then runs the Hive insert
# script with hivevar `yesterday` set to that day wrapped in single quotes.
# The exit status of the script is the exit status of the final hive call.
work_dir="/data/zhaoyanchao/java/shell/cross_feature/query_user/"
delta_day=1
today_param=$1

# Stop when the date cannot be derived. Without this check a bad today_param
# would leave `yesterday` empty and Hive would still be started with it.
if ! yesterday=$(date -d "${today_param} -${delta_day} day" "+%Y-%m-%d"); then
    echo "cannot derive yesterday from today_param '${today_param}'" >&2
    exit 1
fi

# Last line of the partition listing, expected to look like p_day=YYYY-MM-DD.
# It is used for the log line below only.
partition=$(hive -e "show partitions secoo_search.search_query_user_cross" | tail -n 1)
# Skip the 6-character "p_day=" prefix and keep the 10-character date.
partition_day=${partition:6:10}
echo "today_param is $today_param yesterday is $yesterday and partition_day is $partition_day"
hive --hivevar yesterday="'${yesterday}'" -f "${work_dir}insert_cross_feature_query_user.sql"
-- Hive DDL for the per-query brand and category feature table.
-- Each row carries a key_word and four groups of five int columns:
-- add-to-cart brands, add-to-cart categories, purchased brands and purchased
-- categories, numbered 1 to 5 within each group.
-- The table is external, partitioned by day (p_day) and stored as Parquet.
-- It is created only when it does not exist yet.
CREATE EXTERNAL TABLE IF NOT EXISTS secoo_search.search_query_brand_category (
    key_word        STRING COMMENT 'query',
    cart_brand_1    INT    COMMENT 'query最近加购的品牌Top5',
    cart_brand_2    INT    COMMENT 'query最近加购的品牌Top5',
    cart_brand_3    INT    COMMENT 'query最近加购的品牌Top5',
    cart_brand_4    INT    COMMENT 'query最近加购的品牌Top5',
    cart_brand_5    INT    COMMENT 'query最近加购的品牌Top5',
    cart_category_1 INT    COMMENT 'query最近加购的品类Top5',
    cart_category_2 INT    COMMENT 'query最近加购的品类Top5',
    cart_category_3 INT    COMMENT 'query最近加购的品类Top5',
    cart_category_4 INT    COMMENT 'query最近加购的品类Top5',
    cart_category_5 INT    COMMENT 'query最近加购的品类Top5',
    pay_brand_1     INT    COMMENT 'query最近购买的品牌Top5',
    pay_brand_2     INT    COMMENT 'query最近购买的品牌Top5',
    pay_brand_3     INT    COMMENT 'query最近购买的品牌Top5',
    pay_brand_4     INT    COMMENT 'query最近购买的品牌Top5',
    pay_brand_5     INT    COMMENT 'query最近购买的品牌Top5',
    pay_category_1  INT    COMMENT 'query最近购买的品类Top5',
    pay_category_2  INT    COMMENT 'query最近购买的品类Top5',
    pay_category_3  INT    COMMENT 'query最近购买的品类Top5',
    pay_category_4  INT    COMMENT 'query最近购买的品类Top5',
    pay_category_5  INT    COMMENT 'query最近购买的品类Top5'
)
COMMENT '搜索query品牌品类特征特征'
PARTITIONED BY (p_day DATE COMMENT '分区日期')
STORED AS PARQUET;
\ No newline at end of file
-- Base rows for the query feature job: the search detail columns needed
-- downstream, restricted to the 15 days that end on the day passed in as
-- hivevar yesterday (inclusive).
-- The window is anchored on that hivevar instead of current_date(), so a
-- re-run or backfill for an older day reads the days that belong to the
-- partition being written. For a run where today_param is the current day
-- the window is the same as current_date - 15 .. current_date - 1.
-- Both bounds go through date_sub() so they keep the same type the previous
-- current_date() based bounds had.
-- IF EXISTS keeps the drop from failing when the temp table is absent.
drop table if exists tmp.tmp_query_base_lsh;
create table if not exists tmp.tmp_query_base_lsh as
select
    key_word,
    product_brand_id,
    product_category_id,
    is_action_add_cart,
    is_pay_success
from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
where p_day >= date_sub(${yesterday}, 14)
  and p_day <= date_sub(${yesterday}, 0);
-- Top 5 add-to-cart brands per query.
-- Counts base rows with is_action_add_cart = 1 per (key_word, product_brand_id),
-- ranks the brands of each key_word by that count (highest first) and pivots
-- ranks 1 to 5 into columns cart_brand_1 .. cart_brand_5, one row per key_word.
-- A rank that has no brand yields 0.
-- product_brand_id is the secondary sort key so that brands with equal counts
-- receive the same rank on every run.
-- IF EXISTS keeps the drop from failing when the temp table is absent.
drop table if exists tmp.tmp_query_cart_brand_lsh;
create table if not exists tmp.tmp_query_cart_brand_lsh as
select
    ranked.key_word,
    max(case when ranked.rk = 1 then ranked.product_brand_id else 0 end) as cart_brand_1,
    max(case when ranked.rk = 2 then ranked.product_brand_id else 0 end) as cart_brand_2,
    max(case when ranked.rk = 3 then ranked.product_brand_id else 0 end) as cart_brand_3,
    max(case when ranked.rk = 4 then ranked.product_brand_id else 0 end) as cart_brand_4,
    max(case when ranked.rk = 5 then ranked.product_brand_id else 0 end) as cart_brand_5
from (
    select
        counted.key_word,
        counted.product_brand_id,
        row_number() over (
            partition by counted.key_word
            order by counted.nums desc, counted.product_brand_id
        ) as rk
    from (
        select
            key_word,
            product_brand_id,
            count(*) as nums
        from tmp.tmp_query_base_lsh
        where is_action_add_cart = 1
        group by key_word, product_brand_id
    ) counted
) ranked
where ranked.rk <= 5
group by ranked.key_word;
-- Top 5 add-to-cart categories per query.
-- Counts base rows with is_action_add_cart = 1 per
-- (key_word, product_category_id), ranks the categories of each key_word by
-- that count (highest first) and pivots ranks 1 to 5 into columns
-- cart_category_1 .. cart_category_5, one row per key_word.
-- A rank that has no category yields 0.
-- product_category_id is the secondary sort key so that categories with equal
-- counts receive the same rank on every run.
-- IF EXISTS keeps the drop from failing when the temp table is absent.
drop table if exists tmp.tmp_query_cart_category_lsh;
create table if not exists tmp.tmp_query_cart_category_lsh as
select
    ranked.key_word,
    max(case when ranked.rk = 1 then ranked.product_category_id else 0 end) as cart_category_1,
    max(case when ranked.rk = 2 then ranked.product_category_id else 0 end) as cart_category_2,
    max(case when ranked.rk = 3 then ranked.product_category_id else 0 end) as cart_category_3,
    max(case when ranked.rk = 4 then ranked.product_category_id else 0 end) as cart_category_4,
    max(case when ranked.rk = 5 then ranked.product_category_id else 0 end) as cart_category_5
from (
    select
        counted.key_word,
        counted.product_category_id,
        row_number() over (
            partition by counted.key_word
            order by counted.nums desc, counted.product_category_id
        ) as rk
    from (
        select
            key_word,
            product_category_id,
            count(*) as nums
        from tmp.tmp_query_base_lsh
        where is_action_add_cart = 1
        group by key_word, product_category_id
    ) counted
) ranked
where ranked.rk <= 5
group by ranked.key_word;
-- Top 5 purchased brands per query.
-- Counts base rows with is_pay_success = 1 per (key_word, product_brand_id),
-- ranks the brands of each key_word by that count (highest first) and pivots
-- ranks 1 to 5 into columns pay_brand_1 .. pay_brand_5, one row per key_word.
-- A rank that has no brand yields 0.
-- product_brand_id is the secondary sort key so that brands with equal counts
-- receive the same rank on every run.
-- IF EXISTS keeps the drop from failing when the temp table is absent.
drop table if exists tmp.tmp_query_pay_brand_lsh;
create table if not exists tmp.tmp_query_pay_brand_lsh as
select
    ranked.key_word,
    max(case when ranked.rk = 1 then ranked.product_brand_id else 0 end) as pay_brand_1,
    max(case when ranked.rk = 2 then ranked.product_brand_id else 0 end) as pay_brand_2,
    max(case when ranked.rk = 3 then ranked.product_brand_id else 0 end) as pay_brand_3,
    max(case when ranked.rk = 4 then ranked.product_brand_id else 0 end) as pay_brand_4,
    max(case when ranked.rk = 5 then ranked.product_brand_id else 0 end) as pay_brand_5
from (
    select
        counted.key_word,
        counted.product_brand_id,
        row_number() over (
            partition by counted.key_word
            order by counted.nums desc, counted.product_brand_id
        ) as rk
    from (
        select
            key_word,
            product_brand_id,
            count(*) as nums
        from tmp.tmp_query_base_lsh
        where is_pay_success = 1
        group by key_word, product_brand_id
    ) counted
) ranked
where ranked.rk <= 5
group by ranked.key_word;
-- Top 5 purchased categories per query.
-- Counts base rows with is_pay_success = 1 per (key_word, product_category_id),
-- ranks the categories of each key_word by that count (highest first) and
-- pivots ranks 1 to 5 into columns pay_category_1 .. pay_category_5, one row
-- per key_word. A rank that has no category yields 0.
-- product_category_id is the secondary sort key so that categories with equal
-- counts receive the same rank on every run.
-- IF EXISTS keeps the drop from failing when the temp table is absent.
drop table if exists tmp.tmp_query_pay_category_lsh;
create table if not exists tmp.tmp_query_pay_category_lsh as
select
    ranked.key_word,
    max(case when ranked.rk = 1 then ranked.product_category_id else 0 end) as pay_category_1,
    max(case when ranked.rk = 2 then ranked.product_category_id else 0 end) as pay_category_2,
    max(case when ranked.rk = 3 then ranked.product_category_id else 0 end) as pay_category_3,
    max(case when ranked.rk = 4 then ranked.product_category_id else 0 end) as pay_category_4,
    max(case when ranked.rk = 5 then ranked.product_category_id else 0 end) as pay_category_5
from (
    select
        counted.key_word,
        counted.product_category_id,
        row_number() over (
            partition by counted.key_word
            order by counted.nums desc, counted.product_category_id
        ) as rk
    from (
        select
            key_word,
            product_category_id,
            count(*) as nums
        from tmp.tmp_query_base_lsh
        where is_pay_success = 1
        group by key_word, product_category_id
    ) counted
) ranked
where ranked.rk <= 5
group by ranked.key_word;
-- Final step: overwrite the partition named by hivevar yesterday in
-- secoo_search.search_query_brand_category.
-- The driving set is every distinct key_word found in any of the four temp
-- tables. Each temp table is then left-joined back on key_word, so a key_word
-- that is missing from one of them gets NULL in that table's five columns.
-- An earlier variant that wrote to tmp.tmp_query_final_lsh was disabled in
-- favour of this direct insert.
insert overwrite table secoo_search.search_query_brand_category partition(p_day=${yesterday})
select
    kw.key_word,
    cb.cart_brand_1,
    cb.cart_brand_2,
    cb.cart_brand_3,
    cb.cart_brand_4,
    cb.cart_brand_5,
    cc.cart_category_1,
    cc.cart_category_2,
    cc.cart_category_3,
    cc.cart_category_4,
    cc.cart_category_5,
    pb.pay_brand_1,
    pb.pay_brand_2,
    pb.pay_brand_3,
    pb.pay_brand_4,
    pb.pay_brand_5,
    pc.pay_category_1,
    pc.pay_category_2,
    pc.pay_category_3,
    pc.pay_category_4,
    pc.pay_category_5
from (
    select distinct all_kw.key_word
    from (
        select key_word from tmp.tmp_query_cart_brand_lsh
        union all
        select key_word from tmp.tmp_query_cart_category_lsh
        union all
        select key_word from tmp.tmp_query_pay_brand_lsh
        union all
        select key_word from tmp.tmp_query_pay_category_lsh
    ) all_kw
) kw
left join tmp.tmp_query_cart_brand_lsh cb
    on kw.key_word = cb.key_word
left join tmp.tmp_query_cart_category_lsh cc
    on kw.key_word = cc.key_word
left join tmp.tmp_query_pay_brand_lsh pb
    on kw.key_word = pb.key_word
left join tmp.tmp_query_pay_category_lsh pc
    on kw.key_word = pc.key_word;
\ No newline at end of file
# Daily driver for the per-query brand and category feature job.
#
# Argument 1 (today_param): a date string understood by GNU `date -d`.
# The script derives the day before today_param, logs the last partition that
# SHOW PARTITIONS lists for the target table, and then runs the Hive insert
# script with hivevar `yesterday` set to that day wrapped in single quotes.
# The exit status of the script is the exit status of the final hive call.
work_dir="/data/zhaoyanchao/java/shell/cross_feature/query_feature/"
delta_day=1
today_param=$1

# Stop when the date cannot be derived. Without this check a bad today_param
# would leave `yesterday` empty and Hive would still be started with it.
if ! yesterday=$(date -d "${today_param} -${delta_day} day" "+%Y-%m-%d"); then
    echo "cannot derive yesterday from today_param '${today_param}'" >&2
    exit 1
fi

# Last line of the partition listing, expected to look like p_day=YYYY-MM-DD.
# It is used for the log line below only.
partition=$(hive -e "show partitions secoo_search.search_query_brand_category" | tail -n 1)
# Skip the 6-character "p_day=" prefix and keep the 10-character date.
partition_day=${partition:6:10}
echo "today_param is $today_param yesterday is $yesterday and partition_day is $partition_day"
hive --hivevar yesterday="'${yesterday}'" -f "${work_dir}insert_query_brand_category_table.sql"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment