Commit 2ce837d7 by wangyulong

1.商品特征,用户商品交叉特征

parent 6afe09c3
create external table if not exists secoo_search.search_data_cross_feature_user_product create external table if not exists secoo_search.search_data_cross_feature_user_product
( (
device_id string comment '设备id', device_id string comment '设备id',
product_id string comment '商品id', product_id string comment '商品id',
prodanduserpurchspecial string comment '购买过的商品是特例品 & 商品本身是特例品', prodanduserpurchspecial string comment '购买过的商品是特例品 & 商品本身是特例品',
prodinusercat1 string comment '商品的一级品类是否在用户的一级品类偏好中', prodinusercat1 string comment '商品的一级品类是否在用户的一级品类偏好中',
prodinusercat2 string comment '商品的二级品类是否在用户的二级品类偏好中', prodinusercat2 string comment '商品的二级品类是否在用户的二级品类偏好中',
prodanduserpurchnew string comment '商品的价格是否在用户购买的价格段中', prodanduserpurchnew string comment '商品的价格是否在用户购买的价格段中',
prodanduserpurchpromotion string comment '购买过的商品是促销品 & 商品本身是促销品', prodanduserpurchpromotion string comment '购买过的商品是促销品 & 商品本身是促销品',
prodanduserpurchbig100 string comment '购买过百大品牌 & 商品是百大品牌', prodanduserpurchbig100 string comment '购买过百大品牌 & 商品是百大品牌',
prodpriceinuserbrowsing string comment '商品的价格是否在用户浏览的价格段中', prodpriceinuserbrowsing string comment '商品的价格是否在用户浏览的价格段中',
prodpriceinuserpurch string comment '商品的价格是否在用户购买的价格段中' prodpriceinuserpurch string comment '商品的价格是否在用户购买的价格段中',
user_product_buy_is_match string comment '用户购买商品二级类目价格等级是否匹配商品二级类目价格等级:1/0',
user_product_buy_price_level string comment '用户购买商品二级类目价格等级是否匹配商品二级类目价格等级:price_level/0'
) comment '用户商品交叉特征' ) comment '用户商品交叉特征'
partitioned by ( partitioned by (p_day date comment '分区日期')
p_day date comment '分区日期'
)
row format delimited fields terminated by '\t' row format delimited fields terminated by '\t'
stored as textfile; stored as textfile;
-- User x product purchase cross-feature table (external, partitioned by p_day).
-- All feature columns are declared as STRING.
--   device_id                     device id
--   product_id                    product id
--   category2_id                  second-level category id
--   cate_2_price_level            product price band within its second-level category (1-10)
--   user_product_buy_is_match     1 when the user's purchased price level in the second-level
--                                 category matches the product's level, else 0
--   user_product_buy_price_level  the matched price level, or 0 when there is no match
-- Comments are kept on their own lines starting at column 0.
CREATE EXTERNAL TABLE IF NOT EXISTS secoo_search.search_data_cross_feature_user_product_buy (
    device_id                    STRING COMMENT '设备id',
    product_id                   STRING COMMENT '商品id',
    category2_id                 STRING COMMENT '二级类目id',
    cate_2_price_level           STRING COMMENT '商品二级类目价格段1-10',
    user_product_buy_is_match    STRING COMMENT '用户购买商品二级类目价格等级是否匹配商品二级类目价格等级:1/0',
    user_product_buy_price_level STRING COMMENT '用户购买商品二级类目价格等级是否匹配商品二级类目价格等级:price_level/0'
)
COMMENT '用户商品购买交叉特征'
PARTITIONED BY (
    p_day DATE COMMENT '分区日期'
);
\ No newline at end of file
-- Rebuild the p_day=${yesterday} partition of the purchase cross-feature table.
-- ${yesterday} is a Hive variable; the driver script passes it already wrapped in
-- single quotes, so it is used here without extra quoting.
-- Source rows come from app_search_uuid_sku_price_level (alias lvl). Each row is
-- LEFT JOINed to the product's second-level-category price level (alias prod), so
-- rows without a matching product are kept and fall into the ELSE 0 branches.
-- NOTE(review): prod.category_id_2 is projected by the subquery but never read by
-- the outer query.
INSERT OVERWRITE TABLE secoo_search.search_data_cross_feature_user_product_buy PARTITION (p_day = ${yesterday})
SELECT
    UPPER(lvl.uuid)              AS device_id,
    lvl.product_id               AS product_id,
    lvl.category_id_2            AS category2_id,
    lvl.buy_sku_cat2_price_level AS cate_2_price_level,
    CASE
        WHEN lvl.buy_sku_cat2_price_level = prod.cate_2_price_level THEN 1
        ELSE 0
    END                          AS user_product_buy_is_match,
    CASE
        WHEN lvl.buy_sku_cat2_price_level = prod.cate_2_price_level THEN lvl.buy_sku_cat2_price_level
        ELSE 0
    END                          AS user_product_buy_price_level
FROM secoo_app.app_search_uuid_sku_price_level lvl
LEFT JOIN (
    SELECT
        wide.product_id                       AS product_id,
        split(wide.category_org_code, '_')[2] AS category_id_2,
        click.cate_2_price_level              AS cate_2_price_level
    FROM secoo_fact.fact_search_product_wide_p_day wide
    JOIN secoo_fact.fact_search_product_55_30d_click_product_p_day click
        ON wide.main_id = click.product_main_id
        AND click.p_day = ${yesterday}
    WHERE wide.p_day = ${yesterday}
) prod
    ON lvl.product_id = prod.product_id
WHERE lvl.p_day = ${yesterday}
# Driver for the user x product purchase cross features.
#
# Argument:
#   $1  reference date ("today"), in any form accepted by `date -d`.
#
# Steps: compute the day before $1, make sure the target table exists, then
# rebuild that day's partition. The script's exit status is that of the final
# insert unless an earlier step fails, in which case it exits non-zero at once.
work_dir="/data/zhaoyanchao/java/shell/cross_feature/user_product/"
# User-product cross features: purchase
delta_day=1
today_param=$1
# Stop here if the date cannot be computed: otherwise ${yesterday} would be empty
# and the insert below would be run against partition p_day=''.
yesterday=$(date -d "${today_param} -${delta_day} day" "+%Y-%m-%d") || {
    echo "cannot compute the day before '${today_param}'" >&2
    exit 1
}
# Create the table if it is missing; do not attempt the insert when the DDL fails.
hive -f "${work_dir}create_cross_feature_user_product_buy.sql" || exit 1
# The value is passed with its own single quotes so the SQL can use ${yesterday}
# directly as a literal.
hive --hivevar yesterday="'${yesterday}'" -f "${work_dir}insert_cross_feature_user_product_buy.sql"
...@@ -2,11 +2,11 @@ ...@@ -2,11 +2,11 @@
work_dir="/data/zhaoyanchao/java/shell/cross_feature/user_product/" work_dir="/data/zhaoyanchao/java/shell/cross_feature/user_product/"
# 检查推荐最新表名 # 检查推荐最新表名
delta_day=0 delta_day=1
today_param=$1 today_param=$1
yesterday=`date -d "${today_param} -$delta_day day" "+%Y-%m-%d"` yesterday=`date -d "${today_param} -$delta_day day" "+%Y-%m-%d"`
date_str=`echo ${yesterday}|sed 's/\-/_/g'` date_str=`echo ${today_param}|sed 's/\-/_/g'`
table_name="secoo_rcmd_features.userProductCrossFeatures_"${date_str} table_name="secoo_rcmd_features.userProductCrossFeatures_"${date_str}
echo ${table_name} echo ${table_name}
...@@ -16,9 +16,9 @@ while [[ ${result} -ne 0 ]]; ...@@ -16,9 +16,9 @@ while [[ ${result} -ne 0 ]];
do do
delta_day=$(($delta_day+1)) delta_day=$(($delta_day+1))
yesterday=`date -d "${today_param} -$delta_day day" "+%Y-%m-%d"` today_param_1=`date -d "${today_param} -$delta_day day" "+%Y-%m-%d"`
date_str=`echo ${yesterday}|sed 's/\-/_/g'` date_str=`echo ${today_param_1}|sed 's/\-/_/g'`
table_name="secoo_rcmd_features.userProductCrossFeatures_"${date_str} table_name="secoo_rcmd_features.userProductCrossFeatures_"${date_str}
hive -e "desc $table_name" hive -e "desc $table_name"
...@@ -30,19 +30,24 @@ done ...@@ -30,19 +30,24 @@ done
# 如果无表,建表 # 如果无表,建表
hive -f "$work_dir"create_cross_feature_user_product.sql hive -f "$work_dir"create_cross_feature_user_product.sql
hive -e "insert overwrite table secoo_search.search_data_cross_feature_user_product partition(p_day=date'$today_param') hive -e "insert overwrite table secoo_search.search_data_cross_feature_user_product partition(p_day=date'${today_param}')
select select
device_id, T1.device_id,
product_id, T1.product_id,
prodanduserpurchspecial, T1.prodanduserpurchspecial,
prodinusercat1, T1.prodinusercat1,
prodinusercat2, T1.prodinusercat2,
prodanduserpurchnew, T1.prodanduserpurchnew,
prodanduserpurchpromotion, T1.prodanduserpurchpromotion,
prodanduserpurchbig100, T1.prodanduserpurchbig100,
prodpriceinuserbrowsing, T1.prodpriceinuserbrowsing,
prodpriceinuserpurch T1.prodpriceinuserpurch,
from $table_name;" T2.user_product_buy_is_match,
T2.user_product_buy_price_level
from ${table_name} T1
left join secoo_search.search_data_cross_feature_user_product_buy T2
on T1.device_id = T2.device_id and T1.product_id = T2.product_id and T2.p_day = '${yesterday}';"
hive -e "SELECT hive -e "SELECT
sum(prodanduserpurchspecial), sum(prodanduserpurchspecial),
sum(prodinusercat1), sum(prodinusercat1),
...@@ -51,9 +56,10 @@ hive -e "SELECT ...@@ -51,9 +56,10 @@ hive -e "SELECT
sum(prodanduserpurchpromotion), sum(prodanduserpurchpromotion),
sum(prodanduserpurchbig100), sum(prodanduserpurchbig100),
sum(prodpriceinuserbrowsing), sum(prodpriceinuserbrowsing),
sum(prodpriceinuserpurch) sum(prodpriceinuserpurch),
FROM secoo_search.search_data_cross_feature_user_product sum(user_product_buy_is_match)
WHERE p_day = '$today_param'" FROM secoo_search.search_data_cross_feature_user_product WHERE p_day = '${today_param}'"
hive -e "SELECT hive -e "SELECT
assert_true(sum(prodanduserpurchspecial) > 0), assert_true(sum(prodanduserpurchspecial) > 0),
assert_true(sum(prodinusercat1) > 0), assert_true(sum(prodinusercat1) > 0),
...@@ -62,6 +68,6 @@ hive -e "SELECT ...@@ -62,6 +68,6 @@ hive -e "SELECT
assert_true(sum(prodanduserpurchpromotion) > 0), assert_true(sum(prodanduserpurchpromotion) > 0),
assert_true(sum(prodanduserpurchbig100) > 0), assert_true(sum(prodanduserpurchbig100) > 0),
assert_true(sum(prodpriceinuserbrowsing) > 0), assert_true(sum(prodpriceinuserbrowsing) > 0),
assert_true(sum(prodpriceinuserpurch) > 0) assert_true(sum(prodpriceinuserpurch) > 0),
FROM secoo_search.search_data_cross_feature_user_product assert_true(sum(user_product_buy_is_match) > 0)
WHERE p_day = '$today_param'" FROM secoo_search.search_data_cross_feature_user_product WHERE p_day = '${today_param}'"
\ No newline at end of file \ No newline at end of file
-- 商品特征扩展表 -- 商品特征扩展表
create external table if not exists secoo_search.search_data_product_feature_ext create external table if not exists secoo_search.search_data_product_feature_ext
( (
`product_id` bigint COMMENT '商品ID', `product_id` bigint comment '商品ID',
`area_type` tinyint comment '货源地,(0大陆 1香港 2美国 3日本 4意大利)', `area_type` tinyint comment '货源地,(0大陆 1香港 2美国 3日本 4意大利)',
`sale_qty_180` bigint comment '前180天至今销售数量', `sale_qty_180` bigint comment '前180天至今销售数量',
`sale_qty_90` bigint comment '前90天至今销售数量', `sale_qty_90` bigint comment '前90天至今销售数量',
`sale_qty_30` bigint comment '前30天至今销售数量', `sale_qty_30` bigint comment '前30天至今销售数量',
`sale_qty_15` bigint comment '前15天至今销售数量', `sale_qty_15` bigint comment '前15天至今销售数量',
`v2p_rate_30` double COMMENT '30天内商品转化率', `v2p_rate_30` double COMMENT '30天内商品转化率',
`v2p_rate_3` double COMMENT '3天内商品转化率', `v2p_rate_3` double COMMENT '3天内商品转化率',
`suit_gender` int COMMENT '商品适用性别', `suit_gender` int COMMENT '商品适用性别',
`product_category_id_1` string COMMENT '商品一级品类ID', `product_category_id_1` string COMMENT '商品一级品类ID',
`gmv_one_week` double COMMENT '商品一周内GMV', `gmv_one_week` double COMMENT '商品一周内GMV',
`gmv_one_month` double COMMENT '商品一月内GMV', `gmv_one_month` double COMMENT '商品一月内GMV',
`order_count_one_week` bigint COMMENT '商品在一周内的销售数量', `order_count_one_week` bigint COMMENT '商品在一周内的销售数量',
`order_count_one_month` bigint COMMENT '商品在一个月内的销售数量', `order_count_one_month` bigint COMMENT '商品在一个月内的销售数量',
`secoo_price` double COMMENT '寺库价格', `secoo_price` double COMMENT '寺库价格',
`product_price_level` string COMMENT '商品在二级品类下的价格档位 1:低,2:中,3:高', `product_price_level` string COMMENT '商品在二级品类下的价格档位 1:低,2:中,3:高',
`android_product_3day_ctr` double COMMENT '商品在android上3天的点击率', `android_product_3day_ctr` double COMMENT '商品在android上3天的点击率',
`android_product_7day_ctr` double COMMENT '商品在android上7天的点击率', `android_product_7day_ctr` double COMMENT '商品在android上7天的点击率',
`android_product_30day_ctr` double COMMENT '商品在android上30天的点击率', `android_product_30day_ctr` double COMMENT '商品在android上30天的点击率',
`ios_product_3day_ctr` double COMMENT '商品在ios上3天的点击率', `ios_product_3day_ctr` double COMMENT '商品在ios上3天的点击率',
`ios_product_7day_ctr` double COMMENT '商品在ios上7天的点击率', `ios_product_7day_ctr` double COMMENT '商品在ios上7天的点击率',
`ios_product_30day_ctr` double COMMENT '商品在ios上30天的点击率', `ios_product_30day_ctr` double COMMENT '商品在ios上30天的点击率',
`all_system_product_3day_ctr` double COMMENT '商品在平台上3天的点击率', `all_system_product_3day_ctr` double COMMENT '商品在平台上3天的点击率',
`all_system_product_7day_ctr` double COMMENT '商品在平台上7天的点击率', `all_system_product_7day_ctr` double COMMENT '商品在平台上7天的点击率',
`all_system_product_30day_ctr` double COMMENT '商品在平台上30天的点击率', `all_system_product_30day_ctr` double COMMENT '商品在平台上30天的点击率',
`android_product_3day_collection` bigint COMMENT '商品在android上3天的收藏数', `android_product_3day_collection` bigint COMMENT '商品在android上3天的收藏数',
`android_product_7day_collection` bigint COMMENT '商品在android上7天的收藏数', `android_product_7day_collection` bigint COMMENT '商品在android上7天的收藏数',
`android_product_30day_collection` bigint COMMENT '商品在android上30天的收藏数', `android_product_30day_collection` bigint COMMENT '商品在android上30天的收藏数',
`ios_product_3day_collection` bigint COMMENT '商品在ios上3天的收藏数', `ios_product_3day_collection` bigint COMMENT '商品在ios上3天的收藏数',
`ios_product_7day_collection` bigint COMMENT '商品在ios上7天的收藏数', `ios_product_7day_collection` bigint COMMENT '商品在ios上7天的收藏数',
`ios_product_30day_collection` bigint COMMENT '商品在ios上30天的收藏数', `ios_product_30day_collection` bigint COMMENT '商品在ios上30天的收藏数',
`all_system_product_3day_collection` bigint COMMENT '商品在平台上3天的收藏数', `all_system_product_3day_collection` bigint COMMENT '商品在平台上3天的收藏数',
`all_system_product_7day_collection` bigint COMMENT '商品在平台上7天的收藏数', `all_system_product_7day_collection` bigint COMMENT '商品在平台上7天的收藏数',
`all_system_product_30day_collection` bigint COMMENT '商品在平台上30天的收藏数', `all_system_product_30day_collection` bigint COMMENT '商品在平台上30天的收藏数',
`android_product_3day_add_cart` bigint COMMENT '商品在android上3天的加购数', `android_product_3day_add_cart` bigint COMMENT '商品在android上3天的加购数',
`android_product_7day_add_cart` bigint COMMENT '商品在android上7天的加购数', `android_product_7day_add_cart` bigint COMMENT '商品在android上7天的加购数',
`android_product_30day_add_cart` bigint COMMENT '商品在android上30天的加购数', `android_product_30day_add_cart` bigint COMMENT '商品在android上30天的加购数',
`ios_product_3day_add_cart` bigint COMMENT '商品在ios上3天的加购数', `ios_product_3day_add_cart` bigint COMMENT '商品在ios上3天的加购数',
`ios_product_7day_add_cart` bigint COMMENT '商品在ios上7天的加购数', `ios_product_7day_add_cart` bigint COMMENT '商品在ios上7天的加购数',
`ios_product_30day_add_cart` bigint COMMENT '商品在ios上30天的加购数', `ios_product_30day_add_cart` bigint COMMENT '商品在ios上30天的加购数',
`all_system_product_3day_add_cart` bigint COMMENT '商品在平台上3天的加购数', `all_system_product_3day_add_cart` bigint COMMENT '商品在平台上3天的加购数',
`all_system_product_7day_add_cart` bigint COMMENT '商品在平台上7天的加购数', `all_system_product_7day_add_cart` bigint COMMENT '商品在平台上7天的加购数',
`all_system_product_30day_add_cart` bigint COMMENT '商品在平台上30天的加购数', `all_system_product_30day_add_cart` bigint COMMENT '商品在平台上30天的加购数',
`product_brand_id` string COMMENT '商品品牌ID', `product_brand_id` string COMMENT '商品品牌ID',
`pv_order_code` int COMMENT '根据pv和订单分析得出的编码(1:热销款2:转化率低热品...)', `pv_order_code` int COMMENT '根据pv和订单分析得出的编码(1:热销款2:转化率低热品...)',
`product_category_id_3` string COMMENT '商品三级品类ID', `product_category_id_3` string COMMENT '商品三级品类ID',
`product_category_id_2` string COMMENT '商品二级品类ID', `product_category_id_2` string COMMENT '商品二级品类ID',
`product_group_id` bigint COMMENT '商品所属组ID', `product_group_id` bigint COMMENT '商品所属组ID',
`product_group_layering_code` int COMMENT '商品所属组的分层码', `product_group_layering_code` int COMMENT '商品所属组的分层码',
`price_grade` double COMMENT '商品一级品类下的购买力等级', `price_grade` double COMMENT '商品一级品类下的购买力等级',
`is_new` int COMMENT '是不是新品1:是,0:不是', `is_new` int COMMENT '是不是新品1:是,0:不是',
`pv` bigint COMMENT '商品近30天pv', `pv` bigint COMMENT '商品近30天pv',
`discount_see_number` int COMMENT '今日折扣频道曝光次数', `discount_see_number` int COMMENT '今日折扣频道曝光次数',
`discount_click_number` int COMMENT '今日折扣频道点击次数', `discount_click_number` int COMMENT '今日折扣频道点击次数',
`discount_add_cart_number` int COMMENT '今日折扣频道加购次数', `discount_add_cart_number` int COMMENT '今日折扣频道加购次数',
`discount_pay_number` int COMMENT '今日折扣频道支付次数', `discount_pay_number` int COMMENT '今日折扣频道支付次数',
`discount_click_rate` double COMMENT '今日折扣频道曝光点击率', `discount_click_rate` double COMMENT '今日折扣频道曝光点击率',
`discount_add_cart_rate` double COMMENT '今日折扣频道点击加购率', `discount_add_cart_rate` double COMMENT '今日折扣频道点击加购率',
`discount_pay_rate` double COMMENT '今日折扣频道加购支付率', `discount_pay_rate` double COMMENT '今日折扣频道加购支付率',
`second_category_price_grade` double COMMENT '商品二级品类下的购买力等级', `second_category_price_grade` double COMMENT '商品二级品类下的购买力等级',
`third_category_price_grade` double COMMENT '商品三级品类下的购买力等级', `third_category_price_grade` double COMMENT '商品三级品类下的购买力等级',
`extreme_hot_score` double COMMENT '爆款分,0.0的为非爆款', `extreme_hot_score` double COMMENT '爆款分,0.0的为非爆款',
`brand_grade` bigint COMMENT '品牌等级,1、2、3、4、5', `brand_grade` bigint COMMENT '品牌等级,1、2、3、4、5',
`productlevel` string COMMENT '商品等级<N、S、A、AB、B>', `productlevel` string COMMENT '商品等级<N、S、A、AB、B>',
`ispromotionproduct` string COMMENT '是否是促销商品', `ispromotionproduct` string COMMENT '是否是促销商品',
`productpricelevel` string COMMENT '商品价格', `productpricelevel` string COMMENT '商品价格',
`isspecialproduct` string COMMENT '是否是特例品(便宜处理的)', `isspecialproduct` string COMMENT '是否是特例品(便宜处理的)',
`isnewstyle` string COMMENT '是否是新款', `isnewstyle` string COMMENT '是否是新款',
`prodgender` string COMMENT '性别', `prodgender` string COMMENT '性别',
`isnewcustomerproduct` string COMMENT '是否新客商品', `isnewcustomerproduct` string COMMENT '是否新客商品',
`isbrand100` string COMMENT '是否是百大品牌', `isbrand100` string COMMENT '是否是百大品牌',
`isnewproduct` string COMMENT '是否是新品', `isnewproduct` string COMMENT '是否是新品',
`ismemberproduct` string COMMENT '是否会员商品', `ismemberproduct` string COMMENT '是否会员商品',
`isselfoperating` string COMMENT '是否自营品', `isselfoperating` string COMMENT '是否自营品',
`productpopularity` string COMMENT '浏览、收藏、加购综合算的热度值', `productpopularity` string COMMENT '浏览、收藏、加购综合算的热度值',
`productbrand` string COMMENT '商品的品牌', `productbrand` string COMMENT '商品的品牌',
`productcategory` string COMMENT '商品的品类', `productcategory` string COMMENT '商品的品类',
`category_id_1` string COMMENT '商品的一级品类', `category_id_1` string COMMENT '商品的一级品类',
`category_id_2` string COMMENT '商品的二级品类', `category_id_2` string COMMENT '商品的二级品类',
`productfeatures` string COMMENT '商品特征', `productfeatures` string COMMENT '商品特征',
`productlabels` string COMMENT '商品兴趣标签', `productlabels` string COMMENT '商品兴趣标签',
`category_org_code_3` string COMMENT '三级分类组织结构代码', `category_org_code_3` string COMMENT '三级分类组织结构代码',
`group_best_sku` bigint COMMENT '分组内最优sku', `group_best_sku` bigint COMMENT '分组内最优sku',
`group_id` bigint COMMENT '清一色group_id', `group_id` bigint COMMENT '清一色group_id',
`group_main_id` bigint COMMENT '清一色group_main_id', `group_main_id` bigint COMMENT '清一色group_main_id',
`product_main_id` bigint COMMENT '商品spu id', `product_main_id` bigint COMMENT '商品spu id',
`vectors` string COMMENT '商品的vectors值', `vectors` string COMMENT '商品的vectors值',
`style` string COMMENT '风格', `style` string COMMENT '风格',
`occasion` string COMMENT '场景', `occasion` string COMMENT '场景',
`color` string COMMENT '颜色', `color` string COMMENT '颜色',
`last_7_days_click_pv` int COMMENT '热度(最近7天点击pv)', `last_7_days_click_pv` int COMMENT '热度(最近7天点击pv)',
`ctr` double COMMENT ' ctr: 网图近7天点击pv/曝光pv' `ctr` double COMMENT ' ctr: 网图近7天点击pv/曝光pv'
) comment '搜索商品特征表' ) comment '搜索商品特征表'
partitioned by (p_day date comment '分区日期') ; partitioned by (p_day date comment '分区日期') ;
-- 商品特征表 -- 商品特征表
create external table if not exists secoo_search.search_data_product_feature create external table if not exists secoo_search.search_data_product_feature
( (
product_id string comment '商品id', product_id string comment '商品id',
is_brand_top_100 string comment '是否是百大品牌', is_brand_top_100 string comment '是否是百大品牌',
product_popularity string comment '浏览、收藏、加购综合算的热度值', product_popularity string comment '浏览、收藏、加购综合算的热度值',
is_member_product string comment '是否会员商品', is_member_product string comment '是否会员商品',
is_special_product string comment '是否是特例品', is_special_product string comment '是否是特例品',
is_new_customer_product string comment '是否新客商品', is_new_customer_product string comment '是否新客商品',
is_new_product string comment '是否是新品', is_new_product string comment '是否是新品',
is_new_style string comment '是否是新款', is_new_style string comment '是否是新款',
is_self_operating string comment '是否自营品', is_self_operating string comment '是否自营品',
product_level string comment '商品等级', product_level string comment '商品等级',
is_promotion_product string comment '是否是促销商品', is_promotion_product string comment '是否是促销商品',
gender string comment '性别', gender string comment '性别',
product_brand string comment '商品的品牌', product_brand string comment '商品的品牌',
product_category string comment '商品的品类', product_category string comment '商品的品类',
product_price_level string comment '商品价格(需要离散化)', product_price_level string comment '商品价格(需要离散化)',
ctr_android_3days string comment '安卓系统下商品3天的CTR', ctr_android_3days string comment '安卓系统下商品3天的CTR',
ctr_android_7days string comment '安卓系统下商品7天的CTR', ctr_android_7days string comment '安卓系统下商品7天的CTR',
ctr_android_30days string comment '安卓系统下商品30天的CTR', ctr_android_30days string comment '安卓系统下商品30天的CTR',
ctr_ios_3days string comment 'ios系统下商品3天的CTR', ctr_ios_3days string comment 'ios系统下商品3天的CTR',
ctr_ios_7days string comment 'ios系统下商品7天的CTR', ctr_ios_7days string comment 'ios系统下商品7天的CTR',
ctr_ios_30days string comment 'ios系统下商品30天的CTR', ctr_ios_30days string comment 'ios系统下商品30天的CTR',
ctr_3days string comment '全部系统下商品3天的CTR', ctr_3days string comment '全部系统下商品3天的CTR',
ctr_7days string comment '全部系统下商品7天的CTR', ctr_7days string comment '全部系统下商品7天的CTR',
ctr_30days string comment '全部系统下商品30天的CTR', ctr_30days string comment '全部系统下商品30天的CTR',
favorite_android_3days string comment '安卓系统下商品3天的收藏', favorite_android_3days string comment '安卓系统下商品3天的收藏',
favorite_android_7days string comment '安卓系统下商品7天的收藏', favorite_android_7days string comment '安卓系统下商品7天的收藏',
favorite_android_30days string comment '安卓系统下商品30天的收藏', favorite_android_30days string comment '安卓系统下商品30天的收藏',
favorite_ios_3days string comment 'ios系统下商品3天的收藏', favorite_ios_3days string comment 'ios系统下商品3天的收藏',
favorite_ios_7days string comment 'ios系统下商品7天的收藏', favorite_ios_7days string comment 'ios系统下商品7天的收藏',
favorite_ios_30days string comment 'ios系统下商品30天的收藏', favorite_ios_30days string comment 'ios系统下商品30天的收藏',
favorite_3days string comment '全部系统下商品3天的收藏', favorite_3days string comment '全部系统下商品3天的收藏',
favorite_7days string comment '全部系统下商品7天的收藏', favorite_7days string comment '全部系统下商品7天的收藏',
favorite_30days string comment '全部系统下商品30天的收藏', favorite_30days string comment '全部系统下商品30天的收藏',
add_cart_android_3days string comment '安卓系统下商品3天的加购', add_cart_android_3days string comment '安卓系统下商品3天的加购',
add_cart_android_7days string comment '安卓系统下商品7天的加购', add_cart_android_7days string comment '安卓系统下商品7天的加购',
add_cart_android_30days string comment '安卓系统下商品30天的加购', add_cart_android_30days string comment '安卓系统下商品30天的加购',
add_cart_ios_3days string comment 'ios系统下商品3天的加购', add_cart_ios_3days string comment 'ios系统下商品3天的加购',
add_cart_ios_7days string comment 'ios系统下商品7天的加购', add_cart_ios_7days string comment 'ios系统下商品7天的加购',
add_cart_ios_30days string comment 'ios系统下商品30天的加购', add_cart_ios_30days string comment 'ios系统下商品30天的加购',
add_cart_3days string comment '全部系统下商品3天的加购', add_cart_3days string comment '全部系统下商品3天的加购',
add_cart_7days string comment '全部系统下商品7天的加购', add_cart_7days string comment '全部系统下商品7天的加购',
add_cart_30days string comment '全部系统下商品30天的加购', add_cart_30days string comment '全部系统下商品30天的加购',
area_type tinyint comment '货源地,(0大陆 1香港 2美国 3日本 4意大利)', area_type tinyint comment '货源地,(0大陆 1香港 2美国 3日本 4意大利)',
sale_qty_180 bigint comment '前180天至今销售数量', sale_qty_180 bigint comment '前180天至今销售数量',
sale_qty_90 bigint comment '前90天至今销售数量', sale_qty_90 bigint comment '前90天至今销售数量',
sale_qty_30 bigint comment '前30天至今销售数量', sale_qty_30 bigint comment '前30天至今销售数量',
sale_qty_15 bigint comment '前15天至今销售数量' sale_qty_15 bigint comment '前15天至今销售数量',
product_cate2_price_level tinyint comment '商品二级类目价格段1-10'
) comment '商品特征' ) comment '商品特征'
partitioned by ( partitioned by (p_day date comment '分区日期')
p_day date comment '分区日期'
)
stored as parquet; stored as parquet;
...@@ -2,10 +2,10 @@ ...@@ -2,10 +2,10 @@
# 检查推荐最新表名 # 检查推荐最新表名
work_dir="/data/zhaoyanchao/java/shell/product_feature/" work_dir="/data/zhaoyanchao/java/shell/product_feature/"
delta_day=0 delta_day=1
today_param=$1 today_param=$1
yesterday=`date -d "${today_param} -$delta_day day" "+%Y-%m-%d"` yesterday=`date -d "${today_param} -$delta_day day" "+%Y-%m-%d"`
date_str=`echo ${yesterday}|sed 's/\-/_/g'` date_str=`echo ${today_param}|sed 's/\-/_/g'`
table_name="secoo_rcmd_features.productFeatures_"${date_str} table_name="secoo_rcmd_features.productFeatures_"${date_str}
echo ${table_name} echo ${table_name}
...@@ -14,8 +14,8 @@ result=$? ...@@ -14,8 +14,8 @@ result=$?
while [[ ${result} -ne 0 ]]; while [[ ${result} -ne 0 ]];
do do
delta_day=$(($delta_day+1)) delta_day=$(($delta_day+1))
yesterday=`date -d "${today_param} -$delta_day day" "+%Y-%m-%d"` param_delta_1=`date -d "${today_param} -$delta_day day" "+%Y-%m-%d"`
date_str=`echo ${yesterday}|sed 's/\-/_/g'` date_str=`echo ${param_delta_1}|sed 's/\-/_/g'`
table_name="secoo_rcmd_features.productFeatures_"${date_str} table_name="secoo_rcmd_features.productFeatures_"${date_str}
hive -e "desc $table_name" hive -e "desc $table_name"
result=$? result=$?
...@@ -24,70 +24,73 @@ done ...@@ -24,70 +24,73 @@ done
# 如果无表,建表 # 如果无表,建表
hive -f $"work_dir"create_search_product_feature_table.sql hive -f "${work_dir}"create_search_product_feature_table.sql
hive -e "insert overwrite table secoo_search.search_data_product_feature partition(p_day=date'$today_param') hive -e "insert overwrite table secoo_search.search_data_product_feature partition(p_day=date'$today_param')
select select
R.product_id, R.product_id as product_id,
R.isbrand100 as isBrand100, R.isbrand100 as isBrand100,
R.productpopularity as productPopularity, R.productpopularity as productPopularity,
R.ismemberproduct as isMemberProduct , R.ismemberproduct as isMemberProduct ,
R.isspecialproduct as isSpecialProduct, R.isspecialproduct as isSpecialProduct,
R.isnewcustomerproduct as isNewCustomerProduct, R.isnewcustomerproduct as isNewCustomerProduct,
R.isnewproduct as isNewProduct, R.isnewproduct as isNewProduct,
R.isnewstyle as isNewStyle, R.isnewstyle as isNewStyle,
R.isselfoperating as isSelfOperating, R.isselfoperating as isSelfOperating,
R.productlevel as productLevel, R.productlevel as productLevel,
R.ispromotionproduct as isPromotionProduct, R.ispromotionproduct as isPromotionProduct,
R.prodgender as prodGender, R.prodgender as prodGender,
R.productbrand as productBrand, R.productbrand as productBrand,
R.productcategory as productCategory, R.productcategory as productCategory,
R.productpricelevel as productPriceLevel, R.productpricelevel as productPriceLevel,
R.3daysandroidctr as 3DaysAndroidCTR, R.3daysandroidctr as 3DaysAndroidCTR,
R.7daysandroidctr as 7DaysAndroidCTR, R.7daysandroidctr as 7DaysAndroidCTR,
R.30daysandroidctr as 30DaysAndroidCTR, R.30daysandroidctr as 30DaysAndroidCTR,
R.3daysiosctr as 3DaysIosCTR, R.3daysiosctr as 3DaysIosCTR,
R.7daysiosctr as 7DaysIosCTR, R.7daysiosctr as 7DaysIosCTR,
R.30daysiosctr as 30DaysIosCTR, R.30daysiosctr as 30DaysIosCTR,
R.3daysctr as 3DaysCTR, R.3daysctr as 3DaysCTR,
R.7daysctr as 7DaysCTR, R.7daysctr as 7DaysCTR,
R.30daysctr as 30DaysCTR, R.30daysctr as 30DaysCTR,
R.3daysandroidfavorite as 3DaysAndroidFavorite, R.3daysandroidfavorite as 3DaysAndroidFavorite,
R.7daysandroidfavorite as 7DaysAndroidFavorite, R.7daysandroidfavorite as 7DaysAndroidFavorite,
R.30daysandroidfavorite as 30DaysAndroidFavorite, R.30daysandroidfavorite as 30DaysAndroidFavorite,
R.3daysiosfavorite as 3DaysIosFavorite, R.3daysiosfavorite as 3DaysIosFavorite,
R.7daysiosfavorite as 7DaysIosFavorite, R.7daysiosfavorite as 7DaysIosFavorite,
R.30daysiosfavorite as 30DaysIosFavorite, R.30daysiosfavorite as 30DaysIosFavorite,
R.3daysfavorite as 3DaysFavorite, R.3daysfavorite as 3DaysFavorite,
R.7daysfavorite as 7DaysFavorite, R.7daysfavorite as 7DaysFavorite,
R.30daysfavorite as 30DaysFavorite, R.30daysfavorite as 30DaysFavorite,
R.3daysandroidaddcart as 3DaysAndroidAddCart, R.3daysandroidaddcart as 3DaysAndroidAddCart,
R.7daysandroidaddcart as 7DaysAndroidAddCart, R.7daysandroidaddcart as 7DaysAndroidAddCart,
R.30daysandroidaddcart as 30DaysAndroidAddCart, R.30daysandroidaddcart as 30DaysAndroidAddCart,
R.3daysiosaddcart as 3DaysIosAddCart, R.3daysiosaddcart as 3DaysIosAddCart,
R.7daysiosaddcart as 7DaysIosAddCart, R.7daysiosaddcart as 7DaysIosAddCart,
R.30daysiosaddcart as 30DaysIosAddCart, R.30daysiosaddcart as 30DaysIosAddCart,
R.3daysaddcart as 3DaysAddCart, R.3daysaddcart as 3DaysAddCart,
R.7daysaddcart as 7DaysAddCart, R.7daysaddcart as 7DaysAddCart,
R.30daysaddcart as 30DaysAddCart, R.30daysaddcart as 30DaysAddCart,
W.area_type, W.area_type as area_type,
W.sale_qty_180, W.sale_qty_180 as sale_qty_180,
W.sale_qty_90, W.sale_qty_90 as sale_qty_90,
W.sale_qty_30, W.sale_qty_30 as sale_qty_30,
W.sale_qty_15 W.sale_qty_15 as sale_qty_15,
from $table_name R
left join secoo_fact.fact_search_product_wide_p_day W P.cate_2_price_level as product_cate2_price_level
on R.product_id = cast(W.product_id AS string) and W.p_day = '$yesterday';" from $table_name R
left join secoo_fact.fact_search_product_wide_p_day W on R.product_id = cast(W.product_id AS string) and W.p_day = '${yesterday}'
left join secoo_fact.fact_search_product_55_30d_click_product_p_day P on W.main_id = P.product_main_id and P.p_day = '${yesterday}'
;"
......
...@@ -6,78 +6,116 @@ device_id string comment '设备id', ...@@ -6,78 +6,116 @@ device_id string comment '设备id',
addCartIn7Days string comment '7天加购量', addCartIn7Days string comment '7天加购量',
1YearCartProductCount string comment '1年加购量', 1YearCartProductCount string comment '1年加购量',
1YearOrderProductManCount string comment '1年男性订单', 1YearOrderProductManCount string comment '1年男性订单',
1YearOrderProductManPriceAmt string comment '', 1YearOrderProductManPriceAmt string comment '最近一年购买男装总金额',
30DaysChannelPvJrzk string comment '今日折扣', 30DaysChannelPvJrzk string comment '最近30天浏览频道次数_今日折扣',
1YearOrderTicketCategoryCount string comment '', 1YearOrderTicketCategoryCount string comment '最近一年使用优惠卷品类数',
30DaysDetailPv string comment '', 30DaysDetailPv string comment '最近30天商品详情页浏览次数',
userInterestCategory1_0 string comment '用户第一个一级品类偏好', userInterestCategory1_0 string comment '用户第一个一级品类偏好',
userInterestCategory2_0 string comment '用户第一个二级品类偏好', userInterestCategory2_0 string comment '用户第一个二级品类偏好',
userInterestCategory1_1 string comment '用户第二个一级品类偏好', userInterestCategory1_1 string comment '用户第二个一级品类偏好',
userInterestCategory2_1 string comment '用户第二个二级品类偏好', userInterestCategory2_1 string comment '用户第二个二级品类偏好',
userInterestCategory3_0 string comment '用户第一个三级品类偏好', userInterestCategory3_0 string comment '用户第一个三级品类偏好',
30DaysSearchCategoryCount string comment '', 30DaysSearchCategoryCount string comment '最近30天搜索品类数',
1YearOrderPayPriceAmt string comment '', 1YearOrderPayPriceAmt string comment '最近一年付费总金额',
userInterestCategory1_2 string comment '用户第三个一级品类偏好', userInterestCategory1_2 string comment '用户第三个一级品类偏好',
30DaysSearchBrandCount string comment '', 30DaysSearchBrandCount string comment '最近30天搜索品牌数',
userInterestCategory1_3 string comment '用户第四个一级品类偏好', userInterestCategory1_3 string comment '用户第四个一级品类偏好',
userInterestCategory2_2 string comment '用户第三个二级品类偏好', userInterestCategory2_2 string comment '用户第三个二级品类偏好',
userInterestCategory3_1 string comment '用户第二个三级品类偏好', userInterestCategory3_1 string comment '用户第二个三级品类偏好',
1YearCartCategoryCount string comment '', 1YearCartCategoryCount string comment '当前购物车品类数(最近一年)',
userInterestCategory2_3 string comment '用户第四个二级品类偏好', userInterestCategory2_3 string comment '用户第四个二级品类偏好',
userInterestCategory1_4 string comment '用户第五个一级品类偏好', userInterestCategory1_4 string comment '用户第五个一级品类偏好',
userInterestCategory3_2 string comment '用户第二个三级品类偏好', userInterestCategory3_2 string comment '用户第二个三级品类偏好',
userInterestCategory1_5 string comment '用户第六个一级品类偏好', userInterestCategory1_5 string comment '用户第六个一级品类偏好',
userInterestCategory3_3 string comment '用户第四个三级品类偏好', userInterestCategory3_3 string comment '用户第四个三级品类偏好',
1YearOrderProductOtherCount string comment '', 1YearOrderProductOtherCount string comment '最近一年购买其他次数',
30DaysDetailPva string comment '', 30DaysDetailPva string comment '最近30天商品详情页浏览次数_A级',
userInterestCategory2_4 string comment '用户第五个二级品类偏好', userInterestCategory2_4 string comment '用户第五个二级品类偏好',
1YearOrderUsePointCount string comment '', 1YearOrderUsePointCount string comment '最近一年使用总积分',
30DaysChannelPvPaiHangBang string comment '', 30DaysChannelPvPaiHangBang string comment '最近30天浏览频道次数_排行榜',
30daysChannelPvXinPinBang string comment '', 30daysChannelPvXinPinBang string comment '最近30天浏览频道次数_新品榜',
1YearAddFavCategoryCount string comment '', 1YearAddFavCategoryCount string comment '当前收藏品类数(最近一年)',
userInterestCategory3_4 string comment '用户第五个三级品类偏好', userInterestCategory3_4 string comment '用户第五个三级品类偏好',
userInterestCategory2_5 string comment '用户第六个二级品类偏好', userInterestCategory2_5 string comment '用户第六个二级品类偏好',
activeInWeekends string comment '', activeInWeekends string comment '节假日是否活跃',
1YearOrderCount string comment '', 1YearOrderCount string comment '最近一年订单总次数',
30DaysDetailPvb string comment '', 30DaysDetailPvb string comment '最近30天商品详情页浏览次数_B级',
1YearOrderPayProductCount string comment '', 1YearOrderPayProductCount string comment '最近一年付费商品数',
1YearOrderUsePointPriceAmt string comment '', 1YearOrderUsePointPriceAmt string comment '最近一年抵扣积分金额',
1YearAddFavProductCount string comment '', 1YearAddFavProductCount string comment '当前收藏商品数(最近一年)',
30DaysDetailPvab string comment '', 30DaysDetailPvab string comment '最近30天商品详情页浏览次数_AB级',
userInterestCategory3_5 string comment '用户第六个三级品类偏好', userInterestCategory3_5 string comment '用户第六个三级品类偏好',
deviceType string comment '', deviceType string comment '设备类型(IOS/Android)',
activeIn7Days string comment '', activeIn7Days string comment '最近7天是否活跃',
gender string comment '', gender string comment '用户性别',
purchasedBrand100 string comment '', purchasedBrand100 string comment '是否购买过百大品牌',
30DaysChannelPvRenQiBang string comment '', 30DaysChannelPvRenQiBang string comment '最近30天浏览频道次数_人气榜',
1YearCartPriceAmt string comment '', 1YearCartPriceAmt string comment '当前购物车总金额(最近一年)',
regularCustomer string comment '', regularCustomer string comment '是否常客',
1YearOrderPayBrandCount string comment '', 1YearOrderPayBrandCount string comment '最近一年付费品牌数',
1YearOrderTicketBrandCount string comment '', 1YearOrderTicketBrandCount string comment '最近一年使用优惠卷品牌数',
1YearOrderProductOtherPriceAmt string comment '', 1YearOrderProductOtherPriceAmt string comment '最近一年购买其他总金额',
1YearOrderProductCountAvg string comment '', 1YearOrderProductCountAvg string comment '最近一年订单包含平均商品数',
favoriteIn7Days string comment '', favoriteIn7Days string comment '七天收藏商品',
30DaysDetailPvn string comment '', 30DaysDetailPvn string comment '最近30天商品详情页浏览次数_N级',
1YearAddFavPriceAmt string comment '', 1YearAddFavPriceAmt string comment '当前收藏总金额(最近一年)',
1YearOrderProductWomanPriceAmt string comment '', 1YearOrderProductWomanPriceAmt string comment '最近一年购买女装总金额',
1YearOrderProductWomanCount string comment '', 1YearOrderProductWomanCount string comment '最近一年购买女装次数',
30DaysChannelPvXscj string comment '', 30DaysChannelPvXscj string comment '最近30天浏览频道次数_限时抽奖',
1YearOrderTicketPriceAmt string comment '', 1YearOrderTicketPriceAmt string comment '最近一年使用优惠卷总金额',
userInterestBrand_0 string comment '用户第一个品牌偏好', userInterestBrand_0 string comment '用户第一个品牌偏好',
1YearAddFavBrandCount string comment '', 1YearAddFavBrandCount string comment '当前收藏品牌数(最近一年)',
userInterestBrand_1 string comment '用户第二个品牌偏好', userInterestBrand_1 string comment '用户第二个品牌偏好',
userInterestBrand_2 string comment '用户第三个品牌偏好', userInterestBrand_2 string comment '用户第三个品牌偏好',
1YearOrderPayCount string comment '', 1YearOrderPayCount string comment '最近一年付费订单数',
30DaysChannelPvAoLai string comment '', 30DaysChannelPvAoLai string comment '最近30天浏览频道次数_奥莱特卖',
userInterestBrand_3 string comment '用户第四个品牌偏好', userInterestBrand_3 string comment '用户第四个品牌偏好',
30DaysDetailPvs string comment '', 30DaysDetailPvs string comment '最近30天商品详情页浏览次数_S级',
purchasedItems string comment '', purchasedItems string comment '是否购过商品',
30DaysSearchCount string comment '', 30DaysSearchCount string comment '最近30天搜索次数',
1YearCartBrandCount string comment '', 1YearCartBrandCount string comment '当前购物车品牌数(最近一年)',
1YearOrderTicketCount string comment '', 1YearOrderTicketCount string comment '最近一年使用优惠卷订单数',
1YearOrderPayCategoryCount string comment '' 1YearOrderPayCategoryCount string comment '最近一年付费品类数',
click_category2_id1 string comment '用户点击偏好二级类目1',
click_category2_id2 string comment '用户点击偏好二级类目2',
click_category2_id3 string comment '用户点击偏好二级类目3',
click_category2_id4 string comment '用户点击偏好二级类目4',
click_category2_id5 string comment '用户点击偏好二级类目5',
click_category2_id6 string comment '用户点击偏好二级类目6',
click_brand_id1 string comment '用户点击偏好品牌1',
click_brand_id2 string comment '用户点击偏好品牌2',
click_brand_id3 string comment '用户点击偏好品牌3',
click_brand_id4 string comment '用户点击偏好品牌4',
click_brand_id5 string comment '用户点击偏好品牌5',
click_brand_id6 string comment '用户点击偏好品牌6',
add_category2_id1 string comment '用户加购偏好二级类目1',
add_category2_id2 string comment '用户加购偏好二级类目2',
add_category2_id3 string comment '用户加购偏好二级类目3',
add_category2_id4 string comment '用户加购偏好二级类目4',
add_category2_id5 string comment '用户加购偏好二级类目5',
add_category2_id6 string comment '用户加购偏好二级类目6',
add_brand_id1 string comment '用户加购偏好品牌1',
add_brand_id2 string comment '用户加购偏好品牌2',
add_brand_id3 string comment '用户加购偏好品牌3',
add_brand_id4 string comment '用户加购偏好品牌4',
add_brand_id5 string comment '用户加购偏好品牌5',
add_brand_id6 string comment '用户加购偏好品牌6',
pay_category2_id1 string comment '用户购买偏好二级类目1',
pay_category2_id2 string comment '用户购买偏好二级类目2',
pay_category2_id3 string comment '用户购买偏好二级类目3',
pay_category2_id4 string comment '用户购买偏好二级类目4',
pay_category2_id5 string comment '用户购买偏好二级类目5',
pay_category2_id6 string comment '用户购买偏好二级类目6',
pay_brand_id1 string comment '用户购买偏好品牌1',
pay_brand_id2 string comment '用户购买偏好品牌2',
pay_brand_id3 string comment '用户购买偏好品牌3',
pay_brand_id4 string comment '用户购买偏好品牌4',
pay_brand_id5 string comment '用户购买偏好品牌5',
pay_brand_id6 string comment '用户购买偏好品牌6',
grow_level string comment '用户成长等级'
) comment '用户特征' ) comment '用户特征'
partitioned by ( partitioned by (p_day date comment '分区日期')
p_day date comment '分区日期'
)
stored as parquet; stored as parquet;
-- User preference feature table for search.
-- One row per device and p_day partition, with six ranked slots each for
-- clicked, add-to-cart and purchased second-level categories and brands.
CREATE EXTERNAL TABLE IF NOT EXISTS secoo_search.user_brand_category2_favorite
(
    device_id            STRING COMMENT '用户设备id',
    -- click preference slots
    click_category2_id1  STRING COMMENT '用户点击偏好二级类目1',
    click_category2_id2  STRING COMMENT '用户点击偏好二级类目2',
    click_category2_id3  STRING COMMENT '用户点击偏好二级类目3',
    click_category2_id4  STRING COMMENT '用户点击偏好二级类目4',
    click_category2_id5  STRING COMMENT '用户点击偏好二级类目5',
    click_category2_id6  STRING COMMENT '用户点击偏好二级类目6',
    click_brand_id1      STRING COMMENT '用户点击偏好品牌1',
    click_brand_id2      STRING COMMENT '用户点击偏好品牌2',
    click_brand_id3      STRING COMMENT '用户点击偏好品牌3',
    click_brand_id4      STRING COMMENT '用户点击偏好品牌4',
    click_brand_id5      STRING COMMENT '用户点击偏好品牌5',
    click_brand_id6      STRING COMMENT '用户点击偏好品牌6',
    -- add-to-cart preference slots
    add_category2_id1    STRING COMMENT '用户加购偏好二级类目1',
    add_category2_id2    STRING COMMENT '用户加购偏好二级类目2',
    add_category2_id3    STRING COMMENT '用户加购偏好二级类目3',
    add_category2_id4    STRING COMMENT '用户加购偏好二级类目4',
    add_category2_id5    STRING COMMENT '用户加购偏好二级类目5',
    add_category2_id6    STRING COMMENT '用户加购偏好二级类目6',
    add_brand_id1        STRING COMMENT '用户加购偏好品牌1',
    add_brand_id2        STRING COMMENT '用户加购偏好品牌2',
    add_brand_id3        STRING COMMENT '用户加购偏好品牌3',
    add_brand_id4        STRING COMMENT '用户加购偏好品牌4',
    add_brand_id5        STRING COMMENT '用户加购偏好品牌5',
    add_brand_id6        STRING COMMENT '用户加购偏好品牌6',
    -- purchase preference slots
    pay_category2_id1    STRING COMMENT '用户购买偏好二级类目1',
    pay_category2_id2    STRING COMMENT '用户购买偏好二级类目2',
    pay_category2_id3    STRING COMMENT '用户购买偏好二级类目3',
    pay_category2_id4    STRING COMMENT '用户购买偏好二级类目4',
    pay_category2_id5    STRING COMMENT '用户购买偏好二级类目5',
    pay_category2_id6    STRING COMMENT '用户购买偏好二级类目6',
    pay_brand_id1        STRING COMMENT '用户购买偏好品牌1',
    pay_brand_id2        STRING COMMENT '用户购买偏好品牌2',
    pay_brand_id3        STRING COMMENT '用户购买偏好品牌3',
    pay_brand_id4        STRING COMMENT '用户购买偏好品牌4',
    pay_brand_id5        STRING COMMENT '用户购买偏好品牌5',
    pay_brand_id6        STRING COMMENT '用户购买偏好品牌6'
)
COMMENT '搜索用户偏好品牌品类特征表'
PARTITIONED BY (p_day DATE COMMENT '分区日期');
-- Click preference, second-level category.
-- Builds one row per device with its top 6 clicked second-level categories
-- over the 30 days before ${today} (exclusive).
-- Slot N holds the category ranked N by click count, and unused slots hold the string 0.
-- Slots are filled by rank with conditional aggregation because collect_set
-- gives no ordering guarantee, so packing and splitting a list could shuffle the slots.
drop table if exists tmp.tmp_user_click_product_category2_id;
create table if not exists tmp.tmp_user_click_product_category2_id as
select
    device_id,
    nvl(max(case when rn = 1 then cast(category2_id as string) end), '0') as click_category2_id1,
    nvl(max(case when rn = 2 then cast(category2_id as string) end), '0') as click_category2_id2,
    nvl(max(case when rn = 3 then cast(category2_id as string) end), '0') as click_category2_id3,
    nvl(max(case when rn = 4 then cast(category2_id as string) end), '0') as click_category2_id4,
    nvl(max(case when rn = 5 then cast(category2_id as string) end), '0') as click_category2_id5,
    nvl(max(case when rn = 6 then cast(category2_id as string) end), '0') as click_category2_id6
from
    (select
        device_id,
        category2_id,
        -- category2_id is a tie-breaker so equal counts rank the same way on every run
        row_number() over(partition by device_id order by click_pv desc, category2_id) as rn
    from
        (select
            click_device_id as device_id,
            product_category_id_2 as category2_id,
            sum(is_click) as click_pv
        from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
        where p_day >= date_sub(${today}, 30) and p_day < ${today}
            and is_click = 1
            and click_device_id is not null
            -- a missing category is not a preference and must not consume a rank slot
            and product_category_id_2 is not null
        group by click_device_id, product_category_id_2
        ) click_counts
    ) ranked
where rn < 7
group by device_id;
-- Click preference, brand.
-- Builds one row per device with its top 6 clicked brands
-- over the 30 days before ${today} (exclusive).
-- Slot N holds the brand ranked N by click count, and unused slots hold the string 0.
-- Slots are filled by rank with conditional aggregation because collect_set
-- gives no ordering guarantee, so packing and splitting a list could shuffle the slots.
drop table if exists tmp.tmp_user_click_product_brand_id;
create table if not exists tmp.tmp_user_click_product_brand_id as
select
    device_id,
    nvl(max(case when rn = 1 then cast(product_brand_id as string) end), '0') as click_brand_id1,
    nvl(max(case when rn = 2 then cast(product_brand_id as string) end), '0') as click_brand_id2,
    nvl(max(case when rn = 3 then cast(product_brand_id as string) end), '0') as click_brand_id3,
    nvl(max(case when rn = 4 then cast(product_brand_id as string) end), '0') as click_brand_id4,
    nvl(max(case when rn = 5 then cast(product_brand_id as string) end), '0') as click_brand_id5,
    nvl(max(case when rn = 6 then cast(product_brand_id as string) end), '0') as click_brand_id6
from
    (select
        device_id,
        product_brand_id,
        -- product_brand_id is a tie-breaker so equal counts rank the same way on every run
        row_number() over(partition by device_id order by click_pv desc, product_brand_id) as rn
    from
        (select
            click_device_id as device_id,
            product_brand_id as product_brand_id,
            sum(is_click) as click_pv
        from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
        where p_day >= date_sub(${today}, 30) and p_day < ${today}
            and is_click = 1
            and click_device_id is not null
            -- a missing brand is not a preference and must not consume a rank slot
            and product_brand_id is not null
        group by click_device_id, product_brand_id
        ) click_counts
    ) ranked
where rn < 7
group by device_id;
-- Add-to-cart preference, second-level category.
-- Builds one row per device with its top 6 add-to-cart second-level categories
-- over the 90 days before ${today} (exclusive).
-- Slot N holds the category ranked N by add-to-cart count, and unused slots hold the string 0.
-- Slots are filled by rank with conditional aggregation because collect_set
-- gives no ordering guarantee, so packing and splitting a list could shuffle the slots.
drop table if exists tmp.tmp_user_add_product_category2_id;
create table if not exists tmp.tmp_user_add_product_category2_id as
select
    device_id,
    nvl(max(case when rn = 1 then cast(category2_id as string) end), '0') as add_category2_id1,
    nvl(max(case when rn = 2 then cast(category2_id as string) end), '0') as add_category2_id2,
    nvl(max(case when rn = 3 then cast(category2_id as string) end), '0') as add_category2_id3,
    nvl(max(case when rn = 4 then cast(category2_id as string) end), '0') as add_category2_id4,
    nvl(max(case when rn = 5 then cast(category2_id as string) end), '0') as add_category2_id5,
    nvl(max(case when rn = 6 then cast(category2_id as string) end), '0') as add_category2_id6
from
    (select
        device_id,
        category2_id,
        -- category2_id is a tie-breaker so equal counts rank the same way on every run
        row_number() over(partition by device_id order by add_pv desc, category2_id) as rn
    from
        (select
            add_cart_device_id as device_id,
            product_category_id_2 as category2_id,
            sum(is_action_add_cart) as add_pv
        from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
        where p_day >= date_sub(${today}, 90) and p_day < ${today}
            and is_action_add_cart = 1
            and add_cart_device_id is not null
            -- a missing category is not a preference and must not consume a rank slot
            and product_category_id_2 is not null
        group by add_cart_device_id, product_category_id_2
        ) add_counts
    ) ranked
where rn < 7
group by device_id;
-- Add-to-cart preference, brand.
-- Builds one row per device with its top 6 add-to-cart brands
-- over the 90 days before ${today} (exclusive).
-- Slot N holds the brand ranked N by add-to-cart count, and unused slots hold the string 0.
-- Slots are filled by rank with conditional aggregation because collect_set
-- gives no ordering guarantee, so packing and splitting a list could shuffle the slots.
drop table if exists tmp.tmp_user_add_product_brand_id;
create table if not exists tmp.tmp_user_add_product_brand_id as
select
    device_id,
    nvl(max(case when rn = 1 then cast(product_brand_id as string) end), '0') as add_brand_id1,
    nvl(max(case when rn = 2 then cast(product_brand_id as string) end), '0') as add_brand_id2,
    nvl(max(case when rn = 3 then cast(product_brand_id as string) end), '0') as add_brand_id3,
    nvl(max(case when rn = 4 then cast(product_brand_id as string) end), '0') as add_brand_id4,
    nvl(max(case when rn = 5 then cast(product_brand_id as string) end), '0') as add_brand_id5,
    nvl(max(case when rn = 6 then cast(product_brand_id as string) end), '0') as add_brand_id6
from
    (select
        device_id,
        product_brand_id,
        -- product_brand_id is a tie-breaker so equal counts rank the same way on every run
        row_number() over(partition by device_id order by add_pv desc, product_brand_id) as rn
    from
        (select
            add_cart_device_id as device_id,
            product_brand_id as product_brand_id,
            sum(is_action_add_cart) as add_pv
        from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
        where p_day >= date_sub(${today}, 90) and p_day < ${today}
            and is_action_add_cart = 1
            and add_cart_device_id is not null
            -- a missing brand is not a preference and must not consume a rank slot
            and product_brand_id is not null
        group by add_cart_device_id, product_brand_id
        ) add_counts
    ) ranked
where rn < 7
group by device_id;
-- Purchase preference, second-level category.
-- Builds one row per device with its top 6 purchased second-level categories
-- over the 365 days before ${today} (exclusive).
-- Slot N holds the category ranked N by successful payment count, and unused slots hold the string 0.
-- Slots are filled by rank with conditional aggregation because collect_set
-- gives no ordering guarantee, so packing and splitting a list could shuffle the slots.
drop table if exists tmp.tmp_user_pay_product_category2_id;
create table if not exists tmp.tmp_user_pay_product_category2_id as
select
    device_id,
    nvl(max(case when rn = 1 then cast(category2_id as string) end), '0') as pay_category2_id1,
    nvl(max(case when rn = 2 then cast(category2_id as string) end), '0') as pay_category2_id2,
    nvl(max(case when rn = 3 then cast(category2_id as string) end), '0') as pay_category2_id3,
    nvl(max(case when rn = 4 then cast(category2_id as string) end), '0') as pay_category2_id4,
    nvl(max(case when rn = 5 then cast(category2_id as string) end), '0') as pay_category2_id5,
    nvl(max(case when rn = 6 then cast(category2_id as string) end), '0') as pay_category2_id6
from
    (select
        device_id,
        category2_id,
        -- category2_id is a tie-breaker so equal counts rank the same way on every run
        row_number() over(partition by device_id order by pay_pv desc, category2_id) as rn
    from
        (select
            pay_device_id as device_id,
            product_category_id_2 as category2_id,
            sum(is_pay_success) as pay_pv
        from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
        where p_day >= date_sub(${today}, 365) and p_day < ${today}
            and is_pay_success = 1
            and pay_device_id is not null
            -- a missing category is not a preference and must not consume a rank slot
            and product_category_id_2 is not null
        group by pay_device_id, product_category_id_2
        ) pay_counts
    ) ranked
where rn < 7
group by device_id;
-- Purchase preference, brand.
-- Builds one row per device with its top 6 purchased brands
-- over the 365 days before ${today} (exclusive).
-- Slot N holds the brand ranked N by successful payment count, and unused slots hold the string 0.
-- Slots are filled by rank with conditional aggregation because collect_set
-- gives no ordering guarantee, so packing and splitting a list could shuffle the slots.
drop table if exists tmp.tmp_user_pay_product_brand_id;
create table if not exists tmp.tmp_user_pay_product_brand_id as
select
    device_id,
    nvl(max(case when rn = 1 then cast(product_brand_id as string) end), '0') as pay_brand_id1,
    nvl(max(case when rn = 2 then cast(product_brand_id as string) end), '0') as pay_brand_id2,
    nvl(max(case when rn = 3 then cast(product_brand_id as string) end), '0') as pay_brand_id3,
    nvl(max(case when rn = 4 then cast(product_brand_id as string) end), '0') as pay_brand_id4,
    nvl(max(case when rn = 5 then cast(product_brand_id as string) end), '0') as pay_brand_id5,
    nvl(max(case when rn = 6 then cast(product_brand_id as string) end), '0') as pay_brand_id6
from
    (select
        device_id,
        product_brand_id,
        -- product_brand_id is a tie-breaker so equal counts rank the same way on every run
        row_number() over(partition by device_id order by pay_pv desc, product_brand_id) as rn
    from
        (select
            pay_device_id as device_id,
            product_brand_id as product_brand_id,
            sum(is_pay_success) as pay_pv
        from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
        where p_day >= date_sub(${today}, 365) and p_day < ${today}
            and is_pay_success = 1
            and pay_device_id is not null
            -- a missing brand is not a preference and must not consume a rank slot
            and product_brand_id is not null
        group by pay_device_id, product_brand_id
        ) pay_counts
    ) ranked
where rn < 7
group by device_id;
-- Merge the six staging tables into the p_day=${today} partition, one row per device.
-- The driving set is the union of device ids from all six staging tables.
-- The staging windows differ (30 days for clicks, 90 for add-to-cart, 365 for purchases),
-- so driving from the click table alone would drop devices that added to cart or
-- purchased but did not click within the last 30 days.
-- Any slot with no value is written as the string 0.
insert overwrite table secoo_search.user_brand_category2_favorite partition(p_day=${today})
select
    D.device_id as device_id,
    nvl(C1.click_category2_id1, '0') as click_category2_id1,
    nvl(C1.click_category2_id2, '0') as click_category2_id2,
    nvl(C1.click_category2_id3, '0') as click_category2_id3,
    nvl(C1.click_category2_id4, '0') as click_category2_id4,
    nvl(C1.click_category2_id5, '0') as click_category2_id5,
    nvl(C1.click_category2_id6, '0') as click_category2_id6,
    nvl(B1.click_brand_id1, '0') as click_brand_id1,
    nvl(B1.click_brand_id2, '0') as click_brand_id2,
    nvl(B1.click_brand_id3, '0') as click_brand_id3,
    nvl(B1.click_brand_id4, '0') as click_brand_id4,
    nvl(B1.click_brand_id5, '0') as click_brand_id5,
    nvl(B1.click_brand_id6, '0') as click_brand_id6,
    nvl(C2.add_category2_id1, '0') as add_category2_id1,
    nvl(C2.add_category2_id2, '0') as add_category2_id2,
    nvl(C2.add_category2_id3, '0') as add_category2_id3,
    nvl(C2.add_category2_id4, '0') as add_category2_id4,
    nvl(C2.add_category2_id5, '0') as add_category2_id5,
    nvl(C2.add_category2_id6, '0') as add_category2_id6,
    nvl(B2.add_brand_id1, '0') as add_brand_id1,
    nvl(B2.add_brand_id2, '0') as add_brand_id2,
    nvl(B2.add_brand_id3, '0') as add_brand_id3,
    nvl(B2.add_brand_id4, '0') as add_brand_id4,
    nvl(B2.add_brand_id5, '0') as add_brand_id5,
    nvl(B2.add_brand_id6, '0') as add_brand_id6,
    nvl(C3.pay_category2_id1, '0') as pay_category2_id1,
    nvl(C3.pay_category2_id2, '0') as pay_category2_id2,
    nvl(C3.pay_category2_id3, '0') as pay_category2_id3,
    nvl(C3.pay_category2_id4, '0') as pay_category2_id4,
    nvl(C3.pay_category2_id5, '0') as pay_category2_id5,
    nvl(C3.pay_category2_id6, '0') as pay_category2_id6,
    nvl(B3.pay_brand_id1, '0') as pay_brand_id1,
    nvl(B3.pay_brand_id2, '0') as pay_brand_id2,
    nvl(B3.pay_brand_id3, '0') as pay_brand_id3,
    nvl(B3.pay_brand_id4, '0') as pay_brand_id4,
    nvl(B3.pay_brand_id5, '0') as pay_brand_id5,
    nvl(B3.pay_brand_id6, '0') as pay_brand_id6
from
    -- union all plus group by keeps one row per device and avoids relying on union distinct
    (select device_id
    from
        (select device_id from tmp.tmp_user_click_product_category2_id
        union all
        select device_id from tmp.tmp_user_click_product_brand_id
        union all
        select device_id from tmp.tmp_user_add_product_category2_id
        union all
        select device_id from tmp.tmp_user_add_product_brand_id
        union all
        select device_id from tmp.tmp_user_pay_product_category2_id
        union all
        select device_id from tmp.tmp_user_pay_product_brand_id
        ) all_ids
    group by device_id
    ) D
left join tmp.tmp_user_click_product_category2_id C1 on D.device_id = C1.device_id
left join tmp.tmp_user_click_product_brand_id B1 on D.device_id = B1.device_id
left join tmp.tmp_user_add_product_category2_id C2 on D.device_id = C2.device_id
left join tmp.tmp_user_add_product_brand_id B2 on D.device_id = B2.device_id
left join tmp.tmp_user_pay_product_category2_id C3 on D.device_id = C3.device_id
left join tmp.tmp_user_pay_product_brand_id B3 on D.device_id = B3.device_id;
\ No newline at end of file
# Driver for the user brand / second-level category preference table.
# Takes the run date as the first argument, creates the target table if needed,
# then runs the insert script with the date passed in as the Hive variable "today".
# NOTE(review): with no argument the date expression is just the offset, which GNU date
# appears to resolve relative to the current day - confirm this is the intended default.
work_dir="/data/zhaoyanchao/java/shell/user_feature/"
delta_day=0
today_param=$1

# Stop early on an unparsable date. Otherwise an empty value would be handed to Hive
# and the insert overwrite would run against a bogus partition.
today=$(date -d "${today_param} -${delta_day} day" "+%Y-%m-%d") || {
    echo "invalid date argument: ${today_param}" >&2
    exit 1
}
echo "${today}"

# Do not run the insert when the target table could not be created.
hive -f "${work_dir}create_user_brand_category_favorite_table.sql" || exit 1

# The value is wrapped in single quotes so that ${today} expands to a SQL string literal.
hive --hivevar today="'${today}'" -f "${work_dir}insert_user_brand_category_favorite_table.sql"
...@@ -3,10 +3,10 @@ ...@@ -3,10 +3,10 @@
# 检查推荐最新表名 # 检查推荐最新表名
work_dir="/data/zhaoyanchao/java/shell/user_feature/" work_dir="/data/zhaoyanchao/java/shell/user_feature/"
delta_day=0 delta_day=1
today_param=$1 today_param=$1
yesterday=`date -d "${today_param} -$delta_day day" "+%Y-%m-%d"` yesterday=`date -d "${today_param} -$delta_day day" "+%Y-%m-%d"`
date_str=`echo ${yesterday}|sed 's/\-/_/g'` date_str=`echo ${today_param}|sed 's/\-/_/g'`
table_name="secoo_rcmd_features.userfeatures_"${date_str} table_name="secoo_rcmd_features.userfeatures_"${date_str}
echo ${table_name} echo ${table_name}
...@@ -35,81 +35,124 @@ echo "开始导入数据" ...@@ -35,81 +35,124 @@ echo "开始导入数据"
hive -e "insert overwrite table secoo_search.search_data_user_feature partition(p_day=date'$today_param') hive -e "insert overwrite table secoo_search.search_data_user_feature partition(p_day=date'$today_param')
select select
device_id as device_id, T1.device_id as device_id,
1yearorderticketproductcount as 1YearOrderTicketProductCount, T1.1yearorderticketproductcount as 1YearOrderTicketProductCount,
addcartin7days as addCartIn7Days, T1.addcartin7days as addCartIn7Days,
1yearcartproductcount as 1YearCartProductCount, T1.1yearcartproductcount as 1YearCartProductCount,
1yearorderproductmancount as 1YearOrderProductManCount, T1.1yearorderproductmancount as 1YearOrderProductManCount,
1yearorderproductmanpriceamt as 1YearOrderProductManPriceAmt, T1.1yearorderproductmanpriceamt as 1YearOrderProductManPriceAmt,
30dayschannelpvjrzk as 30DaysChannelPvJrzk, T1.30dayschannelpvjrzk as 30DaysChannelPvJrzk,
1yearorderticketcategorycount as 1YearOrderTicketCategoryCount, T1.1yearorderticketcategorycount as 1YearOrderTicketCategoryCount,
30daysdetailpv as 30DaysDetailPv, T1.30daysdetailpv as 30DaysDetailPv,
userinterestcategory1_0 as userInterestCategory1_0, T1.userinterestcategory1_0 as userInterestCategory1_0,
userinterestcategory2_0 as userInterestCategory2_0, T1.userinterestcategory2_0 as userInterestCategory2_0,
userinterestcategory1_1 as userInterestCategory1_1, T1.userinterestcategory1_1 as userInterestCategory1_1,
userinterestcategory2_1 as userInterestCategory2_1, T1.userinterestcategory2_1 as userInterestCategory2_1,
userinterestcategory3_0 as userInterestCategory3_0, T1.userinterestcategory3_0 as userInterestCategory3_0,
30dayssearchcategorycount as 30DaysSearchCategoryCount, T1.30dayssearchcategorycount as 30DaysSearchCategoryCount,
1yearorderpaypriceamt as 1YearOrderPayPriceAmt, T1.1yearorderpaypriceamt as 1YearOrderPayPriceAmt,
userinterestcategory1_2 as userInterestCategory1_2, T1.userinterestcategory1_2 as userInterestCategory1_2,
30dayssearchbrandcount as 30DaysSearchBrandCount, T1.30dayssearchbrandcount as 30DaysSearchBrandCount,
userinterestcategory1_3 as userInterestCategory1_3, T1.userinterestcategory1_3 as userInterestCategory1_3,
userinterestcategory2_2 as userInterestCategory2_2, T1.userinterestcategory2_2 as userInterestCategory2_2,
userinterestcategory3_1 as userInterestCategory3_1, T1.userinterestcategory3_1 as userInterestCategory3_1,
1yearcartcategorycount as 1YearCartCategoryCount, T1.1yearcartcategorycount as 1YearCartCategoryCount,
userinterestcategory2_3 as userInterestCategory2_3, T1.userinterestcategory2_3 as userInterestCategory2_3,
userinterestcategory1_4 as userInterestCategory1_4, T1.userinterestcategory1_4 as userInterestCategory1_4,
userinterestcategory3_2 as userInterestCategory3_2, T1.userinterestcategory3_2 as userInterestCategory3_2,
userinterestcategory1_5 as userInterestCategory1_5, T1.userinterestcategory1_5 as userInterestCategory1_5,
userinterestcategory3_3 as userInterestCategory3_3, T1.userinterestcategory3_3 as userInterestCategory3_3,
1yearorderproductothercount as 1YearOrderProductOtherCount, T1.1yearorderproductothercount as 1YearOrderProductOtherCount,
30daysdetailpva as 30DaysDetailPva, T1.30daysdetailpva as 30DaysDetailPva,
userinterestcategory2_4 as userInterestCategory2_4, T1.userinterestcategory2_4 as userInterestCategory2_4,
1yearorderusepointcount as 1YearOrderUsePointCount, T1.1yearorderusepointcount as 1YearOrderUsePointCount,
30dayschannelpvpaihangbang as 30DaysChannelPvPaiHangBang, T1.30dayschannelpvpaihangbang as 30DaysChannelPvPaiHangBang,
30dayschannelpvxinpinbang as 30daysChannelPvXinPinBang, T1.30dayschannelpvxinpinbang as 30daysChannelPvXinPinBang,
1yearaddfavcategorycount as 1YearAddFavCategoryCount, T1.1yearaddfavcategorycount as 1YearAddFavCategoryCount,
userinterestcategory3_4 as userInterestCategory3_4, T1.userinterestcategory3_4 as userInterestCategory3_4,
userinterestcategory2_5 as userInterestCategory2_5, T1.userinterestcategory2_5 as userInterestCategory2_5,
activeinweekends as activeInWeekends, T1.activeinweekends as activeInWeekends,
1yearordercount as 1YearOrderCount, T1.1yearordercount as 1YearOrderCount,
30daysdetailpvb as 30DaysDetailPvb, T1.30daysdetailpvb as 30DaysDetailPvb,
1yearorderpayproductcount as 1YearOrderPayProductCount, T1.1yearorderpayproductcount as 1YearOrderPayProductCount,
1yearorderusepointpriceamt as 1YearOrderUsePointPriceAmt, T1.1yearorderusepointpriceamt as 1YearOrderUsePointPriceAmt,
1yearaddfavproductcount as 1YearAddFavProductCount, T1.1yearaddfavproductcount as 1YearAddFavProductCount,
30daysdetailpvab as 30DaysDetailPvab, T1.30daysdetailpvab as 30DaysDetailPvab,
userinterestcategory3_5 as userInterestCategory3_5, T1.userinterestcategory3_5 as userInterestCategory3_5,
devicetype as deviceType, T1.devicetype as deviceType,
activein7days as activeIn7Days, T1.activein7days as activeIn7Days,
gender as gender, T1.gender as gender,
purchasedbrand100 as purchasedBrand100, T1.purchasedbrand100 as purchasedBrand100,
30dayschannelpvrenqibang as 30DaysChannelPvRenQiBang, T1.30dayschannelpvrenqibang as 30DaysChannelPvRenQiBang,
1yearcartpriceamt as 1YearCartPriceAmt, T1.1yearcartpriceamt as 1YearCartPriceAmt,
regularcustomer as regularCustomer, T1.regularcustomer as regularCustomer,
1yearorderpaybrandcount as 1YearOrderPayBrandCount, T1.1yearorderpaybrandcount as 1YearOrderPayBrandCount,
1yearorderticketbrandcount as 1YearOrderTicketBrandCount, T1.1yearorderticketbrandcount as 1YearOrderTicketBrandCount,
1yearorderproductotherpriceamt as 1YearOrderProductOtherPriceAmt, T1.1yearorderproductotherpriceamt as 1YearOrderProductOtherPriceAmt,
1yearorderproductcountavg as 1YearOrderProductCountAvg, T1.1yearorderproductcountavg as 1YearOrderProductCountAvg,
favoritein7days as favoriteIn7Days, T1.favoritein7days as favoriteIn7Days,
30daysdetailpvn as 30DaysDetailPvn, T1.30daysdetailpvn as 30DaysDetailPvn,
1yearaddfavpriceamt as 1YearAddFavPriceAmt, T1.1yearaddfavpriceamt as 1YearAddFavPriceAmt,
1yearorderproductwomanpriceamt as 1YearOrderProductWomanPriceAmt, T1.1yearorderproductwomanpriceamt as 1YearOrderProductWomanPriceAmt,
1yearorderproductwomancount as 1YearOrderProductWomanCount, T1.1yearorderproductwomancount as 1YearOrderProductWomanCount,
30dayschannelpvxscj as 30DaysChannelPvXscj, T1.30dayschannelpvxscj as 30DaysChannelPvXscj,
1yearorderticketpriceamt as 1YearOrderTicketPriceAmt, T1.1yearorderticketpriceamt as 1YearOrderTicketPriceAmt,
userinterestbrand_0 as userInterestBrand_0, T1.userinterestbrand_0 as userInterestBrand_0,
1yearaddfavbrandcount as 1YearAddFavBrandCount, T1.1yearaddfavbrandcount as 1YearAddFavBrandCount,
userinterestbrand_1 as userInterestBrand_1, T1.userinterestbrand_1 as userInterestBrand_1,
userinterestbrand_2 as userInterestBrand_2, T1.userinterestbrand_2 as userInterestBrand_2,
1yearorderpaycount as 1YearOrderPayCount, T1.1yearorderpaycount as 1YearOrderPayCount,
30dayschannelpvaolai as 30DaysChannelPvAoLai, T1.30dayschannelpvaolai as 30DaysChannelPvAoLai,
userinterestbrand_3 as userInterestBrand_3, T1.userinterestbrand_3 as userInterestBrand_3,
30daysdetailpvs as 30DaysDetailPvs, T1.30daysdetailpvs as 30DaysDetailPvs,
purchaseditems as purchasedItems, T1.purchaseditems as purchasedItems,
30dayssearchcount as 30DaysSearchCount, T1.30dayssearchcount as 30DaysSearchCount,
1yearcartbrandcount as 1YearCartBrandCount, T1.1yearcartbrandcount as 1YearCartBrandCount,
1yearorderticketcount as 1YearOrderTicketCount, T1.1yearorderticketcount as 1YearOrderTicketCount,
1yearorderpaycategorycount as 1YearOrderPayCategoryCount T1.1yearorderpaycategorycount as 1YearOrderPayCategoryCount,
from $table_name";
nvl(T2.click_category2_id1,0) as click_category2_id1,
nvl(T2.click_category2_id2,0) as click_category2_id2,
nvl(T2.click_category2_id3,0) as click_category2_id3,
nvl(T2.click_category2_id4,0) as click_category2_id4,
nvl(T2.click_category2_id5,0) as click_category2_id5,
nvl(T2.click_category2_id6,0) as click_category2_id6,
nvl(T2.click_brand_id1,0) as click_brand_id1,
nvl(T2.click_brand_id2,0) as click_brand_id2,
nvl(T2.click_brand_id3,0) as click_brand_id3,
nvl(T2.click_brand_id4,0) as click_brand_id4,
nvl(T2.click_brand_id5,0) as click_brand_id5,
nvl(T2.click_brand_id6,0) as click_brand_id6,
nvl(T2.add_category2_id1,0) as add_category2_id1,
nvl(T2.add_category2_id2,0) as add_category2_id2,
nvl(T2.add_category2_id3,0) as add_category2_id3,
nvl(T2.add_category2_id4,0) as add_category2_id4,
nvl(T2.add_category2_id5,0) as add_category2_id5,
nvl(T2.add_category2_id6,0) as add_category2_id6,
nvl(T2.add_brand_id1,0) as add_brand_id1,
nvl(T2.add_brand_id2,0) as add_brand_id2,
nvl(T2.add_brand_id3,0) as add_brand_id3,
nvl(T2.add_brand_id4,0) as add_brand_id4,
nvl(T2.add_brand_id5,0) as add_brand_id5,
nvl(T2.add_brand_id6,0) as add_brand_id6,
nvl(T2.pay_category2_id1,0) as pay_category2_id1,
nvl(T2.pay_category2_id2,0) as pay_category2_id2,
nvl(T2.pay_category2_id3,0) as pay_category2_id3,
nvl(T2.pay_category2_id4,0) as pay_category2_id4,
nvl(T2.pay_category2_id5,0) as pay_category2_id5,
nvl(T2.pay_category2_id6,0) as pay_category2_id6,
nvl(T2.pay_brand_id1,0) as pay_brand_id1,
nvl(T2.pay_brand_id2,0) as pay_brand_id2,
nvl(T2.pay_brand_id3,0) as pay_brand_id3,
nvl(T2.pay_brand_id4,0) as pay_brand_id4,
nvl(T2.pay_brand_id5,0) as pay_brand_id5,
nvl(T2.pay_brand_id6,0) as pay_brand_id6,
nvl(T3.grow_level,0) as grow_level
from $table_name T1
left join secoo_search.user_brand_category2_favorite T2 on T1.device_id = T2.device_id and T2.p_day = '${today_param}'
left join secoo_dim.dim_user_basic_p_day_full T3 on T1.device_id = T3.device_id and T3.p_day = '${yesterday}'";
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment