Commit 4c3e4514 by zhaoyanchao

更新脚本

parent 7d0d6a67
-- Query x product cross-feature table: one row per (keyword, product_id)
-- carrying tinyint match flags. Stored as tab-delimited text and partitioned
-- by p_day.
-- The table comment used to be a copy of the query-feature table's comment;
-- it now names this table's actual content (query/product cross features).
create external table if not exists secoo_search.search_data_cross_feature_query_product
(
    keyword           string  comment '搜索词',
    product_id        string  comment '商品id',
    match_brand       tinyint comment '商品品牌是否和Query中品牌匹配',
    match_first_cate  tinyint comment '商品的一级类目是否和query的品类匹配',
    match_second_cate tinyint comment '商品的二级类目是否和query的品类匹配',
    match_third_cate  tinyint comment '商品的三级类目是否和query的品类匹配',
    match_gender      tinyint comment '商品的性别是否和Query中的性别一致'
) comment 'query商品交叉特征'
partitioned by (
    p_day date comment '分区日期'
)
row format delimited fields terminated by '\t'
stored as textfile;
-- Rebuild one p_day partition of the query x product cross-feature table.
-- Uses the hivevars ${today}, ${partition_day} and ${yesterday}; they are
-- substituted verbatim, so each must already be a quoted date literal.
insert overwrite table secoo_search.search_data_cross_feature_query_product partition (p_day = ${today})
select
    S.key_word,
    S.product_id,
    -- 1 when the product brand equals any of the three query brand slots
    case
        when P.productBrand in (
                 cast(Q.query_brand_1 as string),
                 cast(Q.query_brand_2 as string),
                 cast(Q.query_brand_3 as string)
             ) then 1
        else 0
    end as match_brand,
    -- element [1] of the '_'-separated category_org_code vs. the five query category slots
    case
        when W.category_org_code is null then 0
        when size(split(W.category_org_code, '_')) > 1
             and split(W.category_org_code, '_')[1] in (
                 cast(Q.query_cat_1 as string),
                 cast(Q.query_cat_2 as string),
                 cast(Q.query_cat_3 as string),
                 cast(Q.query_cat_4 as string),
                 cast(Q.query_cat_5 as string)
             ) then 1
        else 0
    end as match_first_cate,
    -- element [2] of category_org_code vs. the five query category slots
    case
        when W.category_org_code is null then 0
        when size(split(W.category_org_code, '_')) > 2
             and split(W.category_org_code, '_')[2] in (
                 cast(Q.query_cat_1 as string),
                 cast(Q.query_cat_2 as string),
                 cast(Q.query_cat_3 as string),
                 cast(Q.query_cat_4 as string),
                 cast(Q.query_cat_5 as string)
             ) then 1
        else 0
    end as match_second_cate,
    -- element [3] of category_org_code vs. the five query category slots
    case
        when W.category_org_code is null then 0
        when size(split(W.category_org_code, '_')) > 3
             and split(W.category_org_code, '_')[3] in (
                 cast(Q.query_cat_1 as string),
                 cast(Q.query_cat_2 as string),
                 cast(Q.query_cat_3 as string),
                 cast(Q.query_cat_4 as string),
                 cast(Q.query_cat_5 as string)
             ) then 1
        else 0
    end as match_third_cate,
    -- 1 when the product gender equals the gender recognised in the query
    case
        when P.prodGender = cast(Q.query_gender as string) then 1
        else 0
    end as match_gender
from (
    -- distinct (keyword, product) pairs seen in the action sample
    select distinct
        key_word,
        product_id
    from secoo_search.search_model_action_data_sample
) S
left join secoo_search.search_data_query_original_feature Q
    on S.key_word = Q.keyword
left join secoo_search.search_data_product_feature P
    on S.product_id = P.product_id
   and P.p_day = ${partition_day}
left join secoo_fact.fact_search_product_wide_p_day W
    on P.product_id = cast(W.product_id as string)
   and W.p_day = ${yesterday};
# Driver for the query x product cross-feature load.
# Looks up the last partition listed for secoo_search.search_data_product_feature
# and runs insert_cross_feature_query_product.sql with yesterday, partition_day
# and today passed as single-quoted hivevars.
work_dir="/data/zhaoyanchao/java/shell/cross_feature/query_product/"

# Last line of the partition listing; assumed to look like "p_day=YYYY-MM-DD"
# (the substring below skips 6 characters and keeps 10).
partition=$(hive -e "show partitions secoo_search.search_data_product_feature" | tail -n 1)
partition_day=${partition:6:10}

# Abort instead of running the insert with an empty or garbled partition value,
# which would silently join against no product-feature rows.
if [[ ! $partition_day =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]]; then
    echo "cannot determine latest partition of secoo_search.search_data_product_feature (got '$partition')" >&2
    exit 1
fi

yesterday=$(date -d "-1 day" "+%Y-%m-%d")
today=$(date "+%Y-%m-%d")
echo "yesterday is $yesterday and partition_day is $partition_day"
hive --hivevar yesterday="'$yesterday'" --hivevar partition_day="'$partition_day'" --hivevar today="'$today'" -f "${work_dir}insert_cross_feature_query_product.sql"
-- User x product cross-feature table: one row per (device_id, product_id).
-- Stored as tab-delimited text and partitioned by p_day.
-- The comment on prodanduserpurchnew used to duplicate the price-range text of
-- prodpriceinuserpurch; it now follows the wording of the sibling
-- prodanduserpurchspecial / prodanduserpurchpromotion columns.
create external table if not exists secoo_search.search_data_cross_feature_user_product
(
    device_id                 string comment '设备id',
    product_id                string comment '商品id',
    prodanduserpurchspecial   string comment '购买过的商品是特例品 & 商品本身是特例品',
    prodinusercat1            string comment '商品的一级品类是否在用户的一级品类偏好中',
    prodinusercat2            string comment '商品的二级品类是否在用户的二级品类偏好中',
    prodanduserpurchnew       string comment '购买过的商品是新品 & 商品本身是新品',
    prodanduserpurchpromotion string comment '购买过的商品是促销品 & 商品本身是促销品',
    prodanduserpurchbig100    string comment '购买过百大品牌 & 商品是百大品牌',
    prodpriceinuserbrowsing   string comment '商品的价格是否在用户浏览的价格段中',
    prodpriceinuserpurch      string comment '商品的价格是否在用户购买的价格段中'
) comment '用户商品交叉特征'
partitioned by (
    p_day date comment '分区日期'
)
row format delimited fields terminated by '\t'
stored as textfile;
# Driver for the user x product cross-feature load.
# Finds the most recent secoo_rcmd_features.userProductCrossFeatures_YYYY_MM_DD
# table and copies it into today's partition of
# secoo_search.search_data_cross_feature_user_product.
work_dir="/data/zhaoyanchao/java/shell/cross_feature/user_product/"

# Find the newest recommendation snapshot table, starting with today's date and
# stepping back one day at a time. The search is capped (default 30 days,
# override with MAX_LOOKBACK_DAYS) so that a Hive outage or a missing upstream
# table ends the job with an error instead of looping forever.
max_lookback_days=${MAX_LOOKBACK_DAYS:-30}
delta_day=0
table_name=""
while (( delta_day <= max_lookback_days )); do
    snapshot_day=$(date -d "-$delta_day day" "+%Y-%m-%d")
    candidate="secoo_rcmd_features.userProductCrossFeatures_${snapshot_day//-/_}"
    echo "$candidate"
    # `desc` exits non-zero when the table cannot be described.
    if hive -e "desc $candidate"; then
        table_name=$candidate
        break
    fi
    delta_day=$((delta_day + 1))
done
if [[ -z $table_name ]]; then
    echo "no secoo_rcmd_features.userProductCrossFeatures_* table found within $max_lookback_days days" >&2
    exit 1
fi

# Create the target table if it does not exist yet.
hive -f "${work_dir}create_cross_feature_user_product.sql"

# The partition is keyed by today's date, whatever the snapshot's date is.
today=$(date "+%Y-%m-%d")
hive -e "insert overwrite table secoo_search.search_data_cross_feature_user_product partition(p_day=date'$today')
select
device_id,
product_id,
prodanduserpurchspecial,
prodinusercat1,
prodinusercat2,
prodanduserpurchnew,
prodanduserpurchpromotion,
prodanduserpurchbig100,
prodpriceinuserbrowsing,
prodpriceinuserpurch
from $table_name;"
-- Product feature extension table, partitioned by p_day.
-- No row format / storage clause is given, so the Hive defaults apply.
CREATE EXTERNAL TABLE IF NOT EXISTS secoo_search.search_data_product_feature_ext
(
    `product_id`                          bigint  COMMENT '商品ID',
    `area_type`                           tinyint COMMENT '货源地,(0大陆 1香港 2美国 3日本 4意大利)',
    `sale_qty_180`                        bigint  COMMENT '前180天至今销售数量',
    `sale_qty_90`                         bigint  COMMENT '前90天至今销售数量',
    `sale_qty_30`                         bigint  COMMENT '前30天至今销售数量',
    `sale_qty_15`                         bigint  COMMENT '前15天至今销售数量',
    `v2p_rate_30`                         double  COMMENT '30天内商品转化率',
    `v2p_rate_3`                          double  COMMENT '3天内商品转化率',
    `suit_gender`                         int     COMMENT '商品适用性别',
    `product_category_id_1`               string  COMMENT '商品一级品类ID',
    `gmv_one_week`                        double  COMMENT '商品一周内GMV',
    `gmv_one_month`                       double  COMMENT '商品一月内GMV',
    `order_count_one_week`                bigint  COMMENT '商品在一周内的销售数量',
    `order_count_one_month`               bigint  COMMENT '商品在一个月内的销售数量',
    `secoo_price`                         double  COMMENT '寺库价格',
    `product_price_level`                 string  COMMENT '商品在二级品类下的价格档位 1:低,2:中,3:高',
    `android_product_3day_ctr`            double  COMMENT '商品在android上3天的点击率',
    `android_product_7day_ctr`            double  COMMENT '商品在android上7天的点击率',
    `android_product_30day_ctr`           double  COMMENT '商品在android上30天的点击率',
    `ios_product_3day_ctr`                double  COMMENT '商品在ios上3天的点击率',
    `ios_product_7day_ctr`                double  COMMENT '商品在ios上7天的点击率',
    `ios_product_30day_ctr`               double  COMMENT '商品在ios上30天的点击率',
    `all_system_product_3day_ctr`         double  COMMENT '商品在平台上3天的点击率',
    `all_system_product_7day_ctr`         double  COMMENT '商品在平台上7天的点击率',
    `all_system_product_30day_ctr`        double  COMMENT '商品在平台上30天的点击率',
    `android_product_3day_collection`     bigint  COMMENT '商品在android上3天的收藏数',
    `android_product_7day_collection`     bigint  COMMENT '商品在android上7天的收藏数',
    `android_product_30day_collection`    bigint  COMMENT '商品在android上30天的收藏数',
    `ios_product_3day_collection`         bigint  COMMENT '商品在ios上3天的收藏数',
    `ios_product_7day_collection`         bigint  COMMENT '商品在ios上7天的收藏数',
    `ios_product_30day_collection`        bigint  COMMENT '商品在ios上30天的收藏数',
    `all_system_product_3day_collection`  bigint  COMMENT '商品在平台上3天的收藏数',
    `all_system_product_7day_collection`  bigint  COMMENT '商品在平台上7天的收藏数',
    `all_system_product_30day_collection` bigint  COMMENT '商品在平台上30天的收藏数',
    `android_product_3day_add_cart`       bigint  COMMENT '商品在android上3天的加购数',
    `android_product_7day_add_cart`       bigint  COMMENT '商品在android上7天的加购数',
    `android_product_30day_add_cart`      bigint  COMMENT '商品在android上30天的加购数',
    `ios_product_3day_add_cart`           bigint  COMMENT '商品在ios上3天的加购数',
    `ios_product_7day_add_cart`           bigint  COMMENT '商品在ios上7天的加购数',
    `ios_product_30day_add_cart`          bigint  COMMENT '商品在ios上30天的加购数',
    `all_system_product_3day_add_cart`    bigint  COMMENT '商品在平台上3天的加购数',
    `all_system_product_7day_add_cart`    bigint  COMMENT '商品在平台上7天的加购数',
    `all_system_product_30day_add_cart`   bigint  COMMENT '商品在平台上30天的加购数',
    `product_brand_id`                    string  COMMENT '商品品牌ID',
    `pv_order_code`                       int     COMMENT '根据pv和订单分析得出的编码(1:热销款2:转化率低热品...)',
    `product_category_id_3`               string  COMMENT '商品三级品类ID',
    `product_category_id_2`               string  COMMENT '商品二级品类ID',
    `product_group_id`                    bigint  COMMENT '商品所属组ID',
    `product_group_layering_code`         int     COMMENT '商品所属组的分层码',
    `price_grade`                         double  COMMENT '商品一级品类下的购买力等级',
    `is_new`                              int     COMMENT '是不是新品1:是,0:不是',
    `pv`                                  bigint  COMMENT '商品近30天pv',
    `discount_see_number`                 int     COMMENT '今日折扣频道曝光次数',
    `discount_click_number`               int     COMMENT '今日折扣频道点击次数',
    `discount_add_cart_number`            int     COMMENT '今日折扣频道加购次数',
    `discount_pay_number`                 int     COMMENT '今日折扣频道支付次数',
    `discount_click_rate`                 double  COMMENT '今日折扣频道曝光点击率',
    `discount_add_cart_rate`              double  COMMENT '今日折扣频道点击加购率',
    `discount_pay_rate`                   double  COMMENT '今日折扣频道加购支付率',
    `second_category_price_grade`         double  COMMENT '商品二级品类下的购买力等级',
    `third_category_price_grade`          double  COMMENT '商品三级品类下的购买力等级',
    `extreme_hot_score`                   double  COMMENT '爆款分,0.0的为非爆款',
    `brand_grade`                         bigint  COMMENT '品牌等级,1、2、3、4、5',
    `productlevel`                        string  COMMENT '商品等级<N、S、A、AB、B>',
    `ispromotionproduct`                  string  COMMENT '是否是促销商品',
    `productpricelevel`                   string  COMMENT '商品价格',
    `isspecialproduct`                    string  COMMENT '是否是特例品(便宜处理的)',
    `isnewstyle`                          string  COMMENT '是否是新款',
    `prodgender`                          string  COMMENT '性别',
    `isnewcustomerproduct`                string  COMMENT '是否新客商品',
    `isbrand100`                          string  COMMENT '是否是百大品牌',
    `isnewproduct`                        string  COMMENT '是否是新品',
    `ismemberproduct`                     string  COMMENT '是否会员商品',
    `isselfoperating`                     string  COMMENT '是否自营品',
    `productpopularity`                   string  COMMENT '浏览、收藏、加购综合算的热度值',
    `productbrand`                        string  COMMENT '商品的品牌',
    `productcategory`                     string  COMMENT '商品的品类',
    `category_id_1`                       string  COMMENT '商品的一级品类',
    `category_id_2`                       string  COMMENT '商品的二级品类',
    `productfeatures`                     string  COMMENT '商品特征',
    `productlabels`                       string  COMMENT '商品兴趣标签',
    `category_org_code_3`                 string  COMMENT '三级分类组织结构代码',
    `group_best_sku`                      bigint  COMMENT '分组内最优sku',
    `group_id`                            bigint  COMMENT '清一色group_id',
    `group_main_id`                       bigint  COMMENT '清一色group_main_id',
    `product_main_id`                     bigint  COMMENT '商品spu id',
    `embedding_code`                      string  COMMENT '商品的embedding值',
    `style`                               string  COMMENT '风格',
    `occasion`                            string  COMMENT '场景',
    `color`                               string  COMMENT '颜色',
    `last_7_days_click_pv`                int     COMMENT '热度(最近7天点击pv)',
    `ctr`                                 double  COMMENT ' ctr: 网图近7天点击pv/曝光pv'
) COMMENT '搜索商品特征表'
PARTITIONED BY (p_day date COMMENT '分区日期');
-- Load the product feature table from a recommendation snapshot joined with
-- the product wide table. Values are inserted by position; the aliases only
-- document the intended target column.
-- NOTE(review): the partition date (2021-05-06) and the snapshot table suffix
-- (2021_04_23) are hard-coded here; confirm this file is still meant to be run.
INSERT OVERWRITE TABLE secoo_search.search_data_product_feature PARTITION (p_day = date'2021-05-06')
SELECT
    R.product_id,
    R.isbrand100            AS is_brand_top_100,
    R.productpopularity     AS product_popularity,
    R.ismemberproduct       AS is_member_product,
    R.isspecialproduct      AS is_special_product,
    R.isnewcustomerproduct  AS is_new_customer_product,
    R.isnewproduct          AS is_new_product,
    R.isnewstyle            AS is_new_style,
    R.isselfoperating       AS is_self_operating,
    R.productlevel          AS product_level,
    R.ispromotionproduct    AS is_promotion_product,
    R.prodgender            AS gender,
    R.productbrand          AS product_brand,
    R.productcategory       AS product_category,
    R.productpricelevel     AS product_price_level,
    R.3daysandroidctr       AS ctr_android_3days,
    R.7daysandroidctr       AS ctr_android_7days,
    R.30daysandroidctr      AS ctr_android_30days,
    R.3daysiosctr           AS ctr_ios_3days,
    R.7daysiosctr           AS ctr_ios_7days,
    R.30daysiosctr          AS ctr_ios_30days,
    R.3daysctr              AS ctr_3days,
    R.7daysctr              AS ctr_7days,
    R.30daysctr             AS ctr_30days,
    R.3daysandroidfavorite  AS favorite_android_3days,
    R.7daysandroidfavorite  AS favorite_android_7days,
    R.30daysandroidfavorite AS favorite_android_30days,
    R.3daysiosfavorite      AS favorite_ios_3days,
    R.7daysiosfavorite      AS favorite_ios_7days,
    R.30daysiosfavorite     AS favorite_ios_30days,
    R.3daysfavorite         AS favorite_3days,
    R.7daysfavorite         AS favorite_7days,
    R.30daysfavorite        AS favorite_30days,
    R.3daysandroidaddcart   AS add_cart_android_3days,
    R.7daysandroidaddcart   AS add_cart_android_7days,
    R.30daysandroidaddcart  AS add_cart_android_30days,
    R.3daysiosaddcart       AS add_cart_ios_3days,
    R.7daysiosaddcart       AS add_cart_ios_7days,
    R.30daysiosaddcart      AS add_cart_ios_30days,
    R.3daysaddcart          AS add_cart_3days,
    R.7daysaddcart          AS add_cart_7days,
    R.30daysaddcart         AS add_cart_30days,
    W.area_type,
    W.sale_qty_180,
    W.sale_qty_90,
    W.sale_qty_30,
    W.sale_qty_15
FROM secoo_rcmd_features.productFeatures_2021_04_23 R
LEFT JOIN secoo_fact.fact_search_product_wide_p_day W
    ON R.product_id = cast(W.product_id AS string)
   AND W.p_day = '2021-05-06';
# Driver for the product feature load.
# Finds the most recent secoo_rcmd_features.productFeatures_YYYY_MM_DD table,
# joins it with the product wide table and writes the result into today's
# partition of secoo_search.search_data_product_feature.
work_dir="/data/zhaoyanchao/java/shell/product_feature/"

# Find the newest recommendation snapshot table, starting with today's date and
# stepping back one day at a time. The search is capped (default 30 days,
# override with MAX_LOOKBACK_DAYS) so that a Hive outage or a missing upstream
# table ends the job with an error instead of looping forever.
max_lookback_days=${MAX_LOOKBACK_DAYS:-30}
delta_day=0
table_name=""
while (( delta_day <= max_lookback_days )); do
    snapshot_day=$(date -d "-$delta_day day" "+%Y-%m-%d")
    candidate="secoo_rcmd_features.productFeatures_${snapshot_day//-/_}"
    echo "$candidate"
    # `desc` exits non-zero when the table cannot be described.
    if hive -e "desc $candidate"; then
        table_name=$candidate
        break
    fi
    delta_day=$((delta_day + 1))
done
if [[ -z $table_name ]]; then
    echo "no secoo_rcmd_features.productFeatures_* table found within $max_lookback_days days" >&2
    exit 1
fi

# Create the target table if it does not exist yet.
# The original line fused two variants of this command and used $"work_dir",
# which bash expands to the literal text work_dir rather than the variable.
hive -f "${work_dir}create_search_product_feature_table.sql"

# The partition is keyed by today's date; the wide table is joined on the
# snapshot's date (snapshot_day), which may be older than today.
today=$(date "+%Y-%m-%d")
hive -e "insert overwrite table secoo_search.search_data_product_feature partition(p_day=date'$today')
select
R.product_id,
R.isbrand100 as isBrand100,
R.productpopularity as productPopularity,
R.ismemberproduct as isMemberProduct ,
R.isspecialproduct as isSpecialProduct,
R.isnewcustomerproduct as isNewCustomerProduct,
R.isnewproduct as isNewProduct,
R.isnewstyle as isNewStyle,
R.isselfoperating as isSelfOperating,
R.productlevel as productLevel,
R.ispromotionproduct as isPromotionProduct,
R.prodgender as prodGender,
R.productbrand as productBrand,
R.productcategory as productCategory,
R.productpricelevel as productPriceLevel,
R.3daysandroidctr as 3DaysAndroidCTR,
R.7daysandroidctr as 7DaysAndroidCTR,
R.30daysandroidctr as 30DaysAndroidCTR,
R.3daysiosctr as 3DaysIosCTR,
R.7daysiosctr as 7DaysIosCTR,
R.30daysiosctr as 30DaysIosCTR,
R.3daysctr as 3DaysCTR,
R.7daysctr as 7DaysCTR,
R.30daysctr as 30DaysCTR,
R.3daysandroidfavorite as 3DaysAndroidFavorite,
R.7daysandroidfavorite as 7DaysAndroidFavorite,
R.30daysandroidfavorite as 30DaysAndroidFavorite,
R.3daysiosfavorite as 3DaysIosFavorite,
R.7daysiosfavorite as 7DaysIosFavorite,
R.30daysiosfavorite as 30DaysIosFavorite,
R.3daysfavorite as 3DaysFavorite,
R.7daysfavorite as 7DaysFavorite,
R.30daysfavorite as 30DaysFavorite,
R.3daysandroidaddcart as 3DaysAndroidAddCart,
R.7daysandroidaddcart as 7DaysAndroidAddCart,
R.30daysandroidaddcart as 30DaysAndroidAddCart,
R.3daysiosaddcart as 3DaysIosAddCart,
R.7daysiosaddcart as 7DaysIosAddCart,
R.30daysiosaddcart as 30DaysIosAddCart,
R.3daysaddcart as 3DaysAddCart,
R.7daysaddcart as 7DaysAddCart,
R.30daysaddcart as 30DaysAddCart,
W.area_type,
W.sale_qty_180,
W.sale_qty_90,
W.sale_qty_30,
W.sale_qty_15
from $table_name R
left join secoo_fact.fact_search_product_wide_p_day W
on R.product_id = cast(W.product_id AS string) and W.p_day = '$snapshot_day';"

# NOTE(review): kept verbatim from the original. It resolves relative to the
# current directory rather than work_dir, and it may be a leftover from before
# the inline insert above was added - confirm whether it should still run.
hive -f insert_search_product_feature.sql
# Runs the product-feature-extension DDL script and then its insert script,
# handing the insert yesterday's date as a single-quoted hivevar.
work_dir="/data/zhaoyanchao/java/shell/product_feature/"
delta_day=1
yesterday=$(date -d "-$delta_day day" "+%Y-%m-%d")

# Extension table statements
hive -f "${work_dir}create_product_feature_ext_table.sql"
hive --hivevar yesterday="'$yesterday'" -f "${work_dir}insert_product_feature_ext.sql"
-- Raw query feature table: one row per keyword with up to five recognised
-- category slots, three brand slots, a gender code and an other-word flag.
-- Unpartitioned, stored as tab-delimited text.
CREATE EXTERNAL TABLE IF NOT EXISTS secoo_search.search_data_query_original_feature
(
    keyword                   string  COMMENT 'query词',
    query_cat_1               bigint  COMMENT '识别类目1',
    query_cat_2               bigint  COMMENT '识别类目2',
    query_cat_3               bigint  COMMENT '识别类目3',
    query_cat_4               bigint  COMMENT '识别类目4',
    query_cat_5               bigint  COMMENT '识别类目5',
    query_brand_1             bigint  COMMENT '识别品牌1',
    query_brand_2             bigint  COMMENT '识别品牌2',
    query_brand_3             bigint  COMMENT '识别品牌3',
    query_gender              tinyint COMMENT '识别性别,1是男,2是女, 0是没有',
    query_contains_other_word tinyint COMMENT '是否含其他词'
) COMMENT 'query原始特征'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE;
\ No newline at end of file
-- User feature table (the original header comment said "product feature
-- table", but the DDL below defines the per-device user feature table).
-- All feature columns are strings; partitioned by p_day, stored as Parquet.
CREATE EXTERNAL TABLE IF NOT EXISTS secoo_search.search_data_user_feature
(
    device_id                      string COMMENT '设备id',
    1YearOrderTicketProductCount   string COMMENT '1年下单量',
    addCartIn7Days                 string COMMENT '7天加购量',
    1YearCartProductCount          string COMMENT '1年加购量',
    1YearOrderProductManCount      string COMMENT '1年男性订单',
    1YearOrderProductManPriceAmt   string COMMENT '',
    30DaysChannelPvJrzk            string COMMENT '今日折扣',
    1YearOrderTicketCategoryCount  string COMMENT '',
    30DaysDetailPv                 string COMMENT '',
    userInterestCategory1_0        string COMMENT '',
    userInterestCategory2_0        string COMMENT '',
    userInterestCategory1_1        string COMMENT '',
    userInterestCategory2_1        string COMMENT '',
    userInterestCategory3_0        string COMMENT '',
    30DaysSearchCategoryCount      string COMMENT '',
    1YearOrderPayPriceAmt          string COMMENT '',
    userInterestCategory1_2        string COMMENT '',
    30DaysSearchBrandCount         string COMMENT '',
    userInterestCategory1_3        string COMMENT '',
    userInterestCategory2_2        string COMMENT '',
    userInterestCategory3_1        string COMMENT '',
    1YearCartCategoryCount         string COMMENT '',
    userInterestCategory2_3        string COMMENT '',
    userInterestCategory1_4        string COMMENT '',
    userInterestCategory3_2        string COMMENT '',
    userInterestCategory1_5        string COMMENT '',
    userInterestCategory3_3        string COMMENT '',
    1YearOrderProductOtherCount    string COMMENT '',
    30DaysDetailPva                string COMMENT '',
    userInterestCategory2_4        string COMMENT '',
    1YearOrderUsePointCount        string COMMENT '',
    30DaysChannelPvPaiHangBang     string COMMENT '',
    30daysChannelPvXinPinBang      string COMMENT '',
    1YearAddFavCategoryCount       string COMMENT '',
    userInterestCategory3_4        string COMMENT '',
    userInterestCategory2_5        string COMMENT '',
    activeInWeekends               string COMMENT '',
    1YearOrderCount                string COMMENT '',
    30DaysDetailPvb                string COMMENT '',
    1YearOrderPayProductCount      string COMMENT '',
    1YearOrderUsePointPriceAmt     string COMMENT '',
    1YearAddFavProductCount        string COMMENT '',
    30DaysDetailPvab               string COMMENT '',
    userInterestCategory3_5        string COMMENT '',
    deviceType                     string COMMENT '',
    activeIn7Days                  string COMMENT '',
    gender                         string COMMENT '',
    purchasedBrand100              string COMMENT '',
    30DaysChannelPvRenQiBang       string COMMENT '',
    1YearCartPriceAmt              string COMMENT '',
    regularCustomer                string COMMENT '',
    1YearOrderPayBrandCount        string COMMENT '',
    1YearOrderTicketBrandCount     string COMMENT '',
    1YearOrderProductOtherPriceAmt string COMMENT '',
    1YearOrderProductCountAvg      string COMMENT '',
    favoriteIn7Days                string COMMENT '',
    30DaysDetailPvn                string COMMENT '',
    1YearAddFavPriceAmt            string COMMENT '',
    1YearOrderProductWomanPriceAmt string COMMENT '',
    1YearOrderProductWomanCount    string COMMENT '',
    30DaysChannelPvXscj            string COMMENT '',
    1YearOrderTicketPriceAmt       string COMMENT '',
    userInterestBrand_0            string COMMENT '',
    1YearAddFavBrandCount          string COMMENT '',
    userInterestBrand_1            string COMMENT '',
    userInterestBrand_2            string COMMENT '',
    1YearOrderPayCount             string COMMENT '',
    30DaysChannelPvAoLai           string COMMENT '',
    userInterestBrand_3            string COMMENT '',
    30DaysDetailPvs                string COMMENT '',
    purchasedItems                 string COMMENT '',
    30DaysSearchCount              string COMMENT '',
    1YearCartBrandCount            string COMMENT '',
    1YearOrderTicketCount          string COMMENT '',
    1YearOrderPayCategoryCount     string COMMENT ''
) COMMENT '用户特征'
PARTITIONED BY (
    p_day date COMMENT '分区日期'
)
STORED AS PARQUET;
-- User feature extension table, one row per device_id, partitioned by p_day.
-- No row format / storage clause is given, so the Hive defaults apply.
-- Column order matters: the companion insert fills this table by position.
CREATE EXTERNAL TABLE IF NOT EXISTS secoo_search.search_data_user_feature_ext
(
    `device_id`                          string COMMENT '设备ID',
    `category_favor`                     string COMMENT '喜好二级品类及评分',
    `category_one_favor`                 string COMMENT '喜好一级品类及评分',
    `favor_gender`                       int    COMMENT '用户购物性别1:男 2:女 0:男女',
    `pv_product_level_cate_2`            string COMMENT '浏览商品等级',
    `order_product_level_cate_2`         string COMMENT '购买商品等级',
    `pv_product_middle_price`            string COMMENT '浏览商品价格中位值',
    `pv_product_max_price`               string COMMENT '浏览商品价格最大值',
    `order_product_middle_price`         string COMMENT '购买商品价格中位值',
    `order_product_max_price`            string COMMENT '购买商品价格最大值',
    `brand_favor`                        string COMMENT '品牌偏好',
    `category_three_favor`               string COMMENT '三级品类偏好及偏好分',
    `order_grades`                       string COMMENT '用户在一级品类下的购买力',
    `currency_grades`                    string COMMENT '用户在一级品类下的通用购买力',
    `global_price_grade`                 double COMMENT '用户全局购买力',
    `crowd_label`                        double COMMENT 'crowd_label',
    `second_category_order_grades`       string COMMENT '用户在二级品类下的购买力',
    `second_category_currency_grades`    string COMMENT '用户在二级品类下的通用购买力',
    `second_category_global_price_grade` string COMMENT '用户二级品类全局购买力',
    `third_category_order_grades`        string COMMENT '用户在三级品类下的购买力',
    `third_category_currency_grades`     string COMMENT '用户在三级品类下的通用购买力',
    `third_category_global_price_grade`  double COMMENT '用户三级品类全局购买力',
    `search_brand_num`                   string COMMENT '搜索品牌及次数',
    `search_category_firstnum`           string COMMENT '搜索一级品类及次数',
    `search_category_secondnum`          string COMMENT '搜索二级品类及次数',
    `search_category_thirdnum`           string COMMENT '搜索三级品类及次数',
    `30daysdetailpv`                     string COMMENT '最近30天商品详情页浏览次数',
    `30daysdetailpvn`                    string COMMENT '最近30天商品详情页浏览次数_N级',
    `30daysdetailpvs`                    string COMMENT '最近30天商品详情页浏览次数_S级',
    `30daysdetailpva`                    string COMMENT '最近30天商品详情页浏览次数_A级',
    `30daysdetailpvab`                   string COMMENT '最近30天商品详情页浏览次数_AB级',
    `30daysdetailpvb`                    string COMMENT '最近30天商品详情页浏览次数_B级',
    `30dayschannelpvpaihangbang`         string COMMENT '最近30天浏览频道次数_排行榜',
    `30dayschannelpvrenqibang`           string COMMENT '最近30天浏览频道次数_人气榜',
    `30dayschannelpvxinpinbang`          string COMMENT '最近30天浏览频道次数_新品榜',
    `30dayschannelpvjrzk`                string COMMENT '最近30天浏览频道次数_今日折扣',
    `30dayschannelpvaolai`               string COMMENT '最近30天浏览频道次数_奥莱特卖',
    `30dayschannelpvxscj`                string COMMENT '最近30天浏览频道次数_限时抽奖',
    `30dayssearchcount`                  string COMMENT '最近30天搜索次数',
    `30dayssearchbrandcount`             string COMMENT '最近30天搜索品牌数',
    `30dayssearchcategorycount`          string COMMENT '最近30天搜索品类数',
    `1yearcartproductcount`              string COMMENT '当前购物车商品数(最近一年)',
    `1yearcartbrandcount`                string COMMENT '当前购物车品牌数(最近一年)',
    `1yearcartcategorycount`             string COMMENT '当前购物车品类数(最近一年)',
    `1yearcartpriceamt`                  string COMMENT '当前购物车总金额(最近一年)',
    `1yearaddfavproductcount`            string COMMENT '当前收藏商品数(最近一年)',
    `1yearaddfavbrandcount`              string COMMENT '当前收藏品牌数(最近一年)',
    `1yearaddfavcategorycount`           string COMMENT '当前收藏品类数(最近一年)',
    `1yearaddfavpriceamt`                string COMMENT '当前收藏总金额(最近一年)',
    `1yearordercount`                    string COMMENT '最近一年订单总次数',
    `1yearorderproductcountavg`          string COMMENT '最近一年订单包含平均商品数',
    `1yearorderpaycount`                 string COMMENT '最近一年付费订单数',
    `1yearorderpaypriceamt`              string COMMENT '最近一年付费总金额',
    `1yearorderpayproductcount`          string COMMENT '最近一年付费商品数',
    `1yearorderpaybrandcount`            string COMMENT '最近一年付费品牌数',
    `1yearorderpaycategorycount`         string COMMENT '最近一年付费品类数',
    `1yearorderticketcount`              string COMMENT '最近一年使用优惠卷订单数',
    `1yearorderticketproductcount`       string COMMENT '最近一年使用优惠卷商品数',
    `1yearorderticketpriceamt`           string COMMENT '最近一年使用优惠卷总金额',
    `1yearorderticketbrandcount`         string COMMENT '最近一年使用优惠卷品牌数',
    `1yearorderticketcategorycount`      string COMMENT '最近一年使用优惠卷品类数',
    `1yearorderusepointcount`            string COMMENT '最近一年使用总积分',
    `1yearorderusepointpriceamt`         string COMMENT '最近一年抵扣积分金额',
    `1yearorderproductmancount`          string COMMENT '最近一年购买男装次数',
    `1yearorderproductmanpriceamt`       string COMMENT '最近一年购买男装总金额',
    `1yearorderproductwomancount`        string COMMENT '最近一年购买女装次数',
    `1yearorderproductwomanpriceamt`     string COMMENT '最近一年购买女装总金额',
    `1yearorderproductothercount`        string COMMENT '最近一年购买其他次数',
    `1yearorderproductotherpriceamt`     string COMMENT '最近一年购买其他总金额',
    `userinterestcategory1_0`            string COMMENT '用户一级品类偏好',
    `userinterestcategory1_1`            string COMMENT '用户一级品类偏好',
    `userinterestcategory1_2`            string COMMENT '用户一级品类偏好',
    `userinterestcategory1_3`            string COMMENT '用户一级品类偏好',
    `userinterestcategory1_4`            string COMMENT '用户一级品类偏好',
    `userinterestcategory1_5`            string COMMENT '用户一级品类偏好',
    `userinterestcategory2_0`            string COMMENT '用户二级品类偏好',
    `userinterestcategory2_1`            string COMMENT '用户二级品类偏好',
    `userinterestcategory2_2`            string COMMENT '用户二级品类偏好',
    `userinterestcategory2_3`            string COMMENT '用户二级品类偏好',
    `userinterestcategory2_4`            string COMMENT '用户二级品类偏好',
    `userinterestcategory2_5`            string COMMENT '用户二级品类偏好',
    `userinterestcategory3_0`            string COMMENT '用户三级品类偏好',
    `userinterestcategory3_1`            string COMMENT '用户三级品类偏好',
    `userinterestcategory3_2`            string COMMENT '用户三级品类偏好',
    `userinterestcategory3_3`            string COMMENT '用户三级品类偏好',
    `userinterestcategory3_4`            string COMMENT '用户三级品类偏好',
    `userinterestcategory3_5`            string COMMENT '用户三级品类偏好',
    `userinterestbrand_0`                string COMMENT '用户品牌偏好',
    `userinterestbrand_1`                string COMMENT '用户品牌偏好',
    `userinterestbrand_2`                string COMMENT '用户品牌偏好',
    `userinterestbrand_3`                string COMMENT '用户品牌偏好',
    `addcartin7days`                     string COMMENT '最近7天是否加购',
    `devicetype`                         string COMMENT '使用设别(IOS/Android)',
    `gender`                             string COMMENT '用户性别',
    `purchasedbrand100`                  string COMMENT '是否购买过百大品牌',
    `purchaseditems`                     string COMMENT '是否购过商品',
    `activein7days`                      string COMMENT '最近7天是否活跃',
    `activeinweekends`                   string COMMENT '节假日是否活跃',
    `prodanduserpurchnew`                string COMMENT '购买过的商品是新品',
    `prodanduserpurchpromotion`          string COMMENT '购买过的商品是促销品',
    `prodanduserpurchspecial`            string COMMENT '购买过的商品是特例品',
    `ispayeduser`                        int    COMMENT '是否已下单用户,1是 0否',
    `userfeatures`                       string COMMENT '用户特征',
    `userlabels`                         string COMMENT '用户兴趣标签',
    `is_new_user`                        int    COMMENT '是否新用户',
    `grow_level`                         bigint COMMENT '用户成长等级',
    `activate_diff_days`                 int    COMMENT '第N天使用APP, 从激活日期至今',
    `head_model_level`                   string COMMENT '设备型号等级',
    `category_price_prefer`              string COMMENT '品类价格偏好',
    `not_repurchase_spu_list`            string COMMENT '不会复购的spu列表',
    `payed_spu_list`                     string COMMENT '最近购买过的spu列表',
    `payed_org_code_list`                string COMMENT '最近购买过的商品 org_code列表',
    `is_repurchase_user`                 int    COMMENT '是否是会重复购买的用户: 1是 0否',
    `user_add_cart_prod`                 string COMMENT '当前购物车商品列表',
    `cart_spu_info`                      string COMMENT '最近加购的spu列表,带附属信息',
    `cart_sku_info`                      string COMMENT '最近加购的sku列表,带附属信息',
    `order_spu_info`                     string COMMENT '最近购买过的spu列表,带附属信息',
    `user_favorite_prod`                 string COMMENT '当前收藏spu列表',
    `favorite_spu_info`                  string COMMENT '最近收藏过的spu列表,带附属信息',
    `payed_group_list`                   string COMMENT '用户最近7天购买过的商品列表,清一色group_id',
    `payed_group_main_list`              string COMMENT '用户最近7天购买过的商品列表,清一色group_main_id',
    `not_repurchase_group_main_list`     string COMMENT '不会复购的group_main_id列表',
    `style_preference`                   string COMMENT '风格偏好',
    `occasion_preference`                string COMMENT '场景偏好'
) COMMENT '搜索用户特征表'
PARTITIONED BY (p_day date COMMENT '分区日期');
-- Copy one day of the recommendation user profile into the matching p_day
-- partition of the user feature extension table.
-- ${yesterday} is substituted verbatim, so it must already be a quoted date
-- literal. Columns are inserted by position and follow the target DDL order.
INSERT OVERWRITE TABLE secoo_search.search_data_user_feature_ext PARTITION (p_day = ${yesterday})
SELECT
     U.device_id
    ,U.category_favor
    ,U.category_one_favor
    ,U.favor_gender
    ,U.pv_product_level_cate_2
    ,U.order_product_level_cate_2
    ,U.pv_product_middle_price
    ,U.pv_product_max_price
    ,U.order_product_middle_price
    ,U.order_product_max_price
    ,U.brand_favor
    ,U.category_three_favor
    ,U.order_grades
    ,U.currency_grades
    ,U.global_price_grade
    ,U.crowd_label
    ,U.second_category_order_grades
    ,U.second_category_currency_grades
    ,U.second_category_global_price_grade
    ,U.third_category_order_grades
    ,U.third_category_currency_grades
    ,U.third_category_global_price_grade
    ,U.search_brand_num
    ,U.search_category_firstnum
    ,U.search_category_secondnum
    ,U.search_category_thirdnum
    ,U.30DaysDetailPV
    ,U.30DaysDetailPVN
    ,U.30DaysDetailPVS
    ,U.30DaysDetailPVA
    ,U.30DaysDetailPVAB
    ,U.30DaysDetailPVB
    ,U.30DaysChannelPVPaiHangBang
    ,U.30DaysChannelPVRenQiBang
    ,U.30DaysChannelPVXinPinBang
    ,U.30DaysChannelPVJRZK
    ,U.30DaysChannelPVAoLai
    ,U.30DaysChannelPVXSCJ
    ,U.30DaysSearchCount
    ,U.30DaysSearchBrandCount
    ,U.30DaysSearchCategoryCount
    ,U.1YearCartProductCount
    ,U.1YearCartBrandCount
    ,U.1YearCartCategoryCount
    ,U.1YearCartPriceAmt
    ,U.1YearAddFavProductCount
    ,U.1YearAddFavBrandCount
    ,U.1YearAddFavCategoryCount
    ,U.1YearAddFavPriceAmt
    ,U.1YearOrderCount
    ,U.1YearOrderProductCountAvg
    ,U.1YearOrderPayCount
    ,U.1YearOrderPayPriceAmt
    ,U.1YearOrderPayProductCount
    ,U.1YearOrderPayBrandCount
    ,U.1YearOrderPayCategoryCount
    ,U.1YearOrderTicketCount
    ,U.1YearOrderTicketProductCount
    ,U.1YearOrderTicketPriceAmt
    ,U.1YearOrderTicketBrandCount
    ,U.1YearOrderTicketCategoryCount
    ,U.1YearOrderUsePointCount
    ,U.1YearOrderUsePointPriceAmt
    ,U.1YearOrderProductManCount
    ,U.1YearOrderProductManPriceAmt
    ,U.1YearOrderProductWomanCount
    ,U.1YearOrderProductWomanPriceAmt
    ,U.1YearOrderProductOtherCount
    ,U.1YearOrderProductOtherPriceAmt
    ,U.userInterestCategory1_0
    ,U.userInterestCategory1_1
    ,U.userInterestCategory1_2
    ,U.userInterestCategory1_3
    ,U.userInterestCategory1_4
    ,U.userInterestCategory1_5
    ,U.userInterestCategory2_0
    ,U.userInterestCategory2_1
    ,U.userInterestCategory2_2
    ,U.userInterestCategory2_3
    ,U.userInterestCategory2_4
    ,U.userInterestCategory2_5
    ,U.userInterestCategory3_0
    ,U.userInterestCategory3_1
    ,U.userInterestCategory3_2
    ,U.userInterestCategory3_3
    ,U.userInterestCategory3_4
    ,U.userInterestCategory3_5
    ,U.userInterestBrand_0
    ,U.userInterestBrand_1
    ,U.userInterestBrand_2
    ,U.userInterestBrand_3
    ,U.addCartIn7Days
    ,U.deviceType
    ,U.gender
    ,U.purchasedBrand100
    ,U.purchasedItems
    ,U.activeIn7Days
    ,U.activeInWeekends
    ,U.prodAndUserPurchNew
    ,U.prodAndUserPurchPromotion
    ,U.prodAndUserPurchSpecial
    ,U.ispayeduser
    ,U.userfeatures
    ,U.userlabels
    ,U.is_new_user
    ,U.grow_level
    ,U.activate_diff_days
    ,U.head_model_level
    ,U.category_price_prefer
    ,U.not_repurchase_spu_list
    ,U.payed_spu_list
    ,U.payed_org_code_list
    ,U.is_repurchase_user
    ,U.user_add_cart_prod
    ,U.cart_spu_info
    ,U.cart_sku_info
    ,U.order_spu_info
    ,U.user_favorite_prod
    ,U.favorite_spu_info
    ,U.payed_group_list
    ,U.payed_group_main_list
    ,U.not_repurchase_group_main_list
    ,U.style_preference
    ,U.occasion_preference
FROM secoo_rcmd.rcmd_model_forecast_user_profile_p_day AS U
WHERE U.p_day = ${yesterday}
\ No newline at end of file
# Driver for the user feature load.
# Finds the most recent secoo_rcmd_features.userfeatures_YYYY_MM_DD table and
# copies it into today's partition of secoo_search.search_data_user_feature.
work_dir="/data/zhaoyanchao/java/shell/user_feature/"

# Find the newest recommendation snapshot table, starting with today's date and
# stepping back one day at a time. The search is capped (default 30 days,
# override with MAX_LOOKBACK_DAYS) so that a Hive outage or a missing upstream
# table ends the job with an error instead of looping forever.
max_lookback_days=${MAX_LOOKBACK_DAYS:-30}
delta_day=0
table_name=""
while (( delta_day <= max_lookback_days )); do
    snapshot_day=$(date -d "-$delta_day day" "+%Y-%m-%d")
    candidate="secoo_rcmd_features.userfeatures_${snapshot_day//-/_}"
    echo "$candidate"
    # `desc` exits non-zero when the table cannot be described.
    if hive -e "desc $candidate"; then
        table_name=$candidate
        break
    fi
    delta_day=$((delta_day + 1))
done
if [[ -z $table_name ]]; then
    echo "no secoo_rcmd_features.userfeatures_* table found within $max_lookback_days days" >&2
    exit 1
fi

# Create the target table if it does not exist yet.
hive -f "${work_dir}create_search_user_feature_table.sql"
echo "开始导入数据"

# The partition is keyed by today's date (the original stored this value in a
# variable named "yesterday"), whatever the snapshot's date is.
today=$(date "+%Y-%m-%d")
hive -e "insert overwrite table secoo_search.search_data_user_feature partition(p_day=date'$today')
select
device_id as device_id,
1yearorderticketproductcount as 1YearOrderTicketProductCount,
addcartin7days as addCartIn7Days,
1yearcartproductcount as 1YearCartProductCount,
1yearorderproductmancount as 1YearOrderProductManCount,
1yearorderproductmanpriceamt as 1YearOrderProductManPriceAmt,
30dayschannelpvjrzk as 30DaysChannelPvJrzk,
1yearorderticketcategorycount as 1YearOrderTicketCategoryCount,
30daysdetailpv as 30DaysDetailPv,
userinterestcategory1_0 as userInterestCategory1_0,
userinterestcategory2_0 as userInterestCategory2_0,
userinterestcategory1_1 as userInterestCategory1_1,
userinterestcategory2_1 as userInterestCategory2_1,
userinterestcategory3_0 as userInterestCategory3_0,
30dayssearchcategorycount as 30DaysSearchCategoryCount,
1yearorderpaypriceamt as 1YearOrderPayPriceAmt,
userinterestcategory1_2 as userInterestCategory1_2,
30dayssearchbrandcount as 30DaysSearchBrandCount,
userinterestcategory1_3 as userInterestCategory1_3,
userinterestcategory2_2 as userInterestCategory2_2,
userinterestcategory3_1 as userInterestCategory3_1,
1yearcartcategorycount as 1YearCartCategoryCount,
userinterestcategory2_3 as userInterestCategory2_3,
userinterestcategory1_4 as userInterestCategory1_4,
userinterestcategory3_2 as userInterestCategory3_2,
userinterestcategory1_5 as userInterestCategory1_5,
userinterestcategory3_3 as userInterestCategory3_3,
1yearorderproductothercount as 1YearOrderProductOtherCount,
30daysdetailpva as 30DaysDetailPva,
userinterestcategory2_4 as userInterestCategory2_4,
1yearorderusepointcount as 1YearOrderUsePointCount,
30dayschannelpvpaihangbang as 30DaysChannelPvPaiHangBang,
30dayschannelpvxinpinbang as 30daysChannelPvXinPinBang,
1yearaddfavcategorycount as 1YearAddFavCategoryCount,
userinterestcategory3_4 as userInterestCategory3_4,
userinterestcategory2_5 as userInterestCategory2_5,
activeinweekends as activeInWeekends,
1yearordercount as 1YearOrderCount,
30daysdetailpvb as 30DaysDetailPvb,
1yearorderpayproductcount as 1YearOrderPayProductCount,
1yearorderusepointpriceamt as 1YearOrderUsePointPriceAmt,
1yearaddfavproductcount as 1YearAddFavProductCount,
30daysdetailpvab as 30DaysDetailPvab,
userinterestcategory3_5 as userInterestCategory3_5,
devicetype as deviceType,
activein7days as activeIn7Days,
gender as gender,
purchasedbrand100 as purchasedBrand100,
30dayschannelpvrenqibang as 30DaysChannelPvRenQiBang,
1yearcartpriceamt as 1YearCartPriceAmt,
regularcustomer as regularCustomer,
1yearorderpaybrandcount as 1YearOrderPayBrandCount,
1yearorderticketbrandcount as 1YearOrderTicketBrandCount,
1yearorderproductotherpriceamt as 1YearOrderProductOtherPriceAmt,
1yearorderproductcountavg as 1YearOrderProductCountAvg,
favoritein7days as favoriteIn7Days,
30daysdetailpvn as 30DaysDetailPvn,
1yearaddfavpriceamt as 1YearAddFavPriceAmt,
1yearorderproductwomanpriceamt as 1YearOrderProductWomanPriceAmt,
1yearorderproductwomancount as 1YearOrderProductWomanCount,
30dayschannelpvxscj as 30DaysChannelPvXscj,
1yearorderticketpriceamt as 1YearOrderTicketPriceAmt,
userinterestbrand_0 as userInterestBrand_0,
1yearaddfavbrandcount as 1YearAddFavBrandCount,
userinterestbrand_1 as userInterestBrand_1,
userinterestbrand_2 as userInterestBrand_2,
1yearorderpaycount as 1YearOrderPayCount,
30dayschannelpvaolai as 30DaysChannelPvAoLai,
userinterestbrand_3 as userInterestBrand_3,
30daysdetailpvs as 30DaysDetailPvs,
purchaseditems as purchasedItems,
30dayssearchcount as 30DaysSearchCount,
1yearcartbrandcount as 1YearCartBrandCount,
1yearorderticketcount as 1YearOrderTicketCount,
1yearorderpaycategorycount as 1YearOrderPayCategoryCount
from $table_name";
# Runs the user-feature-extension DDL script and then its insert script,
# handing the insert yesterday's date as a single-quoted hivevar.
work_dir="/data/zhaoyanchao/java/shell/user_feature/"
delta_day=1
yesterday=$(date -d "-$delta_day day" "+%Y-%m-%d")

hive -f "${work_dir}create_user_feature_ext_table.sql"
hive --hivevar yesterday="'$yesterday'" -f "${work_dir}insert_user_feature_ext.sql"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment