Commit 1fdf6673 by wangyulong

1.特征数据优化

parent 2d707b51
insert overwrite table secoo_search.search_data_cross_feature_query_product partition(p_day = ${today}) insert overwrite table secoo_search.search_data_cross_feature_query_product partition(p_day = ${today_param})
select S.key_word, select S.key_word,
S.product_id, S.product_id,
case case
...@@ -66,7 +66,7 @@ SELECT ...@@ -66,7 +66,7 @@ SELECT
sum(match_third_cate), sum(match_third_cate),
sum(match_gender) sum(match_gender)
FROM secoo_search.search_data_cross_feature_query_product FROM secoo_search.search_data_cross_feature_query_product
WHERE p_day = ${today}; WHERE p_day = ${today_param};
SELECT SELECT
assert_true(sum(match_brand) > 0), assert_true(sum(match_brand) > 0),
...@@ -75,4 +75,4 @@ SELECT ...@@ -75,4 +75,4 @@ SELECT
assert_true(sum(match_third_cate) > 0), assert_true(sum(match_third_cate) > 0),
assert_true(sum(match_gender) > 0) assert_true(sum(match_gender) > 0)
FROM secoo_search.search_data_cross_feature_query_product FROM secoo_search.search_data_cross_feature_query_product
WHERE p_day = ${today}; WHERE p_day = ${today_param};
\ No newline at end of file \ No newline at end of file
work_dir="/data/zhaoyanchao/java/shell/cross_feature/query_product/" work_dir="/data/zhaoyanchao/java/shell/cross_feature/query_product/"
delta_day=1
today_param=$1
yesterday=`date -d "${today_param} -$delta_day day" "+%Y-%m-%d"`
partition=`hive -e "show partitions secoo_search.search_data_product_feature" |tail -n 1 | head -n 1` partition=`hive -e "show partitions secoo_search.search_data_product_feature" |tail -n 1 | head -n 1`
partition_day=${partition:6:10} partition_day=${partition:6:10}
yesterday=`date -d "-1 day" "+%Y-%m-%d"` echo "today_param is $today_param yesterday is $yesterday and partition_day is $partition_day"
today=`date "+%Y-%m-%d"`
echo "yesterday is $yesterday and partition_day is $partition_day"
hive --hivevar yesterday="'$yesterday'" --hivevar partition_day="'$partition_day'" --hivevar today="'$today'" -f "$work_dir"insert_cross_feature_query_product.sql hive --hivevar yesterday="'$yesterday'" --hivevar partition_day="'$partition_day'" --hivevar today_param="'$today_param'" -f "$work_dir"insert_cross_feature_query_product.sql
...@@ -3,37 +3,34 @@ work_dir="/data/zhaoyanchao/java/shell/cross_feature/user_product/" ...@@ -3,37 +3,34 @@ work_dir="/data/zhaoyanchao/java/shell/cross_feature/user_product/"
# 检查推荐最新表名 # 检查推荐最新表名
delta_day=0 delta_day=0
today_param=$1
yesterday=`date -d "-$delta_day day" "+%Y-%m-%d"` yesterday=`date -d "${today_param} -$delta_day day" "+%Y-%m-%d"`
date_str=`echo ${yesterday}|sed 's/\-/_/g'`
date_str=`echo $yesterday|sed 's/\-/_/g'` table_name="secoo_rcmd_features.userProductCrossFeatures_"${date_str}
echo ${table_name}
table_name="secoo_rcmd_features.userProductCrossFeatures_"$date_str
hive -e "desc $table_name" hive -e "desc $table_name"
result=$? result=$?
while [[ $result -ne 0 ]]; while [[ ${result} -ne 0 ]];
do do
delta_day=$(($delta_day+1)) delta_day=$(($delta_day+1))
yesterday=`date -d "-$delta_day day" "+%Y-%m-%d"` yesterday=`date -d "${today_param} -$delta_day day" "+%Y-%m-%d"`
date_str=`echo $yesterday|sed 's/\-/_/g'` date_str=`echo ${yesterday}|sed 's/\-/_/g'`
table_name="secoo_rcmd_features.userProductCrossFeatures_"$date_str table_name="secoo_rcmd_features.userProductCrossFeatures_"${date_str}
hive -e "desc $table_name" hive -e "desc $table_name"
result=$? result=$?
echo $table_name echo ${table_name}
done done
# 如果无表,建表 # 如果无表,建表
hive -f "$work_dir"create_cross_feature_user_product.sql hive -f "$work_dir"create_cross_feature_user_product.sql
today=`date "+%Y-%m-%d"` hive -e "insert overwrite table secoo_search.search_data_cross_feature_user_product partition(p_day=date'$today_param')
hive -e "insert overwrite table secoo_search.search_data_cross_feature_user_product partition(p_day=date'$today')
select select
device_id, device_id,
product_id, product_id,
...@@ -45,9 +42,8 @@ select ...@@ -45,9 +42,8 @@ select
prodanduserpurchbig100, prodanduserpurchbig100,
prodpriceinuserbrowsing, prodpriceinuserbrowsing,
prodpriceinuserpurch prodpriceinuserpurch
from $table_name; from $table_name;"
hive -e "SELECT
SELECT
sum(prodanduserpurchspecial), sum(prodanduserpurchspecial),
sum(prodinusercat1), sum(prodinusercat1),
sum(prodinusercat2), sum(prodinusercat2),
...@@ -57,9 +53,8 @@ SELECT ...@@ -57,9 +53,8 @@ SELECT
sum(prodpriceinuserbrowsing), sum(prodpriceinuserbrowsing),
sum(prodpriceinuserpurch) sum(prodpriceinuserpurch)
FROM secoo_search.search_data_cross_feature_user_product FROM secoo_search.search_data_cross_feature_user_product
WHERE p_day = '$today'; WHERE p_day = '$today_param'"
hive -e "SELECT
SELECT
assert_true(sum(prodanduserpurchspecial) > 0), assert_true(sum(prodanduserpurchspecial) > 0),
assert_true(sum(prodinusercat1) > 0), assert_true(sum(prodinusercat1) > 0),
assert_true(sum(prodinusercat2) > 0), assert_true(sum(prodinusercat2) > 0),
...@@ -69,4 +64,4 @@ SELECT ...@@ -69,4 +64,4 @@ SELECT
assert_true(sum(prodpriceinuserbrowsing) > 0), assert_true(sum(prodpriceinuserbrowsing) > 0),
assert_true(sum(prodpriceinuserpurch) > 0) assert_true(sum(prodpriceinuserpurch) > 0)
FROM secoo_search.search_data_cross_feature_user_product FROM secoo_search.search_data_cross_feature_user_product
WHERE p_day = '$today';" WHERE p_day = '$today_param'"
\ No newline at end of file \ No newline at end of file
...@@ -3,37 +3,30 @@ ...@@ -3,37 +3,30 @@
# 检查推荐最新表名 # 检查推荐最新表名
work_dir="/data/zhaoyanchao/java/shell/product_feature/" work_dir="/data/zhaoyanchao/java/shell/product_feature/"
delta_day=0 delta_day=0
today_param=$1
yesterday=`date -d "-$delta_day day" "+%Y-%m-%d"` yesterday=`date -d "${today_param} -$delta_day day" "+%Y-%m-%d"`
date_str=`echo ${yesterday}|sed 's/\-/_/g'`
date_str=`echo $yesterday|sed 's/\-/_/g'` table_name="secoo_rcmd_features.productFeatures_"${date_str}
echo ${table_name}
table_name="secoo_rcmd_features.productFeatures_"$date_str
hive -e "desc $table_name" hive -e "desc $table_name"
result=$? result=$?
while [[ $result -ne 0 ]]; while [[ ${result} -ne 0 ]];
do do
delta_day=$(($delta_day+1)) delta_day=$(($delta_day+1))
yesterday=`date -d "${today_param} -$delta_day day" "+%Y-%m-%d"`
yesterday=`date -d "-$delta_day day" "+%Y-%m-%d"` date_str=`echo ${yesterday}|sed 's/\-/_/g'`
table_name="secoo_rcmd_features.productFeatures_"${date_str}
date_str=`echo $yesterday|sed 's/\-/_/g'`
table_name="secoo_rcmd_features.productFeatures_"$date_str
hive -e "desc $table_name" hive -e "desc $table_name"
result=$? result=$?
echo $table_name echo ${table_name}
done done
# 如果无表,建表 # 如果无表,建表
hive -f $"work_dir"create_search_product_feature_table.sql hive -f $"work_dir"create_search_product_feature_table.sql
real_yesterday=`date "+%Y-%m-%d"` hive -e "insert overwrite table secoo_search.search_data_product_feature partition(p_day=date'$today_param')
hive -e "insert overwrite table secoo_search.search_data_product_feature partition(p_day=date'$real_yesterday')
select select
R.product_id, R.product_id,
R.isbrand100 as isBrand100, R.isbrand100 as isBrand100,
......
...@@ -4,28 +4,26 @@ ...@@ -4,28 +4,26 @@
work_dir="/data/zhaoyanchao/java/shell/user_feature/" work_dir="/data/zhaoyanchao/java/shell/user_feature/"
delta_day=0 delta_day=0
today_param=$1
yesterday=`date -d "-$delta_day day" "+%Y-%m-%d"` yesterday=`date -d "${today_param} -$delta_day day" "+%Y-%m-%d"`
date_str=`echo ${yesterday}|sed 's/\-/_/g'`
date_str=`echo $yesterday|sed 's/\-/_/g'` table_name="secoo_rcmd_features.userfeatures_"${date_str}
echo ${table_name}
table_name="secoo_rcmd_features.userfeatures_"$date_str
hive -e "desc $table_name" hive -e "desc $table_name"
result=$? result=$?
while [[ $result -ne 0 ]]; while [[ ${result} -ne 0 ]];
do do
delta_day=$(($delta_day+1)) delta_day=$(($delta_day+1))
yesterday=`date -d "-$delta_day day" "+%Y-%m-%d"` yesterday=`date -d "${today_param} -$delta_day day" "+%Y-%m-%d"`
date_str=`echo $yesterday|sed 's/\-/_/g'` date_str=`echo ${yesterday}|sed 's/\-/_/g'`
table_name="secoo_rcmd_features.userfeatures_"$date_str table_name="secoo_rcmd_features.userfeatures_"${date_str}
hive -e "desc $table_name" hive -e "desc $table_name"
result=$? result=$?
echo $table_name echo ${table_name}
done done
...@@ -35,9 +33,7 @@ hive -f "$work_dir"create_search_user_feature_table.sql ...@@ -35,9 +33,7 @@ hive -f "$work_dir"create_search_user_feature_table.sql
echo "开始导入数据" echo "开始导入数据"
yesterday=`date "+%Y-%m-%d"` hive -e "insert overwrite table secoo_search.search_data_user_feature partition(p_day=date'$today_param')
hive -e "insert overwrite table secoo_search.search_data_user_feature partition(p_day=date'$yesterday')
select select
device_id as device_id, device_id as device_id,
1yearorderticketproductcount as 1YearOrderTicketProductCount, 1yearorderticketproductcount as 1YearOrderTicketProductCount,
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment