Commit 1fdf6673 by wangyulong

1.特征数据优化

parent 2d707b51
insert overwrite table secoo_search.search_data_cross_feature_query_product partition(p_day = ${today})
insert overwrite table secoo_search.search_data_cross_feature_query_product partition(p_day = ${today_param})
select S.key_word,
S.product_id,
case
......@@ -66,7 +66,7 @@ SELECT
sum(match_third_cate),
sum(match_gender)
FROM secoo_search.search_data_cross_feature_query_product
WHERE p_day = ${today};
WHERE p_day = ${today_param};
SELECT
assert_true(sum(match_brand) > 0),
......@@ -75,4 +75,4 @@ SELECT
assert_true(sum(match_third_cate) > 0),
assert_true(sum(match_gender) > 0)
FROM secoo_search.search_data_cross_feature_query_product
WHERE p_day = ${today};
\ No newline at end of file
WHERE p_day = ${today_param};
\ No newline at end of file
# Driver for the query/product cross-feature load: runs
# insert_cross_feature_query_product.sql through hive, handing it the run
# date, the day before it, and a partition day as hivevars.
# NOTE(review): this listing appears to show both sides of a diff together
# (yesterday is assigned twice, echo and hive each appear twice). Read as a
# plain script, the later assignment of yesterday overrides the earlier one.
work_dir="/data/zhaoyanchao/java/shell/cross_feature/query_product/"
delta_day=1
# Run date is taken from the first command-line argument.
today_param=$1
# Date delta_day days before today_param, formatted YYYY-MM-DD.
yesterday=`date -d "${today_param} -$delta_day day" "+%Y-%m-%d"`
# Last line of the partition listing of search_data_product_feature
# (presumably the newest partition if the listing is sorted - confirm).
partition=`hive -e "show partitions secoo_search.search_data_product_feature" |tail -n 1 | head -n 1`
# Keep 10 characters starting at offset 6 (presumably skips a "p_day=" prefix
# and keeps a YYYY-MM-DD value - TODO confirm against real output).
partition_day=${partition:6:10}
# Relative to the current system date, not to today_param.
yesterday=`date -d "-1 day" "+%Y-%m-%d"`
today=`date "+%Y-%m-%d"`
echo "yesterday is $yesterday and partition_day is $partition_day"
echo "today_param is $today_param yesterday is $yesterday and partition_day is $partition_day"
# Each value is wrapped in single quotes so it is substituted into the SQL as
# a quoted string literal. This invocation passes the hivevar named "today".
hive --hivevar yesterday="'$yesterday'" --hivevar partition_day="'$partition_day'" --hivevar today="'$today'" -f "$work_dir"insert_cross_feature_query_product.sql
# This invocation passes the hivevar named "today_param" instead of "today".
hive --hivevar yesterday="'$yesterday'" --hivevar partition_day="'$partition_day'" --hivevar today_param="'$today_param'" -f "$work_dir"insert_cross_feature_query_product.sql
......@@ -3,37 +3,34 @@ work_dir="/data/zhaoyanchao/java/shell/cross_feature/user_product/"
# Find the most recent recommendation feature table:
# secoo_rcmd_features.userProductCrossFeatures_<YYYY_MM_DD>.
# Starts at delta_day=0 and steps back one day at a time until
# `hive -e "desc ..."` exits with status 0.
# NOTE(review): this listing appears to show both sides of a diff together, so
# most statements occur twice (once relative to the current date, once
# relative to today_param). Where both are present, the later one wins.
delta_day=0
# Run date is taken from the first command-line argument.
today_param=$1
yesterday=`date -d "-$delta_day day" "+%Y-%m-%d"`
# Table names use underscores, so replace every '-' in the date with '_'.
date_str=`echo $yesterday|sed 's/\-/_/g'`
table_name="secoo_rcmd_features.userProductCrossFeatures_"$date_str
# Same computation, anchored on today_param instead of the current date.
yesterday=`date -d "${today_param} -$delta_day day" "+%Y-%m-%d"`
date_str=`echo ${yesterday}|sed 's/\-/_/g'`
table_name="secoo_rcmd_features.userProductCrossFeatures_"${date_str}
echo ${table_name}
# The exit status of `desc` is used as the "table exists" probe.
hive -e "desc $table_name"
result=$?
# NOTE(review): two consecutive `while` headers share a single do/done; they
# look like the before/after lines of the diff rather than nested loops.
# NOTE(review): there is no upper bound on delta_day, so the loop keeps
# running for as long as hive keeps returning a non-zero status.
while [[ $result -ne 0 ]];
while [[ ${result} -ne 0 ]];
do
delta_day=$(($delta_day+1))
yesterday=`date -d "-$delta_day day" "+%Y-%m-%d"`
yesterday=`date -d "${today_param} -$delta_day day" "+%Y-%m-%d"`
date_str=`echo $yesterday|sed 's/\-/_/g'`
date_str=`echo ${yesterday}|sed 's/\-/_/g'`
table_name="secoo_rcmd_features.userProductCrossFeatures_"$date_str
table_name="secoo_rcmd_features.userProductCrossFeatures_"${date_str}
hive -e "desc $table_name"
result=$?
echo $table_name
echo ${table_name}
done
# If the target table does not exist, create it.
hive -f "$work_dir"create_cross_feature_user_product.sql
# Current system date, formatted YYYY-MM-DD.
today=`date "+%Y-%m-%d"`
hive -e "insert overwrite table secoo_search.search_data_cross_feature_user_product partition(p_day=date'$today')
hive -e "insert overwrite table secoo_search.search_data_cross_feature_user_product partition(p_day=date'$today_param')
select
device_id,
product_id,
......@@ -45,9 +42,8 @@ select
prodanduserpurchbig100,
prodpriceinuserbrowsing,
prodpriceinuserpurch
from $table_name;
SELECT
from $table_name;"
hive -e "SELECT
sum(prodanduserpurchspecial),
sum(prodinusercat1),
sum(prodinusercat2),
......@@ -57,9 +53,8 @@ SELECT
sum(prodpriceinuserbrowsing),
sum(prodpriceinuserpurch)
FROM secoo_search.search_data_cross_feature_user_product
WHERE p_day = '$today';
SELECT
WHERE p_day = '$today_param'"
hive -e "SELECT
assert_true(sum(prodanduserpurchspecial) > 0),
assert_true(sum(prodinusercat1) > 0),
assert_true(sum(prodinusercat2) > 0),
......@@ -69,4 +64,4 @@ SELECT
assert_true(sum(prodpriceinuserbrowsing) > 0),
assert_true(sum(prodpriceinuserpurch) > 0)
FROM secoo_search.search_data_cross_feature_user_product
WHERE p_day = '$today';"
\ No newline at end of file
WHERE p_day = '$today_param'"
\ No newline at end of file
......@@ -3,37 +3,30 @@
# Find the most recent recommendation feature table:
# secoo_rcmd_features.productFeatures_<YYYY_MM_DD>.
# Starts at delta_day=0 and steps back one day at a time until
# `hive -e "desc ..."` exits with status 0.
# NOTE(review): this listing appears to show both sides of a diff together, so
# most statements occur twice (once relative to the current date, once
# relative to today_param). Where both are present, the later one wins.
work_dir="/data/zhaoyanchao/java/shell/product_feature/"
delta_day=0
yesterday=`date -d "-$delta_day day" "+%Y-%m-%d"`
# Table names use underscores, so replace every '-' in the date with '_'.
date_str=`echo $yesterday|sed 's/\-/_/g'`
table_name="secoo_rcmd_features.productFeatures_"$date_str
# Run date is taken from the first command-line argument.
today_param=$1
# Same computation, anchored on today_param instead of the current date.
yesterday=`date -d "${today_param} -$delta_day day" "+%Y-%m-%d"`
date_str=`echo ${yesterday}|sed 's/\-/_/g'`
table_name="secoo_rcmd_features.productFeatures_"${date_str}
echo ${table_name}
# The exit status of `desc` is used as the "table exists" probe.
hive -e "desc $table_name"
result=$?
# NOTE(review): two consecutive `while` headers share a single do/done; they
# look like the before/after lines of the diff rather than nested loops.
# NOTE(review): there is no upper bound on delta_day, so the loop keeps
# running for as long as hive keeps returning a non-zero status.
while [[ $result -ne 0 ]];
while [[ ${result} -ne 0 ]];
do
delta_day=$(($delta_day+1))
yesterday=`date -d "-$delta_day day" "+%Y-%m-%d"`
date_str=`echo $yesterday|sed 's/\-/_/g'`
table_name="secoo_rcmd_features.productFeatures_"$date_str
yesterday=`date -d "${today_param} -$delta_day day" "+%Y-%m-%d"`
date_str=`echo ${yesterday}|sed 's/\-/_/g'`
table_name="secoo_rcmd_features.productFeatures_"${date_str}
hive -e "desc $table_name"
result=$?
echo $table_name
echo ${table_name}
done
# If the target table does not exist, create it.
# The script path must expand the shell variable work_dir. The earlier form
# $"work_dir" is bash locale-translation quoting, which produces the literal
# text "work_dir", so hive was handed the relative path
# "work_dircreate_search_product_feature_table.sql" instead of a file under
# the configured work directory.
hive -f "${work_dir}create_search_product_feature_table.sql"
real_yesterday=`date "+%Y-%m-%d"`
hive -e "insert overwrite table secoo_search.search_data_product_feature partition(p_day=date'$real_yesterday')
hive -e "insert overwrite table secoo_search.search_data_product_feature partition(p_day=date'$today_param')
select
R.product_id,
R.isbrand100 as isBrand100,
......
......@@ -4,28 +4,26 @@
# Find the most recent recommendation feature table:
# secoo_rcmd_features.userfeatures_<YYYY_MM_DD>.
# Starts at delta_day=0 and steps back one day at a time until
# `hive -e "desc ..."` exits with status 0.
# NOTE(review): this listing appears to show both sides of a diff together, so
# most statements occur twice (once relative to the current date, once
# relative to today_param). Where both are present, the later one wins.
work_dir="/data/zhaoyanchao/java/shell/user_feature/"
delta_day=0
yesterday=`date -d "-$delta_day day" "+%Y-%m-%d"`
# Table names use underscores, so replace every '-' in the date with '_'.
date_str=`echo $yesterday|sed 's/\-/_/g'`
table_name="secoo_rcmd_features.userfeatures_"$date_str
# Run date is taken from the first command-line argument.
today_param=$1
# Same computation, anchored on today_param instead of the current date.
yesterday=`date -d "${today_param} -$delta_day day" "+%Y-%m-%d"`
date_str=`echo ${yesterday}|sed 's/\-/_/g'`
table_name="secoo_rcmd_features.userfeatures_"${date_str}
echo ${table_name}
# The exit status of `desc` is used as the "table exists" probe.
hive -e "desc $table_name"
result=$?
# NOTE(review): two consecutive `while` headers share a single do/done; they
# look like the before/after lines of the diff rather than nested loops.
# NOTE(review): there is no upper bound on delta_day, so the loop keeps
# running for as long as hive keeps returning a non-zero status.
while [[ $result -ne 0 ]];
while [[ ${result} -ne 0 ]];
do
delta_day=$(($delta_day+1))
yesterday=`date -d "-$delta_day day" "+%Y-%m-%d"`
yesterday=`date -d "${today_param} -$delta_day day" "+%Y-%m-%d"`
date_str=`echo $yesterday|sed 's/\-/_/g'`
date_str=`echo ${yesterday}|sed 's/\-/_/g'`
table_name="secoo_rcmd_features.userfeatures_"$date_str
table_name="secoo_rcmd_features.userfeatures_"${date_str}
hive -e "desc $table_name"
result=$?
echo $table_name
echo ${table_name}
done
......@@ -35,9 +33,7 @@ hive -f "$work_dir"create_search_user_feature_table.sql
echo "开始导入数据"
yesterday=`date "+%Y-%m-%d"`
hive -e "insert overwrite table secoo_search.search_data_user_feature partition(p_day=date'$yesterday')
hive -e "insert overwrite table secoo_search.search_data_user_feature partition(p_day=date'$today_param')
select
device_id as device_id,
1yearorderticketproductcount as 1YearOrderTicketProductCount,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment