Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
search-model-data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
田川
search-model-data
Commits
2ce837d7
Commit
2ce837d7
authored
Aug 21, 2021
by
wangyulong
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
1.商品特征,用户商品交叉特征
parent
6afe09c3
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
517 additions
and
145 deletions
+517
-145
create_cross_feature_user_product.sql
src/main/scripts/cross_feature/user_product/create_cross_feature_user_product.sql
+13
-13
create_cross_feature_user_product_buy.sql
src/main/scripts/cross_feature/user_product/create_cross_feature_user_product_buy.sql
+11
-0
insert_cross_feature_user_product_buy.sql
src/main/scripts/cross_feature/user_product/insert_cross_feature_user_product_buy.sql
+27
-0
user_product_buy_feature.sh
src/main/scripts/cross_feature/user_product/user_product_buy_feature.sh
+10
-0
user_product_feature.sh
src/main/scripts/cross_feature/user_product/user_product_feature.sh
+29
-23
create_product_feature_ext_table.sql
src/main/scripts/product_feature/create_product_feature_ext_table.sql
+0
-0
create_search_product_feature_table.sql
src/main/scripts/product_feature/create_search_product_feature_table.sql
+44
-45
product_feature_compose.sh
src/main/scripts/product_feature/product_feature_compose.sh
+67
-64
create_search_user_feature_table.sql
src/main/scripts/user_feature/create_search_user_feature_table.sql
+0
-0
create_user_brand_category_favorite_table.sql
src/main/scripts/user_feature/create_user_brand_category_favorite_table.sql
+44
-0
insert_user_brand_category_favorite_table.sql
src/main/scripts/user_feature/insert_user_brand_category_favorite_table.sql
+260
-0
user_favor_brand_category_feature.sh
src/main/scripts/user_feature/user_favor_brand_category_feature.sh
+12
-0
user_feature_compose.sh
src/main/scripts/user_feature/user_feature_compose.sh
+0
-0
No files found.
src/main/scripts/cross_feature/user_product/create_cross_feature_user_product.sql
View file @
2ce837d7
create
external
table
if
not
exists
secoo_search
.
search_data_cross_feature_user_product
create
external
table
if
not
exists
secoo_search
.
search_data_cross_feature_user_product
(
(
device_id
string
comment
'设备id'
,
device_id
string
comment
'设备id'
,
product_id
string
comment
'商品id'
,
product_id
string
comment
'商品id'
,
prodanduserpurchspecial
string
comment
'购买过的商品是特例品 & 商品本身是特例品'
,
prodanduserpurchspecial
string
comment
'购买过的商品是特例品 & 商品本身是特例品'
,
prodinusercat1
string
comment
'商品的一级品类是否在用户的一级品类偏好中'
,
prodinusercat1
string
comment
'商品的一级品类是否在用户的一级品类偏好中'
,
prodinusercat2
string
comment
'商品的二级品类是否在用户的二级品类偏好中'
,
prodinusercat2
string
comment
'商品的二级品类是否在用户的二级品类偏好中'
,
prodanduserpurchnew
string
comment
'商品的价格是否在用户购买的价格段中'
,
prodanduserpurchnew
string
comment
'商品的价格是否在用户购买的价格段中'
,
prodanduserpurchpromotion
string
comment
'购买过的商品是促销品 & 商品本身是促销品'
,
prodanduserpurchpromotion
string
comment
'购买过的商品是促销品 & 商品本身是促销品'
,
prodanduserpurchbig100
string
comment
'购买过百大品牌 & 商品是百大品牌'
,
prodanduserpurchbig100
string
comment
'购买过百大品牌 & 商品是百大品牌'
,
prodpriceinuserbrowsing
string
comment
'商品的价格是否在用户浏览的价格段中'
,
prodpriceinuserbrowsing
string
comment
'商品的价格是否在用户浏览的价格段中'
,
prodpriceinuserpurch
string
comment
'商品的价格是否在用户购买的价格段中'
prodpriceinuserpurch
string
comment
'商品的价格是否在用户购买的价格段中'
,
user_product_buy_is_match
string
comment
'用户购买商品二级类目价格等级是否匹配商品二级类目价格等级:1/0'
,
user_product_buy_price_level
string
comment
'用户购买商品二级类目价格等级是否匹配商品二级类目价格等级:price_level/0'
)
comment
'用户商品交叉特征'
)
comment
'用户商品交叉特征'
partitioned
by
(
partitioned
by
(
p_day
date
comment
'分区日期'
)
p_day
date
comment
'分区日期'
)
row
format
delimited
fields
terminated
by
'
\t
'
row
format
delimited
fields
terminated
by
'
\t
'
stored
as
textfile
;
stored
as
textfile
;
src/main/scripts/cross_feature/user_product/create_cross_feature_user_product_buy.sql
0 → 100644
View file @
2ce837d7
-- User x product "buy" cross feature table.
-- One row per (device_id, product_id); partitioned by day (p_day).
-- Column meanings are carried by the Hive COMMENT strings below.
CREATE EXTERNAL TABLE IF NOT EXISTS secoo_search.search_data_cross_feature_user_product_buy
(
    device_id                    STRING COMMENT '设备id',
    product_id                   STRING COMMENT '商品id',
    category2_id                 STRING COMMENT '二级类目id',
    cate_2_price_level           STRING COMMENT '商品二级类目价格段1-10',
    user_product_buy_is_match    STRING COMMENT '用户购买商品二级类目价格等级是否匹配商品二级类目价格等级:1/0',
    user_product_buy_price_level STRING COMMENT '用户购买商品二级类目价格等级是否匹配商品二级类目价格等级:price_level/0'
)
COMMENT '用户商品购买交叉特征'
PARTITIONED BY (
    p_day DATE COMMENT '分区日期'
);
\ No newline at end of file
src/main/scripts/cross_feature/user_product/insert_cross_feature_user_product_buy.sql
0 → 100644
View file @
2ce837d7
-- Rebuilds one daily partition of the user x product "buy" cross feature.
-- ${yesterday} is a Hive variable substituted textually before parsing, so the
-- caller must supply it as a quoted date literal.
INSERT OVERWRITE TABLE secoo_search.search_data_cross_feature_user_product_buy
PARTITION (p_day = ${yesterday})
SELECT
    UPPER(buy.uuid)              AS device_id,
    buy.product_id               AS product_id,
    buy.category_id_2            AS category2_id,
    buy.buy_sku_cat2_price_level AS cate_2_price_level,
    -- 1 when the price level on the user's buy record equals the product's
    -- price level, else 0 (also 0 when the product has no match in the join).
    CASE
        WHEN buy.buy_sku_cat2_price_level = prod.cate_2_price_level THEN 1
        ELSE 0
    END                          AS user_product_buy_is_match,
    -- The matched price level itself, or 0 when the levels differ.
    CASE
        WHEN buy.buy_sku_cat2_price_level = prod.cate_2_price_level THEN buy.buy_sku_cat2_price_level
        ELSE 0
    END                          AS user_product_buy_price_level
FROM secoo_app.app_search_uuid_sku_price_level buy
LEFT JOIN (
    -- Per-product price level for the same day, looked up through main_id.
    SELECT
        wide.product_id                       AS product_id,
        split(wide.category_org_code, '_')[2] AS category_id_2,
        lvl.cate_2_price_level                AS cate_2_price_level
    FROM secoo_fact.fact_search_product_wide_p_day wide
    JOIN secoo_fact.fact_search_product_55_30d_click_product_p_day lvl
        ON wide.main_id = lvl.product_main_id
        AND lvl.p_day = ${yesterday}
    WHERE wide.p_day = ${yesterday}
) prod
    ON buy.product_id = prod.product_id
WHERE buy.p_day = ${yesterday};
src/main/scripts/cross_feature/user_product/user_product_buy_feature.sh
0 → 100644
View file @
2ce837d7
# User x product cross feature (buy): creates the target table if needed and
# rebuilds the partition for the day before the date given as $1.
work_dir="/data/zhaoyanchao/java/shell/cross_feature/user_product/"
delta_day=1
today_param=$1

# Partition date: $1 shifted back by delta_day days, formatted as YYYY-MM-DD.
yesterday=$(date -d "${today_param} -${delta_day} day" "+%Y-%m-%d")

# DDL uses IF NOT EXISTS, so this is a no-op once the table exists.
hive -f "${work_dir}create_cross_feature_user_product_buy.sql"

# The single quotes are part of the variable value: the SQL file uses
# ${yesterday} directly as a date literal.
hive --hivevar yesterday="'${yesterday}'" -f "${work_dir}insert_cross_feature_user_product_buy.sql"
src/main/scripts/cross_feature/user_product/user_product_feature.sh
View file @
2ce837d7
...
@@ -2,11 +2,11 @@
...
@@ -2,11 +2,11 @@
work_dir
=
"/data/zhaoyanchao/java/shell/cross_feature/user_product/"
work_dir
=
"/data/zhaoyanchao/java/shell/cross_feature/user_product/"
# 检查推荐最新表名
# 检查推荐最新表名
delta_day
=
0
delta_day
=
1
today_param
=
$1
today_param
=
$1
yesterday
=
`
date
-d
"
${
today_param
}
-
$delta_day
day"
"+%Y-%m-%d"
`
yesterday
=
`
date
-d
"
${
today_param
}
-
$delta_day
day"
"+%Y-%m-%d"
`
date_str
=
`
echo
${
yesterday
}
|sed
's/\-/_/g'
`
date_str
=
`
echo
${
today_param
}
|sed
's/\-/_/g'
`
table_name
=
"secoo_rcmd_features.userProductCrossFeatures_"
${
date_str
}
table_name
=
"secoo_rcmd_features.userProductCrossFeatures_"
${
date_str
}
echo
${
table_name
}
echo
${
table_name
}
...
@@ -16,9 +16,9 @@ while [[ ${result} -ne 0 ]];
...
@@ -16,9 +16,9 @@ while [[ ${result} -ne 0 ]];
do
do
delta_day
=
$((
$delta_day
+
1
))
delta_day
=
$((
$delta_day
+
1
))
yesterday
=
`
date
-d
"
${
today_param
}
-
$delta_day
day"
"+%Y-%m-%d"
`
today_param_1
=
`
date
-d
"
${
today_param
}
-
$delta_day
day"
"+%Y-%m-%d"
`
date_str
=
`
echo
${
yesterday
}
|sed
's/\-/_/g'
`
date_str
=
`
echo
${
today_param_1
}
|sed
's/\-/_/g'
`
table_name
=
"secoo_rcmd_features.userProductCrossFeatures_"
${
date_str
}
table_name
=
"secoo_rcmd_features.userProductCrossFeatures_"
${
date_str
}
hive
-e
"desc
$table_name
"
hive
-e
"desc
$table_name
"
...
@@ -30,19 +30,24 @@ done
...
@@ -30,19 +30,24 @@ done
# 如果无表,建表
# 如果无表,建表
hive
-f
"
$work_dir
"
create_cross_feature_user_product.sql
hive
-f
"
$work_dir
"
create_cross_feature_user_product.sql
# Rebuild the p_day=${today_param} partition of the user x product cross
# feature table from the recommendation source table ${table_name}, enriched
# with the two "buy" columns from search_data_cross_feature_user_product_buy.
#
# The T2.p_day filter lives in the ON clause on purpose: placing it in WHERE
# would discard every T1 row without a buy match (T2.p_day is NULL for those),
# silently turning the LEFT JOIN into an INNER JOIN.
hive -e "insert overwrite table secoo_search.search_data_cross_feature_user_product partition(p_day=date'${today_param}')
select
T1.device_id,
T1.product_id,
T1.prodanduserpurchspecial,
T1.prodinusercat1,
T1.prodinusercat2,
T1.prodanduserpurchnew,
T1.prodanduserpurchpromotion,
T1.prodanduserpurchbig100,
T1.prodpriceinuserbrowsing,
T1.prodpriceinuserpurch,
T2.user_product_buy_is_match,
T2.user_product_buy_price_level
from ${table_name} T1
left join secoo_search.search_data_cross_feature_user_product_buy T2
on T1.device_id = T2.device_id
and T1.product_id = T2.product_id
and T2.p_day = '${yesterday}';"
hive
-e
"SELECT
hive
-e
"SELECT
sum(prodanduserpurchspecial),
sum(prodanduserpurchspecial),
sum(prodinusercat1),
sum(prodinusercat1),
...
@@ -51,9 +56,10 @@ hive -e "SELECT
...
@@ -51,9 +56,10 @@ hive -e "SELECT
sum(prodanduserpurchpromotion),
sum(prodanduserpurchpromotion),
sum(prodanduserpurchbig100),
sum(prodanduserpurchbig100),
sum(prodpriceinuserbrowsing),
sum(prodpriceinuserbrowsing),
sum(prodpriceinuserpurch)
sum(prodpriceinuserpurch),
FROM secoo_search.search_data_cross_feature_user_product
sum(user_product_buy_is_match)
WHERE p_day = '
$today_param
'"
FROM secoo_search.search_data_cross_feature_user_product WHERE p_day = '
${
today_param
}
'"
hive
-e
"SELECT
hive
-e
"SELECT
assert_true(sum(prodanduserpurchspecial) > 0),
assert_true(sum(prodanduserpurchspecial) > 0),
assert_true(sum(prodinusercat1) > 0),
assert_true(sum(prodinusercat1) > 0),
...
@@ -62,6 +68,6 @@ hive -e "SELECT
...
@@ -62,6 +68,6 @@ hive -e "SELECT
assert_true(sum(prodanduserpurchpromotion) > 0),
assert_true(sum(prodanduserpurchpromotion) > 0),
assert_true(sum(prodanduserpurchbig100) > 0),
assert_true(sum(prodanduserpurchbig100) > 0),
assert_true(sum(prodpriceinuserbrowsing) > 0),
assert_true(sum(prodpriceinuserbrowsing) > 0),
assert_true(sum(prodpriceinuserpurch) > 0)
assert_true(sum(prodpriceinuserpurch) > 0),
FROM secoo_search.search_data_cross_feature_user_product
assert_true(sum(user_product_buy_is_match) > 0)
WHERE p_day = '
$today_param
'"
FROM secoo_search.search_data_cross_feature_user_product WHERE p_day = '
${
today_param
}
'"
\ No newline at end of file
\ No newline at end of file
src/main/scripts/product_feature/create_product_feature_ext_table.sql
View file @
2ce837d7
This diff is collapsed.
Click to expand it.
src/main/scripts/product_feature/create_search_product_feature_table.sql
View file @
2ce837d7
-- 商品特征表
-- 商品特征表
create
external
table
if
not
exists
secoo_search
.
search_data_product_feature
create
external
table
if
not
exists
secoo_search
.
search_data_product_feature
(
(
product_id
string
comment
'商品id'
,
product_id
string
comment
'商品id'
,
is_brand_top_100
string
comment
'是否是百大品牌'
,
is_brand_top_100
string
comment
'是否是百大品牌'
,
product_popularity
string
comment
'浏览、收藏、加购综合算的热度值'
,
product_popularity
string
comment
'浏览、收藏、加购综合算的热度值'
,
is_member_product
string
comment
'是否会员商品'
,
is_member_product
string
comment
'是否会员商品'
,
is_special_product
string
comment
'是否是特例品'
,
is_special_product
string
comment
'是否是特例品'
,
is_new_customer_product
string
comment
'是否新客商品'
,
is_new_customer_product
string
comment
'是否新客商品'
,
is_new_product
string
comment
'是否是新品'
,
is_new_product
string
comment
'是否是新品'
,
is_new_style
string
comment
'是否是新款'
,
is_new_style
string
comment
'是否是新款'
,
is_self_operating
string
comment
'是否自营品'
,
is_self_operating
string
comment
'是否自营品'
,
product_level
string
comment
'商品等级'
,
product_level
string
comment
'商品等级'
,
is_promotion_product
string
comment
'是否是促销商品'
,
is_promotion_product
string
comment
'是否是促销商品'
,
gender
string
comment
'性别'
,
gender
string
comment
'性别'
,
product_brand
string
comment
'商品的品牌'
,
product_brand
string
comment
'商品的品牌'
,
product_category
string
comment
'商品的品类'
,
product_category
string
comment
'商品的品类'
,
product_price_level
string
comment
'商品价格(需要离散化)'
,
product_price_level
string
comment
'商品价格(需要离散化)'
,
ctr_android_3days
string
comment
'安卓系统下商品3天的CTR'
,
ctr_android_3days
string
comment
'安卓系统下商品3天的CTR'
,
ctr_android_7days
string
comment
'安卓系统下商品7天的CTR'
,
ctr_android_7days
string
comment
'安卓系统下商品7天的CTR'
,
ctr_android_30days
string
comment
'安卓系统下商品30天的CTR'
,
ctr_android_30days
string
comment
'安卓系统下商品30天的CTR'
,
ctr_ios_3days
string
comment
'ios系统下商品3天的CTR'
,
ctr_ios_3days
string
comment
'ios系统下商品3天的CTR'
,
ctr_ios_7days
string
comment
'ios系统下商品7天的CTR'
,
ctr_ios_7days
string
comment
'ios系统下商品7天的CTR'
,
ctr_ios_30days
string
comment
'ios系统下商品30天的CTR'
,
ctr_ios_30days
string
comment
'ios系统下商品30天的CTR'
,
ctr_3days
string
comment
'全部系统下商品3天的CTR'
,
ctr_3days
string
comment
'全部系统下商品3天的CTR'
,
ctr_7days
string
comment
'全部系统下商品7天的CTR'
,
ctr_7days
string
comment
'全部系统下商品7天的CTR'
,
ctr_30days
string
comment
'全部系统下商品30天的CTR'
,
ctr_30days
string
comment
'全部系统下商品30天的CTR'
,
favorite_android_3days
string
comment
'安卓系统下商品3天的收藏'
,
favorite_android_3days
string
comment
'安卓系统下商品3天的收藏'
,
favorite_android_7days
string
comment
'安卓系统下商品7天的收藏'
,
favorite_android_7days
string
comment
'安卓系统下商品7天的收藏'
,
favorite_android_30days
string
comment
'安卓系统下商品30天的收藏'
,
favorite_android_30days
string
comment
'安卓系统下商品30天的收藏'
,
favorite_ios_3days
string
comment
'ios系统下商品3天的收藏'
,
favorite_ios_3days
string
comment
'ios系统下商品3天的收藏'
,
favorite_ios_7days
string
comment
'ios系统下商品7天的收藏'
,
favorite_ios_7days
string
comment
'ios系统下商品7天的收藏'
,
favorite_ios_30days
string
comment
'ios系统下商品30天的收藏'
,
favorite_ios_30days
string
comment
'ios系统下商品30天的收藏'
,
favorite_3days
string
comment
'全部系统下商品3天的收藏'
,
favorite_3days
string
comment
'全部系统下商品3天的收藏'
,
favorite_7days
string
comment
'全部系统下商品7天的收藏'
,
favorite_7days
string
comment
'全部系统下商品7天的收藏'
,
favorite_30days
string
comment
'全部系统下商品30天的收藏'
,
favorite_30days
string
comment
'全部系统下商品30天的收藏'
,
add_cart_android_3days
string
comment
'安卓系统下商品3天的加购'
,
add_cart_android_3days
string
comment
'安卓系统下商品3天的加购'
,
add_cart_android_7days
string
comment
'安卓系统下商品7天的加购'
,
add_cart_android_7days
string
comment
'安卓系统下商品7天的加购'
,
add_cart_android_30days
string
comment
'安卓系统下商品30天的加购'
,
add_cart_android_30days
string
comment
'安卓系统下商品30天的加购'
,
add_cart_ios_3days
string
comment
'ios系统下商品3天的加购'
,
add_cart_ios_3days
string
comment
'ios系统下商品3天的加购'
,
add_cart_ios_7days
string
comment
'ios系统下商品7天的加购'
,
add_cart_ios_7days
string
comment
'ios系统下商品7天的加购'
,
add_cart_ios_30days
string
comment
'ios系统下商品30天的加购'
,
add_cart_ios_30days
string
comment
'ios系统下商品30天的加购'
,
add_cart_3days
string
comment
'全部系统下商品3天的加购'
,
add_cart_3days
string
comment
'全部系统下商品3天的加购'
,
add_cart_7days
string
comment
'全部系统下商品7天的加购'
,
add_cart_7days
string
comment
'全部系统下商品7天的加购'
,
add_cart_30days
string
comment
'全部系统下商品30天的加购'
,
add_cart_30days
string
comment
'全部系统下商品30天的加购'
,
area_type
tinyint
comment
'货源地,(0大陆 1香港 2美国 3日本 4意大利)'
,
area_type
tinyint
comment
'货源地,(0大陆 1香港 2美国 3日本 4意大利)'
,
sale_qty_180
bigint
comment
'前180天至今销售数量'
,
sale_qty_180
bigint
comment
'前180天至今销售数量'
,
sale_qty_90
bigint
comment
'前90天至今销售数量'
,
sale_qty_90
bigint
comment
'前90天至今销售数量'
,
sale_qty_30
bigint
comment
'前30天至今销售数量'
,
sale_qty_30
bigint
comment
'前30天至今销售数量'
,
sale_qty_15
bigint
comment
'前15天至今销售数量'
sale_qty_15
bigint
comment
'前15天至今销售数量'
,
product_cate2_price_level
tinyint
comment
'商品二级类目价格段1-10'
)
comment
'商品特征'
)
comment
'商品特征'
partitioned
by
(
partitioned
by
(
p_day
date
comment
'分区日期'
)
p_day
date
comment
'分区日期'
)
stored
as
parquet
;
stored
as
parquet
;
src/main/scripts/product_feature/product_feature_compose.sh
View file @
2ce837d7
...
@@ -2,10 +2,10 @@
...
@@ -2,10 +2,10 @@
# 检查推荐最新表名
# 检查推荐最新表名
work_dir
=
"/data/zhaoyanchao/java/shell/product_feature/"
work_dir
=
"/data/zhaoyanchao/java/shell/product_feature/"
delta_day
=
0
delta_day
=
1
today_param
=
$1
today_param
=
$1
yesterday
=
`
date
-d
"
${
today_param
}
-
$delta_day
day"
"+%Y-%m-%d"
`
yesterday
=
`
date
-d
"
${
today_param
}
-
$delta_day
day"
"+%Y-%m-%d"
`
date_str
=
`
echo
${
yesterday
}
|sed
's/\-/_/g'
`
date_str
=
`
echo
${
today_param
}
|sed
's/\-/_/g'
`
table_name
=
"secoo_rcmd_features.productFeatures_"
${
date_str
}
table_name
=
"secoo_rcmd_features.productFeatures_"
${
date_str
}
echo
${
table_name
}
echo
${
table_name
}
...
@@ -14,8 +14,8 @@ result=$?
...
@@ -14,8 +14,8 @@ result=$?
while
[[
${
result
}
-ne
0
]]
;
while
[[
${
result
}
-ne
0
]]
;
do
do
delta_day
=
$((
$delta_day
+
1
))
delta_day
=
$((
$delta_day
+
1
))
yesterday
=
`
date
-d
"
${
today_param
}
-
$delta_day
day"
"+%Y-%m-%d"
`
param_delta_1
=
`
date
-d
"
${
today_param
}
-
$delta_day
day"
"+%Y-%m-%d"
`
date_str
=
`
echo
${
yesterday
}
|sed
's/\-/_/g'
`
date_str
=
`
echo
${
param_delta_1
}
|sed
's/\-/_/g'
`
table_name
=
"secoo_rcmd_features.productFeatures_"
${
date_str
}
table_name
=
"secoo_rcmd_features.productFeatures_"
${
date_str
}
hive
-e
"desc
$table_name
"
hive
-e
"desc
$table_name
"
result
=
$?
result
=
$?
...
@@ -24,70 +24,73 @@ done
...
@@ -24,70 +24,73 @@ done
# 如果无表,建表
# 如果无表,建表
hive
-f
$"work_dir
"
create_search_product_feature_table.sql
hive
-f
"
${
work_dir
}
"
create_search_product_feature_table.sql
hive
-e
"insert overwrite table secoo_search.search_data_product_feature partition(p_day=date'
$today_param
')
hive
-e
"insert overwrite table secoo_search.search_data_product_feature partition(p_day=date'
$today_param
')
select
select
R.product_id,
R.product_id
as product_id
,
R.isbrand100 as isBrand100,
R.isbrand100
as isBrand100,
R.productpopularity as productPopularity,
R.productpopularity
as productPopularity,
R.ismemberproduct as isMemberProduct ,
R.ismemberproduct
as isMemberProduct ,
R.isspecialproduct as isSpecialProduct,
R.isspecialproduct
as isSpecialProduct,
R.isnewcustomerproduct as isNewCustomerProduct,
R.isnewcustomerproduct
as isNewCustomerProduct,
R.isnewproduct as isNewProduct,
R.isnewproduct
as isNewProduct,
R.isnewstyle
as
isNewStyle,
R.isnewstyle
as
isNewStyle,
R.isselfoperating as isSelfOperating,
R.isselfoperating
as isSelfOperating,
R.productlevel as productLevel,
R.productlevel
as productLevel,
R.ispromotionproduct as isPromotionProduct,
R.ispromotionproduct
as isPromotionProduct,
R.prodgender as prodGender,
R.prodgender
as prodGender,
R.productbrand as productBrand,
R.productbrand
as productBrand,
R.productcategory as productCategory,
R.productcategory
as productCategory,
R.productpricelevel as productPriceLevel,
R.productpricelevel
as productPriceLevel,
R.3daysandroidctr as 3DaysAndroidCTR,
R.3daysandroidctr
as 3DaysAndroidCTR,
R.7daysandroidctr as 7DaysAndroidCTR,
R.7daysandroidctr
as 7DaysAndroidCTR,
R.30daysandroidctr as 30DaysAndroidCTR,
R.30daysandroidctr
as 30DaysAndroidCTR,
R.3daysiosctr as 3DaysIosCTR,
R.3daysiosctr as 3DaysIosCTR,
R.7daysiosctr as 7DaysIosCTR,
R.7daysiosctr as 7DaysIosCTR,
R.30daysiosctr as 30DaysIosCTR,
R.30daysiosctr as 30DaysIosCTR,
R.3daysctr as 3DaysCTR,
R.3daysctr as 3DaysCTR,
R.7daysctr as 7DaysCTR,
R.7daysctr as 7DaysCTR,
R.30daysctr as 30DaysCTR,
R.30daysctr as 30DaysCTR,
R.3daysandroidfavorite as 3DaysAndroidFavorite,
R.3daysandroidfavorite as 3DaysAndroidFavorite,
R.7daysandroidfavorite as 7DaysAndroidFavorite,
R.7daysandroidfavorite as 7DaysAndroidFavorite,
R.30daysandroidfavorite as 30DaysAndroidFavorite,
R.30daysandroidfavorite as 30DaysAndroidFavorite,
R.3daysiosfavorite as 3DaysIosFavorite,
R.3daysiosfavorite as 3DaysIosFavorite,
R.7daysiosfavorite as 7DaysIosFavorite,
R.7daysiosfavorite as 7DaysIosFavorite,
R.30daysiosfavorite as 30DaysIosFavorite,
R.30daysiosfavorite as 30DaysIosFavorite,
R.3daysfavorite as 3DaysFavorite,
R.3daysfavorite as 3DaysFavorite,
R.7daysfavorite as 7DaysFavorite,
R.7daysfavorite as 7DaysFavorite,
R.30daysfavorite as 30DaysFavorite,
R.30daysfavorite as 30DaysFavorite,
R.3daysandroidaddcart as 3DaysAndroidAddCart,
R.3daysandroidaddcart as 3DaysAndroidAddCart,
R.7daysandroidaddcart as 7DaysAndroidAddCart,
R.7daysandroidaddcart as 7DaysAndroidAddCart,
R.30daysandroidaddcart as 30DaysAndroidAddCart,
R.30daysandroidaddcart as 30DaysAndroidAddCart,
R.3daysiosaddcart as 3DaysIosAddCart,
R.3daysiosaddcart as 3DaysIosAddCart,
R.7daysiosaddcart as 7DaysIosAddCart,
R.7daysiosaddcart as 7DaysIosAddCart,
R.30daysiosaddcart as 30DaysIosAddCart,
R.30daysiosaddcart as 30DaysIosAddCart,
R.3daysaddcart as 3DaysAddCart,
R.3daysaddcart as 3DaysAddCart,
R.7daysaddcart as 7DaysAddCart,
R.7daysaddcart as 7DaysAddCart,
R.30daysaddcart as 30DaysAddCart,
R.30daysaddcart as 30DaysAddCart,
W.area_type,
W.area_type as area_type,
W.sale_qty_180,
W.sale_qty_180 as sale_qty_180,
W.sale_qty_90,
W.sale_qty_90 as sale_qty_90,
W.sale_qty_30,
W.sale_qty_30 as sale_qty_30,
W.sale_qty_15
W.sale_qty_15 as sale_qty_15,
from
$table_name
R
left join secoo_fact.fact_search_product_wide_p_day W
P.cate_2_price_level as product_cate2_price_level
on R.product_id = cast(W.product_id AS string) and W.p_day = '
$yesterday
';"
from
$table_name
R
left join secoo_fact.fact_search_product_wide_p_day W on R.product_id = cast(W.product_id AS string) and W.p_day = '
${
yesterday
}
'
left join secoo_fact.fact_search_product_55_30d_click_product_p_day P on W.main_id = P.product_main_id and P.p_day = '
${
yesterday
}
'
;"
...
...
src/main/scripts/user_feature/create_search_user_feature_table.sql
View file @
2ce837d7
This diff is collapsed.
Click to expand it.
src/main/scripts/user_feature/create_user_brand_category_favorite_table.sql
0 → 100644
View file @
2ce837d7
-- Per-device preferred second-level categories and brands for search.
-- For each behaviour (click / add-to-cart / pay) the table keeps six category
-- slots and six brand slots; partitioned by day (p_day).
CREATE EXTERNAL TABLE IF NOT EXISTS secoo_search.user_brand_category2_favorite
(
    device_id           STRING COMMENT '用户设备id',

    -- click preference
    click_category2_id1 STRING COMMENT '用户点击偏好二级类目1',
    click_category2_id2 STRING COMMENT '用户点击偏好二级类目2',
    click_category2_id3 STRING COMMENT '用户点击偏好二级类目3',
    click_category2_id4 STRING COMMENT '用户点击偏好二级类目4',
    click_category2_id5 STRING COMMENT '用户点击偏好二级类目5',
    click_category2_id6 STRING COMMENT '用户点击偏好二级类目6',
    click_brand_id1     STRING COMMENT '用户点击偏好品牌1',
    click_brand_id2     STRING COMMENT '用户点击偏好品牌2',
    click_brand_id3     STRING COMMENT '用户点击偏好品牌3',
    click_brand_id4     STRING COMMENT '用户点击偏好品牌4',
    click_brand_id5     STRING COMMENT '用户点击偏好品牌5',
    click_brand_id6     STRING COMMENT '用户点击偏好品牌6',

    -- add-to-cart preference
    add_category2_id1   STRING COMMENT '用户加购偏好二级类目1',
    add_category2_id2   STRING COMMENT '用户加购偏好二级类目2',
    add_category2_id3   STRING COMMENT '用户加购偏好二级类目3',
    add_category2_id4   STRING COMMENT '用户加购偏好二级类目4',
    add_category2_id5   STRING COMMENT '用户加购偏好二级类目5',
    add_category2_id6   STRING COMMENT '用户加购偏好二级类目6',
    add_brand_id1       STRING COMMENT '用户加购偏好品牌1',
    add_brand_id2       STRING COMMENT '用户加购偏好品牌2',
    add_brand_id3       STRING COMMENT '用户加购偏好品牌3',
    add_brand_id4       STRING COMMENT '用户加购偏好品牌4',
    add_brand_id5       STRING COMMENT '用户加购偏好品牌5',
    add_brand_id6       STRING COMMENT '用户加购偏好品牌6',

    -- purchase preference
    pay_category2_id1   STRING COMMENT '用户购买偏好二级类目1',
    pay_category2_id2   STRING COMMENT '用户购买偏好二级类目2',
    pay_category2_id3   STRING COMMENT '用户购买偏好二级类目3',
    pay_category2_id4   STRING COMMENT '用户购买偏好二级类目4',
    pay_category2_id5   STRING COMMENT '用户购买偏好二级类目5',
    pay_category2_id6   STRING COMMENT '用户购买偏好二级类目6',
    pay_brand_id1       STRING COMMENT '用户购买偏好品牌1',
    pay_brand_id2       STRING COMMENT '用户购买偏好品牌2',
    pay_brand_id3       STRING COMMENT '用户购买偏好品牌3',
    pay_brand_id4       STRING COMMENT '用户购买偏好品牌4',
    pay_brand_id5       STRING COMMENT '用户购买偏好品牌5',
    pay_brand_id6       STRING COMMENT '用户购买偏好品牌6'
)
COMMENT '搜索用户偏好品牌品类特征表'
PARTITIONED BY (
    p_day DATE COMMENT '分区日期'
);
src/main/scripts/user_feature/insert_user_brand_category_favorite_table.sql
0 → 100644
View file @
2ce837d7
-- Click preference, second-level category: for every device, the six
-- categories with the most search clicks in the 30 days before ${today}.
--
-- Slot N holds the category ranked N by click count (ties broken by id so
-- reruns are reproducible); '0' marks an empty slot. The slots are pivoted
-- straight from the row number: going through collect_set()/split() would not
-- keep the ranking order, because collect_set returns an unordered set.
-- A NULL category id keeps its ranked slot and therefore shows up as '0'.
drop table if exists tmp.tmp_user_click_product_category2_id;
create table if not exists tmp.tmp_user_click_product_category2_id as
select
    device_id,
    nvl(max(case when rn = 1 then category2_str end), '0') as click_category2_id1,
    nvl(max(case when rn = 2 then category2_str end), '0') as click_category2_id2,
    nvl(max(case when rn = 3 then category2_str end), '0') as click_category2_id3,
    nvl(max(case when rn = 4 then category2_str end), '0') as click_category2_id4,
    nvl(max(case when rn = 5 then category2_str end), '0') as click_category2_id5,
    nvl(max(case when rn = 6 then category2_str end), '0') as click_category2_id6
from (
    select
        device_id,
        cast(category2_id as string) as category2_str,
        row_number() over (
            partition by device_id
            order by click_pv desc, category2_id
        ) as rn
    from (
        -- click count per (device, category) inside the window
        select
            click_device_id       as device_id,
            product_category_id_2 as category2_id,
            sum(is_click)         as click_pv
        from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
        where p_day >= date_sub(${today}, 30)
          and p_day < ${today}
          and is_click = 1
          and click_device_id is not null
        group by
            click_device_id,
            product_category_id_2
    ) T1
) T2
where rn < 7
group by device_id;
-- Click preference, brand: for every device, the six brands with the most
-- search clicks in the 30 days before ${today}.
--
-- Slot N holds the brand ranked N by click count (ties broken by id so reruns
-- are reproducible); '0' marks an empty slot. The slots are pivoted straight
-- from the row number: going through collect_set()/split() would not keep the
-- ranking order, because collect_set returns an unordered set.
-- A NULL brand id keeps its ranked slot and therefore shows up as '0'.
drop table if exists tmp.tmp_user_click_product_brand_id;
create table if not exists tmp.tmp_user_click_product_brand_id as
select
    device_id,
    nvl(max(case when rn = 1 then brand_str end), '0') as click_brand_id1,
    nvl(max(case when rn = 2 then brand_str end), '0') as click_brand_id2,
    nvl(max(case when rn = 3 then brand_str end), '0') as click_brand_id3,
    nvl(max(case when rn = 4 then brand_str end), '0') as click_brand_id4,
    nvl(max(case when rn = 5 then brand_str end), '0') as click_brand_id5,
    nvl(max(case when rn = 6 then brand_str end), '0') as click_brand_id6
from (
    select
        device_id,
        cast(product_brand_id as string) as brand_str,
        row_number() over (
            partition by device_id
            order by click_pv desc, product_brand_id
        ) as rn
    from (
        -- click count per (device, brand) inside the window
        select
            click_device_id  as device_id,
            product_brand_id as product_brand_id,
            sum(is_click)    as click_pv
        from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
        where p_day >= date_sub(${today}, 30)
          and p_day < ${today}
          and is_click = 1
          and click_device_id is not null
        group by
            click_device_id,
            product_brand_id
    ) T1
) T2
where rn < 7
group by device_id;
-- Add-to-cart preference, second-level category: for every device, the six
-- categories with the most add-to-cart events in the 90 days before ${today}.
--
-- Slot N holds the category ranked N by add-to-cart count (ties broken by id
-- so reruns are reproducible); '0' marks an empty slot. The slots are pivoted
-- straight from the row number: going through collect_set()/split() would not
-- keep the ranking order, because collect_set returns an unordered set.
-- A NULL category id keeps its ranked slot and therefore shows up as '0'.
drop table if exists tmp.tmp_user_add_product_category2_id;
create table if not exists tmp.tmp_user_add_product_category2_id as
select
    device_id,
    nvl(max(case when rn = 1 then category2_str end), '0') as add_category2_id1,
    nvl(max(case when rn = 2 then category2_str end), '0') as add_category2_id2,
    nvl(max(case when rn = 3 then category2_str end), '0') as add_category2_id3,
    nvl(max(case when rn = 4 then category2_str end), '0') as add_category2_id4,
    nvl(max(case when rn = 5 then category2_str end), '0') as add_category2_id5,
    nvl(max(case when rn = 6 then category2_str end), '0') as add_category2_id6
from (
    select
        device_id,
        cast(category2_id as string) as category2_str,
        row_number() over (
            partition by device_id
            order by add_pv desc, category2_id
        ) as rn
    from (
        -- add-to-cart count per (device, category) inside the window
        select
            add_cart_device_id      as device_id,
            product_category_id_2   as category2_id,
            sum(is_action_add_cart) as add_pv
        from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
        where p_day >= date_sub(${today}, 90)
          and p_day < ${today}
          and is_action_add_cart = 1
          and add_cart_device_id is not null
        group by
            add_cart_device_id,
            product_category_id_2
    ) T1
) T2
where rn < 7
group by device_id;
-- Add-to-cart preference, brand: for every device, the six brands with the
-- most add-to-cart events in the 90 days before ${today}.
--
-- Slot N holds the brand ranked N by add-to-cart count (ties broken by id so
-- reruns are reproducible); '0' marks an empty slot. The slots are pivoted
-- straight from the row number: going through collect_set()/split() would not
-- keep the ranking order, because collect_set returns an unordered set.
-- A NULL brand id keeps its ranked slot and therefore shows up as '0'.
drop table if exists tmp.tmp_user_add_product_brand_id;
create table if not exists tmp.tmp_user_add_product_brand_id as
select
    device_id,
    nvl(max(case when rn = 1 then brand_str end), '0') as add_brand_id1,
    nvl(max(case when rn = 2 then brand_str end), '0') as add_brand_id2,
    nvl(max(case when rn = 3 then brand_str end), '0') as add_brand_id3,
    nvl(max(case when rn = 4 then brand_str end), '0') as add_brand_id4,
    nvl(max(case when rn = 5 then brand_str end), '0') as add_brand_id5,
    nvl(max(case when rn = 6 then brand_str end), '0') as add_brand_id6
from (
    select
        device_id,
        cast(product_brand_id as string) as brand_str,
        row_number() over (
            partition by device_id
            order by add_pv desc, product_brand_id
        ) as rn
    from (
        -- add-to-cart count per (device, brand) inside the window
        select
            add_cart_device_id      as device_id,
            product_brand_id        as product_brand_id,
            sum(is_action_add_cart) as add_pv
        from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
        where p_day >= date_sub(${today}, 90)
          and p_day < ${today}
          and is_action_add_cart = 1
          and add_cart_device_id is not null
        group by
            add_cart_device_id,
            product_brand_id
    ) T1
) T2
where rn < 7
group by device_id;
-- Purchase preference, second-level category: for every device, the six
-- categories with the most successful payments in the 365 days before ${today}.
--
-- Slot N holds the category ranked N by payment count (ties broken by id so
-- reruns are reproducible); '0' marks an empty slot. The slots are pivoted
-- straight from the row number: going through collect_set()/split() would not
-- keep the ranking order, because collect_set returns an unordered set.
-- A NULL category id keeps its ranked slot and therefore shows up as '0'.
drop table if exists tmp.tmp_user_pay_product_category2_id;
create table if not exists tmp.tmp_user_pay_product_category2_id as
select
    device_id,
    nvl(max(case when rn = 1 then category2_str end), '0') as pay_category2_id1,
    nvl(max(case when rn = 2 then category2_str end), '0') as pay_category2_id2,
    nvl(max(case when rn = 3 then category2_str end), '0') as pay_category2_id3,
    nvl(max(case when rn = 4 then category2_str end), '0') as pay_category2_id4,
    nvl(max(case when rn = 5 then category2_str end), '0') as pay_category2_id5,
    nvl(max(case when rn = 6 then category2_str end), '0') as pay_category2_id6
from (
    select
        device_id,
        cast(category2_id as string) as category2_str,
        row_number() over (
            partition by device_id
            order by pay_pv desc, category2_id
        ) as rn
    from (
        -- successful-payment count per (device, category) inside the window
        select
            pay_device_id         as device_id,
            product_category_id_2 as category2_id,
            sum(is_pay_success)   as pay_pv
        from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
        where p_day >= date_sub(${today}, 365)
          and p_day < ${today}
          and is_pay_success = 1
          and pay_device_id is not null
        group by
            pay_device_id,
            product_category_id_2
    ) T1
) T2
where rn < 7
group by device_id;
-- Purchase preference, brand: for every device, the six brands with the most
-- successful payments in the 365 days before ${today}.
--
-- Slot N holds the brand ranked N by payment count (ties broken by id so
-- reruns are reproducible); '0' marks an empty slot. The slots are pivoted
-- straight from the row number: going through collect_set()/split() would not
-- keep the ranking order, because collect_set returns an unordered set.
-- A NULL brand id keeps its ranked slot and therefore shows up as '0'.
drop table if exists tmp.tmp_user_pay_product_brand_id;
create table if not exists tmp.tmp_user_pay_product_brand_id as
select
    device_id,
    nvl(max(case when rn = 1 then brand_str end), '0') as pay_brand_id1,
    nvl(max(case when rn = 2 then brand_str end), '0') as pay_brand_id2,
    nvl(max(case when rn = 3 then brand_str end), '0') as pay_brand_id3,
    nvl(max(case when rn = 4 then brand_str end), '0') as pay_brand_id4,
    nvl(max(case when rn = 5 then brand_str end), '0') as pay_brand_id5,
    nvl(max(case when rn = 6 then brand_str end), '0') as pay_brand_id6
from (
    select
        device_id,
        cast(product_brand_id as string) as brand_str,
        row_number() over (
            partition by device_id
            order by pay_pv desc, product_brand_id
        ) as rn
    from (
        -- successful-payment count per (device, brand) inside the window
        select
            pay_device_id       as device_id,
            product_brand_id    as product_brand_id,
            sum(is_pay_success) as pay_pv
        from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
        where p_day >= date_sub(${today}, 365)
          and p_day < ${today}
          and is_pay_success = 1
          and pay_device_id is not null
        group by
            pay_device_id,
            product_brand_id
    ) T1
) T2
where rn < 7
group by device_id;
-- Assemble the daily partition of the user brand / category preference table
-- from the six tmp.tmp_user_* tables (one row per device in each).
--
-- The join is driven by the union of device ids from all six tables. Driving
-- it from the click table alone would drop every device that has add-to-cart
-- or purchase history but no click row, even though those tables are built
-- over longer look-back windows than the click tables.
-- Missing preferences are written as '0'.
insert overwrite table secoo_search.user_brand_category2_favorite partition (p_day = ${today})
select
    D.device_id                     as device_id,
    nvl(C1.click_category2_id1, '0') as click_category2_id1,
    nvl(C1.click_category2_id2, '0') as click_category2_id2,
    nvl(C1.click_category2_id3, '0') as click_category2_id3,
    nvl(C1.click_category2_id4, '0') as click_category2_id4,
    nvl(C1.click_category2_id5, '0') as click_category2_id5,
    nvl(C1.click_category2_id6, '0') as click_category2_id6,
    nvl(B1.click_brand_id1, '0')     as click_brand_id1,
    nvl(B1.click_brand_id2, '0')     as click_brand_id2,
    nvl(B1.click_brand_id3, '0')     as click_brand_id3,
    nvl(B1.click_brand_id4, '0')     as click_brand_id4,
    nvl(B1.click_brand_id5, '0')     as click_brand_id5,
    nvl(B1.click_brand_id6, '0')     as click_brand_id6,
    nvl(C2.add_category2_id1, '0')   as add_category2_id1,
    nvl(C2.add_category2_id2, '0')   as add_category2_id2,
    nvl(C2.add_category2_id3, '0')   as add_category2_id3,
    nvl(C2.add_category2_id4, '0')   as add_category2_id4,
    nvl(C2.add_category2_id5, '0')   as add_category2_id5,
    nvl(C2.add_category2_id6, '0')   as add_category2_id6,
    nvl(B2.add_brand_id1, '0')       as add_brand_id1,
    nvl(B2.add_brand_id2, '0')       as add_brand_id2,
    nvl(B2.add_brand_id3, '0')       as add_brand_id3,
    nvl(B2.add_brand_id4, '0')       as add_brand_id4,
    nvl(B2.add_brand_id5, '0')       as add_brand_id5,
    nvl(B2.add_brand_id6, '0')       as add_brand_id6,
    nvl(C3.pay_category2_id1, '0')   as pay_category2_id1,
    nvl(C3.pay_category2_id2, '0')   as pay_category2_id2,
    nvl(C3.pay_category2_id3, '0')   as pay_category2_id3,
    nvl(C3.pay_category2_id4, '0')   as pay_category2_id4,
    nvl(C3.pay_category2_id5, '0')   as pay_category2_id5,
    nvl(C3.pay_category2_id6, '0')   as pay_category2_id6,
    nvl(B3.pay_brand_id1, '0')       as pay_brand_id1,
    nvl(B3.pay_brand_id2, '0')       as pay_brand_id2,
    nvl(B3.pay_brand_id3, '0')       as pay_brand_id3,
    nvl(B3.pay_brand_id4, '0')       as pay_brand_id4,
    nvl(B3.pay_brand_id5, '0')       as pay_brand_id5,
    nvl(B3.pay_brand_id6, '0')       as pay_brand_id6
from (
    -- every device that appears in at least one preference table, once
    select device_id
    from (
        select device_id from tmp.tmp_user_click_product_category2_id
        union all
        select device_id from tmp.tmp_user_click_product_brand_id
        union all
        select device_id from tmp.tmp_user_add_product_category2_id
        union all
        select device_id from tmp.tmp_user_add_product_brand_id
        union all
        select device_id from tmp.tmp_user_pay_product_category2_id
        union all
        select device_id from tmp.tmp_user_pay_product_brand_id
    ) U
    group by device_id
) D
left join tmp.tmp_user_click_product_category2_id C1
    on D.device_id = C1.device_id
left join tmp.tmp_user_click_product_brand_id B1
    on D.device_id = B1.device_id
left join tmp.tmp_user_add_product_category2_id C2
    on D.device_id = C2.device_id
left join tmp.tmp_user_add_product_brand_id B2
    on D.device_id = B2.device_id
left join tmp.tmp_user_pay_product_category2_id C3
    on D.device_id = C3.device_id
left join tmp.tmp_user_pay_product_brand_id B3
    on D.device_id = B3.device_id
;
\ No newline at end of file
src/main/scripts/user_feature/user_favor_brand_category_feature.sh
0 → 100644
View file @
2ce837d7
# User brand / category preference feature: creates the target table if
# needed and rebuilds the partition for the date given as $1.
work_dir="/data/zhaoyanchao/java/shell/user_feature/"
delta_day=0
today_param=$1

# Run date: $1 shifted back by delta_day days (0 here), formatted as YYYY-MM-DD.
today=$(date -d "${today_param} -${delta_day} day" "+%Y-%m-%d")
echo "${today}"

# DDL uses IF NOT EXISTS, so this is a no-op once the table exists.
hive -f "${work_dir}create_user_brand_category_favorite_table.sql"

# The single quotes are part of the variable value: the SQL file uses
# ${today} directly as a date literal.
hive --hivevar today="'${today}'" -f "${work_dir}insert_user_brand_category_favorite_table.sql"
src/main/scripts/user_feature/user_feature_compose.sh
View file @
2ce837d7
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment