Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
search-model-data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
田川
search-model-data
Commits
2ce837d7
Commit
2ce837d7
authored
Aug 21, 2021
by
wangyulong
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
1.商品特征,用户商品交叉特征
parent
6afe09c3
Show whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
627 additions
and
174 deletions
+627
-174
create_cross_feature_user_product.sql
src/main/scripts/cross_feature/user_product/create_cross_feature_user_product.sql
+4
-4
create_cross_feature_user_product_buy.sql
src/main/scripts/cross_feature/user_product/create_cross_feature_user_product_buy.sql
+11
-0
insert_cross_feature_user_product_buy.sql
src/main/scripts/cross_feature/user_product/insert_cross_feature_user_product_buy.sql
+27
-0
user_product_buy_feature.sh
src/main/scripts/cross_feature/user_product/user_product_buy_feature.sh
+10
-0
user_product_feature.sh
src/main/scripts/cross_feature/user_product/user_product_feature.sh
+29
-23
create_product_feature_ext_table.sql
src/main/scripts/product_feature/create_product_feature_ext_table.sql
+1
-1
create_search_product_feature_table.sql
src/main/scripts/product_feature/create_search_product_feature_table.sql
+3
-4
product_feature_compose.sh
src/main/scripts/product_feature/product_feature_compose.sh
+16
-13
create_search_user_feature_table.sql
src/main/scripts/user_feature/create_search_user_feature_table.sql
+89
-51
create_user_brand_category_favorite_table.sql
src/main/scripts/user_feature/create_user_brand_category_favorite_table.sql
+44
-0
insert_user_brand_category_favorite_table.sql
src/main/scripts/user_feature/insert_user_brand_category_favorite_table.sql
+260
-0
user_favor_brand_category_feature.sh
src/main/scripts/user_feature/user_favor_brand_category_feature.sh
+12
-0
user_feature_compose.sh
src/main/scripts/user_feature/user_feature_compose.sh
+121
-78
No files found.
src/main/scripts/cross_feature/user_product/create_cross_feature_user_product.sql
View file @
2ce837d7
...
@@ -9,11 +9,11 @@ create external table if not exists secoo_search.search_data_cross_feature_user_
...
@@ -9,11 +9,11 @@ create external table if not exists secoo_search.search_data_cross_feature_user_
prodanduserpurchpromotion
string
comment
'购买过的商品是促销品 & 商品本身是促销品'
,
prodanduserpurchpromotion
string
comment
'购买过的商品是促销品 & 商品本身是促销品'
,
prodanduserpurchbig100
string
comment
'购买过百大品牌 & 商品是百大品牌'
,
prodanduserpurchbig100
string
comment
'购买过百大品牌 & 商品是百大品牌'
,
prodpriceinuserbrowsing
string
comment
'商品的价格是否在用户浏览的价格段中'
,
prodpriceinuserbrowsing
string
comment
'商品的价格是否在用户浏览的价格段中'
,
prodpriceinuserpurch
string
comment
'商品的价格是否在用户购买的价格段中'
prodpriceinuserpurch
string
comment
'商品的价格是否在用户购买的价格段中'
,
user_product_buy_is_match
string
comment
'用户购买商品二级类目价格等级是否匹配商品二级类目价格等级:1/0'
,
user_product_buy_price_level
string
comment
'用户购买商品二级类目价格等级是否匹配商品二级类目价格等级:price_level/0'
)
comment
'用户商品交叉特征'
)
comment
'用户商品交叉特征'
partitioned
by
(
partitioned
by
(
p_day
date
comment
'分区日期'
)
p_day
date
comment
'分区日期'
)
row
format
delimited
fields
terminated
by
'
\t
'
row
format
delimited
fields
terminated
by
'
\t
'
stored
as
textfile
;
stored
as
textfile
;
src/main/scripts/cross_feature/user_product/create_cross_feature_user_product_buy.sql
0 → 100644
View file @
2ce837d7
-- User x product purchase cross-feature table, partitioned by day (p_day).
-- IF NOT EXISTS makes this script safe to run before every load.
CREATE EXTERNAL TABLE IF NOT EXISTS secoo_search.search_data_cross_feature_user_product_buy (
    device_id                    STRING COMMENT '设备id',
    product_id                   STRING COMMENT '商品id',
    category2_id                 STRING COMMENT '二级类目id',
    cate_2_price_level           STRING COMMENT '商品二级类目价格段1-10',
    user_product_buy_is_match    STRING COMMENT '用户购买商品二级类目价格等级是否匹配商品二级类目价格等级:1/0',
    user_product_buy_price_level STRING COMMENT '用户购买商品二级类目价格等级是否匹配商品二级类目价格等级:price_level/0'
)
COMMENT '用户商品购买交叉特征'
PARTITIONED BY (
    p_day DATE COMMENT '分区日期'
);
\ No newline at end of file
src/main/scripts/cross_feature/user_product/insert_cross_feature_user_product_buy.sql
0 → 100644
View file @
2ce837d7
-- Rebuilds one daily partition of the user x product purchase cross features.
-- ${yesterday} is substituted textually by Hive before parsing.
-- NOTE(review): the comparisons below assume the substituted value is already
-- a quoted date literal - confirm against the calling script.
INSERT OVERWRITE TABLE secoo_search.search_data_cross_feature_user_product_buy
PARTITION (p_day = ${yesterday})
SELECT
    UPPER(buy.uuid)              AS device_id,
    buy.product_id               AS product_id,
    buy.category_id_2            AS category2_id,
    buy.buy_sku_cat2_price_level AS cate_2_price_level,
    -- 1 when the user-side level equals the product-side level, otherwise 0.
    -- Rows without a joined product fall into the ELSE branch as well.
    CASE
        WHEN buy.buy_sku_cat2_price_level = prod.cate_2_price_level THEN 1
        ELSE 0
    END                          AS user_product_buy_is_match,
    -- The matching level itself, or 0 when the levels differ / nothing joined.
    CASE
        WHEN buy.buy_sku_cat2_price_level = prod.cate_2_price_level THEN buy.buy_sku_cat2_price_level
        ELSE 0
    END                          AS user_product_buy_price_level
FROM secoo_app.app_search_uuid_sku_price_level buy
LEFT JOIN (
    -- Product-side price level for the same day, looked up through the
    -- product's main_id.
    SELECT
        wide.product_id                       AS product_id,
        SPLIT(wide.category_org_code, '_')[2] AS category_id_2,
        lvl.cate_2_price_level                AS cate_2_price_level
    FROM secoo_fact.fact_search_product_wide_p_day wide
    JOIN secoo_fact.fact_search_product_55_30d_click_product_p_day lvl
        ON wide.main_id = lvl.product_main_id
       AND lvl.p_day = ${yesterday}
    WHERE wide.p_day = ${yesterday}
) prod
    ON buy.product_id = prod.product_id
WHERE buy.p_day = ${yesterday}
src/main/scripts/cross_feature/user_product/user_product_buy_feature.sh
0 → 100644
View file @
2ce837d7
# User x product cross features (purchase).
# Argument 1: a reference date accepted by `date -d`; the partition that gets
# loaded is the day ${delta_day} day(s) before it.
work_dir="/data/zhaoyanchao/java/shell/cross_feature/user_product/"
delta_day=1
today_param=$1
yesterday=$(date -d "${today_param} -${delta_day} day" "+%Y-%m-%d")

# Make sure the target table exists before loading it.
hive -f "${work_dir}create_cross_feature_user_product_buy.sql"

# The value is passed wrapped in single quotes, so ${yesterday} inside the SQL
# expands to a quoted date literal.
hive --hivevar yesterday="'${yesterday}'" -f "${work_dir}insert_cross_feature_user_product_buy.sql"
src/main/scripts/cross_feature/user_product/user_product_feature.sh
View file @
2ce837d7
...
@@ -2,11 +2,11 @@
...
@@ -2,11 +2,11 @@
work_dir
=
"/data/zhaoyanchao/java/shell/cross_feature/user_product/"
work_dir
=
"/data/zhaoyanchao/java/shell/cross_feature/user_product/"
# 检查推荐最新表名
# 检查推荐最新表名
delta_day
=
0
delta_day
=
1
today_param
=
$1
today_param
=
$1
yesterday
=
`
date
-d
"
${
today_param
}
-
$delta_day
day"
"+%Y-%m-%d"
`
yesterday
=
`
date
-d
"
${
today_param
}
-
$delta_day
day"
"+%Y-%m-%d"
`
date_str
=
`
echo
${
yesterday
}
|sed
's/\-/_/g'
`
date_str
=
`
echo
${
today_param
}
|sed
's/\-/_/g'
`
table_name
=
"secoo_rcmd_features.userProductCrossFeatures_"
${
date_str
}
table_name
=
"secoo_rcmd_features.userProductCrossFeatures_"
${
date_str
}
echo
${
table_name
}
echo
${
table_name
}
...
@@ -16,9 +16,9 @@ while [[ ${result} -ne 0 ]];
...
@@ -16,9 +16,9 @@ while [[ ${result} -ne 0 ]];
do
do
delta_day
=
$((
$delta_day
+
1
))
delta_day
=
$((
$delta_day
+
1
))
yesterday
=
`
date
-d
"
${
today_param
}
-
$delta_day
day"
"+%Y-%m-%d"
`
today_param_1
=
`
date
-d
"
${
today_param
}
-
$delta_day
day"
"+%Y-%m-%d"
`
date_str
=
`
echo
${
yesterday
}
|sed
's/\-/_/g'
`
date_str
=
`
echo
${
today_param_1
}
|sed
's/\-/_/g'
`
table_name
=
"secoo_rcmd_features.userProductCrossFeatures_"
${
date_str
}
table_name
=
"secoo_rcmd_features.userProductCrossFeatures_"
${
date_str
}
hive
-e
"desc
$table_name
"
hive
-e
"desc
$table_name
"
...
@@ -30,19 +30,24 @@ done
...
@@ -30,19 +30,24 @@ done
# 如果无表,建表
# 如果无表,建表
hive
-f
"
$work_dir
"
create_cross_feature_user_product.sql
hive
-f
"
$work_dir
"
create_cross_feature_user_product.sql
hive
-e
"insert overwrite table secoo_search.search_data_cross_feature_user_product partition(p_day=date'
$
today_param
')
hive
-e
"insert overwrite table secoo_search.search_data_cross_feature_user_product partition(p_day=date'
$
{
today_param
}
')
select
select
device_id,
T1.device_id,
product_id,
T1.product_id,
prodanduserpurchspecial,
T1.prodanduserpurchspecial,
prodinusercat1,
T1.prodinusercat1,
prodinusercat2,
T1.prodinusercat2,
prodanduserpurchnew,
T1.prodanduserpurchnew,
prodanduserpurchpromotion,
T1.prodanduserpurchpromotion,
prodanduserpurchbig100,
T1.prodanduserpurchbig100,
prodpriceinuserbrowsing,
T1.prodpriceinuserbrowsing,
prodpriceinuserpurch
T1.prodpriceinuserpurch,
from
$table_name
;"
T2.user_product_buy_is_match,
T2.user_product_buy_price_level
from
${
table_name
}
T1
left join secoo_search.search_data_cross_feature_user_product_buy T2 on T1.device_id = T2.device_id and T1.product_id = T2.product_id
where T2.p_day = '
${
yesterday
}
';"
hive
-e
"SELECT
hive
-e
"SELECT
sum(prodanduserpurchspecial),
sum(prodanduserpurchspecial),
sum(prodinusercat1),
sum(prodinusercat1),
...
@@ -51,9 +56,10 @@ hive -e "SELECT
...
@@ -51,9 +56,10 @@ hive -e "SELECT
sum(prodanduserpurchpromotion),
sum(prodanduserpurchpromotion),
sum(prodanduserpurchbig100),
sum(prodanduserpurchbig100),
sum(prodpriceinuserbrowsing),
sum(prodpriceinuserbrowsing),
sum(prodpriceinuserpurch)
sum(prodpriceinuserpurch),
FROM secoo_search.search_data_cross_feature_user_product
sum(user_product_buy_is_match)
WHERE p_day = '
$today_param
'"
FROM secoo_search.search_data_cross_feature_user_product WHERE p_day = '
${
today_param
}
'"
hive
-e
"SELECT
hive
-e
"SELECT
assert_true(sum(prodanduserpurchspecial) > 0),
assert_true(sum(prodanduserpurchspecial) > 0),
assert_true(sum(prodinusercat1) > 0),
assert_true(sum(prodinusercat1) > 0),
...
@@ -62,6 +68,6 @@ hive -e "SELECT
...
@@ -62,6 +68,6 @@ hive -e "SELECT
assert_true(sum(prodanduserpurchpromotion) > 0),
assert_true(sum(prodanduserpurchpromotion) > 0),
assert_true(sum(prodanduserpurchbig100) > 0),
assert_true(sum(prodanduserpurchbig100) > 0),
assert_true(sum(prodpriceinuserbrowsing) > 0),
assert_true(sum(prodpriceinuserbrowsing) > 0),
assert_true(sum(prodpriceinuserpurch) > 0)
assert_true(sum(prodpriceinuserpurch) > 0),
FROM secoo_search.search_data_cross_feature_user_product
assert_true(sum(user_product_buy_is_match) > 0)
WHERE p_day = '
$today_param
'"
FROM secoo_search.search_data_cross_feature_user_product WHERE p_day = '
${
today_param
}
'"
\ No newline at end of file
\ No newline at end of file
src/main/scripts/product_feature/create_product_feature_ext_table.sql
View file @
2ce837d7
-- 商品特征扩展表
-- 商品特征扩展表
create
external
table
if
not
exists
secoo_search
.
search_data_product_feature_ext
create
external
table
if
not
exists
secoo_search
.
search_data_product_feature_ext
(
(
`product_id`
bigint
COMMENT
'商品ID'
,
`product_id`
bigint
comment
'商品ID'
,
`area_type`
tinyint
comment
'货源地,(0大陆 1香港 2美国 3日本 4意大利)'
,
`area_type`
tinyint
comment
'货源地,(0大陆 1香港 2美国 3日本 4意大利)'
,
`sale_qty_180`
bigint
comment
'前180天至今销售数量'
,
`sale_qty_180`
bigint
comment
'前180天至今销售数量'
,
`sale_qty_90`
bigint
comment
'前90天至今销售数量'
,
`sale_qty_90`
bigint
comment
'前90天至今销售数量'
,
...
...
src/main/scripts/product_feature/create_search_product_feature_table.sql
View file @
2ce837d7
...
@@ -51,9 +51,8 @@ create external table if not exists secoo_search.search_data_product_feature
...
@@ -51,9 +51,8 @@ create external table if not exists secoo_search.search_data_product_feature
sale_qty_180
bigint
comment
'前180天至今销售数量'
,
sale_qty_180
bigint
comment
'前180天至今销售数量'
,
sale_qty_90
bigint
comment
'前90天至今销售数量'
,
sale_qty_90
bigint
comment
'前90天至今销售数量'
,
sale_qty_30
bigint
comment
'前30天至今销售数量'
,
sale_qty_30
bigint
comment
'前30天至今销售数量'
,
sale_qty_15
bigint
comment
'前15天至今销售数量'
sale_qty_15
bigint
comment
'前15天至今销售数量'
,
product_cate2_price_level
tinyint
comment
'商品二级类目价格段1-10'
)
comment
'商品特征'
)
comment
'商品特征'
partitioned
by
(
partitioned
by
(
p_day
date
comment
'分区日期'
)
p_day
date
comment
'分区日期'
)
stored
as
parquet
;
stored
as
parquet
;
src/main/scripts/product_feature/product_feature_compose.sh
View file @
2ce837d7
...
@@ -2,10 +2,10 @@
...
@@ -2,10 +2,10 @@
# 检查推荐最新表名
# 检查推荐最新表名
work_dir
=
"/data/zhaoyanchao/java/shell/product_feature/"
work_dir
=
"/data/zhaoyanchao/java/shell/product_feature/"
delta_day
=
0
delta_day
=
1
today_param
=
$1
today_param
=
$1
yesterday
=
`
date
-d
"
${
today_param
}
-
$delta_day
day"
"+%Y-%m-%d"
`
yesterday
=
`
date
-d
"
${
today_param
}
-
$delta_day
day"
"+%Y-%m-%d"
`
date_str
=
`
echo
${
yesterday
}
|sed
's/\-/_/g'
`
date_str
=
`
echo
${
today_param
}
|sed
's/\-/_/g'
`
table_name
=
"secoo_rcmd_features.productFeatures_"
${
date_str
}
table_name
=
"secoo_rcmd_features.productFeatures_"
${
date_str
}
echo
${
table_name
}
echo
${
table_name
}
...
@@ -14,8 +14,8 @@ result=$?
...
@@ -14,8 +14,8 @@ result=$?
while
[[
${
result
}
-ne
0
]]
;
while
[[
${
result
}
-ne
0
]]
;
do
do
delta_day
=
$((
$delta_day
+
1
))
delta_day
=
$((
$delta_day
+
1
))
yesterday
=
`
date
-d
"
${
today_param
}
-
$delta_day
day"
"+%Y-%m-%d"
`
param_delta_1
=
`
date
-d
"
${
today_param
}
-
$delta_day
day"
"+%Y-%m-%d"
`
date_str
=
`
echo
${
yesterday
}
|sed
's/\-/_/g'
`
date_str
=
`
echo
${
param_delta_1
}
|sed
's/\-/_/g'
`
table_name
=
"secoo_rcmd_features.productFeatures_"
${
date_str
}
table_name
=
"secoo_rcmd_features.productFeatures_"
${
date_str
}
hive
-e
"desc
$table_name
"
hive
-e
"desc
$table_name
"
result
=
$?
result
=
$?
...
@@ -24,11 +24,11 @@ done
...
@@ -24,11 +24,11 @@ done
# 如果无表,建表
# 如果无表,建表
hive
-f
$"work_dir
"
create_search_product_feature_table.sql
hive
-f
"
${
work_dir
}
"
create_search_product_feature_table.sql
hive
-e
"insert overwrite table secoo_search.search_data_product_feature partition(p_day=date'
$today_param
')
hive
-e
"insert overwrite table secoo_search.search_data_product_feature partition(p_day=date'
$today_param
')
select
select
R.product_id,
R.product_id
as product_id
,
R.isbrand100 as isBrand100,
R.isbrand100 as isBrand100,
R.productpopularity as productPopularity,
R.productpopularity as productPopularity,
R.ismemberproduct as isMemberProduct ,
R.ismemberproduct as isMemberProduct ,
...
@@ -80,14 +80,17 @@ select
...
@@ -80,14 +80,17 @@ select
R.7daysaddcart as 7DaysAddCart,
R.7daysaddcart as 7DaysAddCart,
R.30daysaddcart as 30DaysAddCart,
R.30daysaddcart as 30DaysAddCart,
W.area_type,
W.area_type as area_type,
W.sale_qty_180,
W.sale_qty_180 as sale_qty_180,
W.sale_qty_90,
W.sale_qty_90 as sale_qty_90,
W.sale_qty_30,
W.sale_qty_30 as sale_qty_30,
W.sale_qty_15
W.sale_qty_15 as sale_qty_15,
P.cate_2_price_level as product_cate2_price_level
from
$table_name
R
from
$table_name
R
left join secoo_fact.fact_search_product_wide_p_day W
left join secoo_fact.fact_search_product_wide_p_day W on R.product_id = cast(W.product_id AS string) and W.p_day = '
${
yesterday
}
'
on R.product_id = cast(W.product_id AS string) and W.p_day = '
$yesterday
';"
left join secoo_fact.fact_search_product_55_30d_click_product_p_day P on W.main_id = P.product_main_id and P.p_day = '
${
yesterday
}
'
;"
...
...
src/main/scripts/user_feature/create_search_user_feature_table.sql
View file @
2ce837d7
...
@@ -6,78 +6,116 @@ device_id string comment '设备id',
...
@@ -6,78 +6,116 @@ device_id string comment '设备id',
addCartIn7Days
string
comment
'7天加购量'
,
addCartIn7Days
string
comment
'7天加购量'
,
1
YearCartProductCount
string
comment
'1年加购量'
,
1
YearCartProductCount
string
comment
'1年加购量'
,
1
YearOrderProductManCount
string
comment
'1年男性订单'
,
1
YearOrderProductManCount
string
comment
'1年男性订单'
,
1
YearOrderProductManPriceAmt
string
comment
''
,
1
YearOrderProductManPriceAmt
string
comment
'
最近一年购买男装总金额
'
,
30
DaysChannelPvJrzk
string
comment
'今日折扣'
,
30
DaysChannelPvJrzk
string
comment
'
最近30天浏览频道次数_
今日折扣'
,
1
YearOrderTicketCategoryCount
string
comment
''
,
1
YearOrderTicketCategoryCount
string
comment
'
最近一年使用优惠卷品类数
'
,
30
DaysDetailPv
string
comment
''
,
30
DaysDetailPv
string
comment
'
最近30天商品详情页浏览次数
'
,
userInterestCategory1_0
string
comment
'用户第一个一级品类偏好'
,
userInterestCategory1_0
string
comment
'用户第一个一级品类偏好'
,
userInterestCategory2_0
string
comment
'用户第一个二级品类偏好'
,
userInterestCategory2_0
string
comment
'用户第一个二级品类偏好'
,
userInterestCategory1_1
string
comment
'用户第二个一级品类偏好'
,
userInterestCategory1_1
string
comment
'用户第二个一级品类偏好'
,
userInterestCategory2_1
string
comment
'用户第二个二级品类偏好'
,
userInterestCategory2_1
string
comment
'用户第二个二级品类偏好'
,
userInterestCategory3_0
string
comment
'用户第一个三级品类偏好'
,
userInterestCategory3_0
string
comment
'用户第一个三级品类偏好'
,
30
DaysSearchCategoryCount
string
comment
''
,
30
DaysSearchCategoryCount
string
comment
'
最近30天搜索品类数
'
,
1
YearOrderPayPriceAmt
string
comment
''
,
1
YearOrderPayPriceAmt
string
comment
'
最近一年付费总金额
'
,
userInterestCategory1_2
string
comment
'用户第三个一级品类偏好'
,
userInterestCategory1_2
string
comment
'用户第三个一级品类偏好'
,
30
DaysSearchBrandCount
string
comment
''
,
30
DaysSearchBrandCount
string
comment
'
最近30天搜索品牌数
'
,
userInterestCategory1_3
string
comment
'用户第四个一级品类偏好'
,
userInterestCategory1_3
string
comment
'用户第四个一级品类偏好'
,
userInterestCategory2_2
string
comment
'用户第三个二级品类偏好'
,
userInterestCategory2_2
string
comment
'用户第三个二级品类偏好'
,
userInterestCategory3_1
string
comment
'用户第二个三级品类偏好'
,
userInterestCategory3_1
string
comment
'用户第二个三级品类偏好'
,
1
YearCartCategoryCount
string
comment
''
,
1
YearCartCategoryCount
string
comment
'
当前购物车品类数(最近一年)
'
,
userInterestCategory2_3
string
comment
'用户第四个二级品类偏好'
,
userInterestCategory2_3
string
comment
'用户第四个二级品类偏好'
,
userInterestCategory1_4
string
comment
'用户第五个一级品类偏好'
,
userInterestCategory1_4
string
comment
'用户第五个一级品类偏好'
,
userInterestCategory3_2
string
comment
'用户第二个三级品类偏好'
,
userInterestCategory3_2
string
comment
'用户第二个三级品类偏好'
,
userInterestCategory1_5
string
comment
'用户第六个一级品类偏好'
,
userInterestCategory1_5
string
comment
'用户第六个一级品类偏好'
,
userInterestCategory3_3
string
comment
'用户第四个三级品类偏好'
,
userInterestCategory3_3
string
comment
'用户第四个三级品类偏好'
,
1
YearOrderProductOtherCount
string
comment
''
,
1
YearOrderProductOtherCount
string
comment
'
最近一年购买其他次数
'
,
30
DaysDetailPva
string
comment
''
,
30
DaysDetailPva
string
comment
'
最近30天商品详情页浏览次数_A级
'
,
userInterestCategory2_4
string
comment
'用户第五个二级品类偏好'
,
userInterestCategory2_4
string
comment
'用户第五个二级品类偏好'
,
1
YearOrderUsePointCount
string
comment
''
,
1
YearOrderUsePointCount
string
comment
'
最近一年使用总积分
'
,
30
DaysChannelPvPaiHangBang
string
comment
''
,
30
DaysChannelPvPaiHangBang
string
comment
'
最近30天浏览频道次数_排行榜
'
,
30
daysChannelPvXinPinBang
string
comment
''
,
30
daysChannelPvXinPinBang
string
comment
'
最近30天浏览频道次数_新品榜
'
,
1
YearAddFavCategoryCount
string
comment
''
,
1
YearAddFavCategoryCount
string
comment
'
当前收藏品类数(最近一年)
'
,
userInterestCategory3_4
string
comment
'用户第五个三级品类偏好'
,
userInterestCategory3_4
string
comment
'用户第五个三级品类偏好'
,
userInterestCategory2_5
string
comment
'用户第六个二级品类偏好'
,
userInterestCategory2_5
string
comment
'用户第六个二级品类偏好'
,
activeInWeekends
string
comment
''
,
activeInWeekends
string
comment
'
节假日是否活跃
'
,
1
YearOrderCount
string
comment
''
,
1
YearOrderCount
string
comment
'
最近一年订单总次数
'
,
30
DaysDetailPvb
string
comment
''
,
30
DaysDetailPvb
string
comment
'
最近30天商品详情页浏览次数_B级
'
,
1
YearOrderPayProductCount
string
comment
''
,
1
YearOrderPayProductCount
string
comment
'
最近一年付费商品数
'
,
1
YearOrderUsePointPriceAmt
string
comment
''
,
1
YearOrderUsePointPriceAmt
string
comment
'
最近一年抵扣积分金额
'
,
1
YearAddFavProductCount
string
comment
''
,
1
YearAddFavProductCount
string
comment
'
当前收藏商品数(最近一年)
'
,
30
DaysDetailPvab
string
comment
''
,
30
DaysDetailPvab
string
comment
'
最近30天商品详情页浏览次数_AB级
'
,
userInterestCategory3_5
string
comment
'用户第六个三级品类偏好'
,
userInterestCategory3_5
string
comment
'用户第六个三级品类偏好'
,
deviceType
string
comment
''
,
deviceType
string
comment
'
设备类型(IOS/Android)
'
,
activeIn7Days
string
comment
''
,
activeIn7Days
string
comment
'
最近7天是否活跃
'
,
gender
string
comment
''
,
gender
string
comment
'
用户性别
'
,
purchasedBrand100
string
comment
''
,
purchasedBrand100
string
comment
'
是否购买过百大品牌
'
,
30
DaysChannelPvRenQiBang
string
comment
''
,
30
DaysChannelPvRenQiBang
string
comment
'
最近30天浏览频道次数_人气榜
'
,
1
YearCartPriceAmt
string
comment
''
,
1
YearCartPriceAmt
string
comment
'
当前购物车总金额(最近一年)
'
,
regularCustomer
string
comment
''
,
regularCustomer
string
comment
'
是否常客
'
,
1
YearOrderPayBrandCount
string
comment
''
,
1
YearOrderPayBrandCount
string
comment
'
最近一年付费品牌数
'
,
1
YearOrderTicketBrandCount
string
comment
''
,
1
YearOrderTicketBrandCount
string
comment
'
最近一年使用优惠卷品牌数
'
,
1
YearOrderProductOtherPriceAmt
string
comment
''
,
1
YearOrderProductOtherPriceAmt
string
comment
'
最近一年购买其他总金额
'
,
1
YearOrderProductCountAvg
string
comment
''
,
1
YearOrderProductCountAvg
string
comment
'
最近一年订单包含平均商品数
'
,
favoriteIn7Days
string
comment
''
,
favoriteIn7Days
string
comment
'
七天收藏商品
'
,
30
DaysDetailPvn
string
comment
''
,
30
DaysDetailPvn
string
comment
'
最近30天商品详情页浏览次数_N级
'
,
1
YearAddFavPriceAmt
string
comment
''
,
1
YearAddFavPriceAmt
string
comment
'
当前收藏总金额(最近一年)
'
,
1
YearOrderProductWomanPriceAmt
string
comment
''
,
1
YearOrderProductWomanPriceAmt
string
comment
'
最近一年购买女装总金额
'
,
1
YearOrderProductWomanCount
string
comment
''
,
1
YearOrderProductWomanCount
string
comment
'
最近一年购买女装次数
'
,
30
DaysChannelPvXscj
string
comment
''
,
30
DaysChannelPvXscj
string
comment
'
最近30天浏览频道次数_限时抽奖
'
,
1
YearOrderTicketPriceAmt
string
comment
''
,
1
YearOrderTicketPriceAmt
string
comment
'
最近一年使用优惠卷总金额
'
,
userInterestBrand_0
string
comment
'用户第一个品牌偏好'
,
userInterestBrand_0
string
comment
'用户第一个品牌偏好'
,
1
YearAddFavBrandCount
string
comment
''
,
1
YearAddFavBrandCount
string
comment
'
当前收藏品牌数(最近一年)
'
,
userInterestBrand_1
string
comment
'用户第二个品牌偏好'
,
userInterestBrand_1
string
comment
'用户第二个品牌偏好'
,
userInterestBrand_2
string
comment
'用户第三个品牌偏好'
,
userInterestBrand_2
string
comment
'用户第三个品牌偏好'
,
1
YearOrderPayCount
string
comment
''
,
1
YearOrderPayCount
string
comment
'
最近一年付费订单数
'
,
30
DaysChannelPvAoLai
string
comment
''
,
30
DaysChannelPvAoLai
string
comment
'
最近30天浏览频道次数_奥莱特卖
'
,
userInterestBrand_3
string
comment
'用户第四个品牌偏好'
,
userInterestBrand_3
string
comment
'用户第四个品牌偏好'
,
30
DaysDetailPvs
string
comment
''
,
30
DaysDetailPvs
string
comment
'最近30天商品详情页浏览次数_S级'
,
purchasedItems
string
comment
''
,
purchasedItems
string
comment
'是否购过商品'
,
30
DaysSearchCount
string
comment
''
,
30
DaysSearchCount
string
comment
'最近30天搜索次数'
,
1
YearCartBrandCount
string
comment
''
,
1
YearCartBrandCount
string
comment
'当前购物车品牌数(最近一年)'
,
1
YearOrderTicketCount
string
comment
''
,
1
YearOrderTicketCount
string
comment
'最近一年使用优惠卷订单数'
,
1
YearOrderPayCategoryCount
string
comment
''
1
YearOrderPayCategoryCount
string
comment
'最近一年付费品类数'
,
click_category2_id1
string
comment
'用户点击偏好二级类目1'
,
click_category2_id2
string
comment
'用户点击偏好二级类目2'
,
click_category2_id3
string
comment
'用户点击偏好二级类目3'
,
click_category2_id4
string
comment
'用户点击偏好二级类目4'
,
click_category2_id5
string
comment
'用户点击偏好二级类目5'
,
click_category2_id6
string
comment
'用户点击偏好二级类目6'
,
click_brand_id1
string
comment
'用户点击偏好品牌1'
,
click_brand_id2
string
comment
'用户点击偏好品牌2'
,
click_brand_id3
string
comment
'用户点击偏好品牌3'
,
click_brand_id4
string
comment
'用户点击偏好品牌4'
,
click_brand_id5
string
comment
'用户点击偏好品牌5'
,
click_brand_id6
string
comment
'用户点击偏好品牌6'
,
add_category2_id1
string
comment
'用户加购偏好二级类目1'
,
add_category2_id2
string
comment
'用户加购偏好二级类目2'
,
add_category2_id3
string
comment
'用户加购偏好二级类目3'
,
add_category2_id4
string
comment
'用户加购偏好二级类目4'
,
add_category2_id5
string
comment
'用户加购偏好二级类目5'
,
add_category2_id6
string
comment
'用户加购偏好二级类目6'
,
add_brand_id1
string
comment
'用户加购偏好品牌1'
,
add_brand_id2
string
comment
'用户加购偏好品牌2'
,
add_brand_id3
string
comment
'用户加购偏好品牌3'
,
add_brand_id4
string
comment
'用户加购偏好品牌4'
,
add_brand_id5
string
comment
'用户加购偏好品牌5'
,
add_brand_id6
string
comment
'用户加购偏好品牌6'
,
pay_category2_id1
string
comment
'用户购买偏好二级类目1'
,
pay_category2_id2
string
comment
'用户购买偏好二级类目2'
,
pay_category2_id3
string
comment
'用户购买偏好二级类目3'
,
pay_category2_id4
string
comment
'用户购买偏好二级类目4'
,
pay_category2_id5
string
comment
'用户购买偏好二级类目5'
,
pay_category2_id6
string
comment
'用户购买偏好二级类目6'
,
pay_brand_id1
string
comment
'用户购买偏好品牌1'
,
pay_brand_id2
string
comment
'用户购买偏好品牌2'
,
pay_brand_id3
string
comment
'用户购买偏好品牌3'
,
pay_brand_id4
string
comment
'用户购买偏好品牌4'
,
pay_brand_id5
string
comment
'用户购买偏好品牌5'
,
pay_brand_id6
string
comment
'用户购买偏好品牌6'
,
grow_level
string
comment
'用户成长等级'
)
comment
'用户特征'
)
comment
'用户特征'
partitioned
by
(
partitioned
by
(
p_day
date
comment
'分区日期'
)
p_day
date
comment
'分区日期'
)
stored
as
parquet
;
stored
as
parquet
;
src/main/scripts/user_feature/create_user_brand_category_favorite_table.sql
0 → 100644
View file @
2ce837d7
-- Per-device favourite second-level categories and brands (top 6 each) for
-- click, add-to-cart and purchase behaviour, partitioned by day (p_day).
CREATE EXTERNAL TABLE IF NOT EXISTS secoo_search.user_brand_category2_favorite (
    device_id           STRING COMMENT '用户设备id',
    -- click preferences
    click_category2_id1 STRING COMMENT '用户点击偏好二级类目1',
    click_category2_id2 STRING COMMENT '用户点击偏好二级类目2',
    click_category2_id3 STRING COMMENT '用户点击偏好二级类目3',
    click_category2_id4 STRING COMMENT '用户点击偏好二级类目4',
    click_category2_id5 STRING COMMENT '用户点击偏好二级类目5',
    click_category2_id6 STRING COMMENT '用户点击偏好二级类目6',
    click_brand_id1     STRING COMMENT '用户点击偏好品牌1',
    click_brand_id2     STRING COMMENT '用户点击偏好品牌2',
    click_brand_id3     STRING COMMENT '用户点击偏好品牌3',
    click_brand_id4     STRING COMMENT '用户点击偏好品牌4',
    click_brand_id5     STRING COMMENT '用户点击偏好品牌5',
    click_brand_id6     STRING COMMENT '用户点击偏好品牌6',
    -- add-to-cart preferences
    add_category2_id1   STRING COMMENT '用户加购偏好二级类目1',
    add_category2_id2   STRING COMMENT '用户加购偏好二级类目2',
    add_category2_id3   STRING COMMENT '用户加购偏好二级类目3',
    add_category2_id4   STRING COMMENT '用户加购偏好二级类目4',
    add_category2_id5   STRING COMMENT '用户加购偏好二级类目5',
    add_category2_id6   STRING COMMENT '用户加购偏好二级类目6',
    add_brand_id1       STRING COMMENT '用户加购偏好品牌1',
    add_brand_id2       STRING COMMENT '用户加购偏好品牌2',
    add_brand_id3       STRING COMMENT '用户加购偏好品牌3',
    add_brand_id4       STRING COMMENT '用户加购偏好品牌4',
    add_brand_id5       STRING COMMENT '用户加购偏好品牌5',
    add_brand_id6       STRING COMMENT '用户加购偏好品牌6',
    -- purchase preferences
    pay_category2_id1   STRING COMMENT '用户购买偏好二级类目1',
    pay_category2_id2   STRING COMMENT '用户购买偏好二级类目2',
    pay_category2_id3   STRING COMMENT '用户购买偏好二级类目3',
    pay_category2_id4   STRING COMMENT '用户购买偏好二级类目4',
    pay_category2_id5   STRING COMMENT '用户购买偏好二级类目5',
    pay_category2_id6   STRING COMMENT '用户购买偏好二级类目6',
    pay_brand_id1       STRING COMMENT '用户购买偏好品牌1',
    pay_brand_id2       STRING COMMENT '用户购买偏好品牌2',
    pay_brand_id3       STRING COMMENT '用户购买偏好品牌3',
    pay_brand_id4       STRING COMMENT '用户购买偏好品牌4',
    pay_brand_id5       STRING COMMENT '用户购买偏好品牌5',
    pay_brand_id6       STRING COMMENT '用户购买偏好品牌6'
)
COMMENT '搜索用户偏好品牌品类特征表'
PARTITIONED BY (
    p_day DATE COMMENT '分区日期'
);
src/main/scripts/user_feature/insert_user_brand_category_favorite_table.sql
0 → 100644
View file @
2ce837d7
-- Click preference: second-level categories.
-- For every device, the six second-level categories with the most clicks in
-- the 30 days before ${today} (that day excluded). Slot N holds the category
-- ranked N; unused slots are filled with 0.
--
-- The slots are pivoted straight from the rank. Packing the ids with
-- collect_set and reading them back by position would not keep the rank
-- order, and would shift slots whenever an id is NULL.
drop table if exists tmp.tmp_user_click_product_category2_id;
create table if not exists tmp.tmp_user_click_product_category2_id as
select
    device_id,
    nvl(max(case when rn = 1 then category2_id_str end), 0) as click_category2_id1,
    nvl(max(case when rn = 2 then category2_id_str end), 0) as click_category2_id2,
    nvl(max(case when rn = 3 then category2_id_str end), 0) as click_category2_id3,
    nvl(max(case when rn = 4 then category2_id_str end), 0) as click_category2_id4,
    nvl(max(case when rn = 5 then category2_id_str end), 0) as click_category2_id5,
    nvl(max(case when rn = 6 then category2_id_str end), 0) as click_category2_id6
from (
    select
        device_id,
        cast(category2_id as string) as category2_id_str,
        -- the id is a tie-breaker so equal counts rank the same way on every run
        row_number() over (
            partition by device_id
            order by click_pv desc, category2_id
        ) as rn
    from (
        select
            click_device_id       as device_id,
            product_category_id_2 as category2_id,
            sum(is_click)         as click_pv
        from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
        where p_day >= date_sub(${today}, 30)
          and p_day < ${today}
          and is_click = 1
          and click_device_id is not null
          -- a NULL id can never be a usable preference; keep it out of the ranking
          and product_category_id_2 is not null
        group by
            click_device_id,
            product_category_id_2
    ) click_counts
) ranked
where rn < 7
group by device_id;
-- Click preference: brands.
-- For every device, the six brands with the most clicks in the 30 days before
-- ${today} (that day excluded). Slot N holds the brand ranked N; unused slots
-- are filled with 0.
--
-- The slots are pivoted straight from the rank. Packing the ids with
-- collect_set and reading them back by position would not keep the rank
-- order, and would shift slots whenever an id is NULL.
drop table if exists tmp.tmp_user_click_product_brand_id;
create table if not exists tmp.tmp_user_click_product_brand_id as
select
    device_id,
    nvl(max(case when rn = 1 then product_brand_id_str end), 0) as click_brand_id1,
    nvl(max(case when rn = 2 then product_brand_id_str end), 0) as click_brand_id2,
    nvl(max(case when rn = 3 then product_brand_id_str end), 0) as click_brand_id3,
    nvl(max(case when rn = 4 then product_brand_id_str end), 0) as click_brand_id4,
    nvl(max(case when rn = 5 then product_brand_id_str end), 0) as click_brand_id5,
    nvl(max(case when rn = 6 then product_brand_id_str end), 0) as click_brand_id6
from (
    select
        device_id,
        cast(product_brand_id as string) as product_brand_id_str,
        -- the id is a tie-breaker so equal counts rank the same way on every run
        row_number() over (
            partition by device_id
            order by click_pv desc, product_brand_id
        ) as rn
    from (
        select
            click_device_id  as device_id,
            product_brand_id as product_brand_id,
            sum(is_click)    as click_pv
        from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
        where p_day >= date_sub(${today}, 30)
          and p_day < ${today}
          and is_click = 1
          and click_device_id is not null
          -- a NULL id can never be a usable preference; keep it out of the ranking
          and product_brand_id is not null
        group by
            click_device_id,
            product_brand_id
    ) click_counts
) ranked
where rn < 7
group by device_id;
-- Add-to-cart preference: second-level categories.
-- For every device, the six second-level categories with the most add-to-cart
-- actions in the 90 days before ${today} (that day excluded). Slot N holds the
-- category ranked N; unused slots are filled with 0.
--
-- The slots are pivoted straight from the rank. Packing the ids with
-- collect_set and reading them back by position would not keep the rank
-- order, and would shift slots whenever an id is NULL.
drop table if exists tmp.tmp_user_add_product_category2_id;
create table if not exists tmp.tmp_user_add_product_category2_id as
select
    device_id,
    nvl(max(case when rn = 1 then category2_id_str end), 0) as add_category2_id1,
    nvl(max(case when rn = 2 then category2_id_str end), 0) as add_category2_id2,
    nvl(max(case when rn = 3 then category2_id_str end), 0) as add_category2_id3,
    nvl(max(case when rn = 4 then category2_id_str end), 0) as add_category2_id4,
    nvl(max(case when rn = 5 then category2_id_str end), 0) as add_category2_id5,
    nvl(max(case when rn = 6 then category2_id_str end), 0) as add_category2_id6
from (
    select
        device_id,
        cast(category2_id as string) as category2_id_str,
        -- the id is a tie-breaker so equal counts rank the same way on every run
        row_number() over (
            partition by device_id
            order by add_pv desc, category2_id
        ) as rn
    from (
        select
            add_cart_device_id      as device_id,
            product_category_id_2   as category2_id,
            sum(is_action_add_cart) as add_pv
        from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
        where p_day >= date_sub(${today}, 90)
          and p_day < ${today}
          and is_action_add_cart = 1
          and add_cart_device_id is not null
          -- a NULL id can never be a usable preference; keep it out of the ranking
          and product_category_id_2 is not null
        group by
            add_cart_device_id,
            product_category_id_2
    ) add_counts
) ranked
where rn < 7
group by device_id;
-- Add-to-cart preference: brands.
-- For every device, the six brands with the most add-to-cart actions in the
-- 90 days before ${today} (that day excluded). Slot N holds the brand ranked
-- N; unused slots are filled with 0.
--
-- The slots are pivoted straight from the rank. Packing the ids with
-- collect_set and reading them back by position would not keep the rank
-- order, and would shift slots whenever an id is NULL.
drop table if exists tmp.tmp_user_add_product_brand_id;
create table if not exists tmp.tmp_user_add_product_brand_id as
select
    device_id,
    nvl(max(case when rn = 1 then product_brand_id_str end), 0) as add_brand_id1,
    nvl(max(case when rn = 2 then product_brand_id_str end), 0) as add_brand_id2,
    nvl(max(case when rn = 3 then product_brand_id_str end), 0) as add_brand_id3,
    nvl(max(case when rn = 4 then product_brand_id_str end), 0) as add_brand_id4,
    nvl(max(case when rn = 5 then product_brand_id_str end), 0) as add_brand_id5,
    nvl(max(case when rn = 6 then product_brand_id_str end), 0) as add_brand_id6
from (
    select
        device_id,
        cast(product_brand_id as string) as product_brand_id_str,
        -- the id is a tie-breaker so equal counts rank the same way on every run
        row_number() over (
            partition by device_id
            order by add_pv desc, product_brand_id
        ) as rn
    from (
        select
            add_cart_device_id      as device_id,
            product_brand_id        as product_brand_id,
            sum(is_action_add_cart) as add_pv
        from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
        where p_day >= date_sub(${today}, 90)
          and p_day < ${today}
          and is_action_add_cart = 1
          and add_cart_device_id is not null
          -- a NULL id can never be a usable preference; keep it out of the ranking
          and product_brand_id is not null
        group by
            add_cart_device_id,
            product_brand_id
    ) add_counts
) ranked
where rn < 7
group by device_id;
-- Purchase preference: second-level categories.
-- For every device, the six second-level categories with the most successful
-- payments in the 365 days before ${today} (that day excluded). Slot N holds
-- the category ranked N; unused slots are filled with 0.
--
-- The slots are pivoted straight from the rank. Packing the ids with
-- collect_set and reading them back by position would not keep the rank
-- order, and would shift slots whenever an id is NULL.
drop table if exists tmp.tmp_user_pay_product_category2_id;
create table if not exists tmp.tmp_user_pay_product_category2_id as
select
    device_id,
    nvl(max(case when rn = 1 then category2_id_str end), 0) as pay_category2_id1,
    nvl(max(case when rn = 2 then category2_id_str end), 0) as pay_category2_id2,
    nvl(max(case when rn = 3 then category2_id_str end), 0) as pay_category2_id3,
    nvl(max(case when rn = 4 then category2_id_str end), 0) as pay_category2_id4,
    nvl(max(case when rn = 5 then category2_id_str end), 0) as pay_category2_id5,
    nvl(max(case when rn = 6 then category2_id_str end), 0) as pay_category2_id6
from (
    select
        device_id,
        cast(category2_id as string) as category2_id_str,
        -- the id is a tie-breaker so equal counts rank the same way on every run
        row_number() over (
            partition by device_id
            order by pay_pv desc, category2_id
        ) as rn
    from (
        select
            pay_device_id         as device_id,
            product_category_id_2 as category2_id,
            sum(is_pay_success)   as pay_pv
        from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
        where p_day >= date_sub(${today}, 365)
          and p_day < ${today}
          and is_pay_success = 1
          and pay_device_id is not null
          -- a NULL id can never be a usable preference; keep it out of the ranking
          and product_category_id_2 is not null
        group by
            pay_device_id,
            product_category_id_2
    ) pay_counts
) ranked
where rn < 7
group by device_id;
-- Purchase-preferred brands.
--
-- For every paying device, keeps up to six product_brand_id values, ranked by
-- the number of rows flagged is_pay_success = 1 in the 365 days before
-- ${today} (${today} itself is excluded).  pay_brand_idN holds the brand
-- ranked N-th; unused slots are 0.
--
-- ${today} is a Hive variable and is expected to expand to a quoted date
-- literal (the driver script passes it with --hivevar).
--
-- Why conditional aggregation instead of collect_set + split:
--   collect_set gives no ordering guarantee, so packing the ranked ids into a
--   comma-separated string and splitting it again could put any of the six
--   ids into slot 1.  Picking each slot by its row number keeps the ranking.
drop table if exists tmp.tmp_user_pay_product_brand_id;

create table if not exists tmp.tmp_user_pay_product_brand_id as
select
    device_id,
    -- ids are kept as strings and defaulted with the integer 0, matching the
    -- expression types of the previous split()-based implementation
    nvl(max(case when rn = 1 then product_brand_id_str end), 0) pay_brand_id1,
    nvl(max(case when rn = 2 then product_brand_id_str end), 0) pay_brand_id2,
    nvl(max(case when rn = 3 then product_brand_id_str end), 0) pay_brand_id3,
    nvl(max(case when rn = 4 then product_brand_id_str end), 0) pay_brand_id4,
    nvl(max(case when rn = 5 then product_brand_id_str end), 0) pay_brand_id5,
    nvl(max(case when rn = 6 then product_brand_id_str end), 0) pay_brand_id6
from (
    select
        device_id,
        cast(product_brand_id as string) as product_brand_id_str,
        row_number() over (
            partition by device_id
            -- product_brand_id is a tie-break so equal counts rank the same
            -- way on every run
            order by pay_pv desc, product_brand_id
        ) as rn
    from (
        select
            pay_device_id       as device_id,
            product_brand_id    as product_brand_id,
            sum(is_pay_success) as pay_pv
        from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
        where p_day >= date_sub(${today}, 365)
          and p_day < ${today}
          and is_pay_success = 1
          and pay_device_id is not null
          -- a NULL brand must not take one of the six slots
          and product_brand_id is not null
        group by
            pay_device_id,
            product_brand_id
    ) pay_counts
) ranked
where rn < 7
group by device_id;
-- Publishes the ${today} partition of secoo_search.user_brand_category2_favorite
-- by joining the six per-device preference tables built in tmp.
--
-- Column order matters: the insert is positional, so the select list must stay
-- click (category, brand) -> add-to-cart (category, brand) -> pay (category, brand),
-- six slots each.  A device missing from a joined table gets 0 in that table's slots.
--
-- NOTE(review): the click-category table drives the join, so a device that has
-- no row there is not written at all, even if it has add-to-cart or pay
-- preferences -- confirm this is intended.
insert overwrite table secoo_search.user_brand_category2_favorite
partition (p_day = ${today})
select
    click_cat.device_id                    as device_id,

    -- clicked categories
    nvl(click_cat.click_category2_id1, 0)  as click_category2_id1,
    nvl(click_cat.click_category2_id2, 0)  as click_category2_id2,
    nvl(click_cat.click_category2_id3, 0)  as click_category2_id3,
    nvl(click_cat.click_category2_id4, 0)  as click_category2_id4,
    nvl(click_cat.click_category2_id5, 0)  as click_category2_id5,
    nvl(click_cat.click_category2_id6, 0)  as click_category2_id6,

    -- clicked brands
    nvl(click_brand.click_brand_id1, 0)    as click_brand_id1,
    nvl(click_brand.click_brand_id2, 0)    as click_brand_id2,
    nvl(click_brand.click_brand_id3, 0)    as click_brand_id3,
    nvl(click_brand.click_brand_id4, 0)    as click_brand_id4,
    nvl(click_brand.click_brand_id5, 0)    as click_brand_id5,
    nvl(click_brand.click_brand_id6, 0)    as click_brand_id6,

    -- add-to-cart categories
    nvl(cart_cat.add_category2_id1, 0)     as add_category2_id1,
    nvl(cart_cat.add_category2_id2, 0)     as add_category2_id2,
    nvl(cart_cat.add_category2_id3, 0)     as add_category2_id3,
    nvl(cart_cat.add_category2_id4, 0)     as add_category2_id4,
    nvl(cart_cat.add_category2_id5, 0)     as add_category2_id5,
    nvl(cart_cat.add_category2_id6, 0)     as add_category2_id6,

    -- add-to-cart brands
    nvl(cart_brand.add_brand_id1, 0)       as add_brand_id1,
    nvl(cart_brand.add_brand_id2, 0)       as add_brand_id2,
    nvl(cart_brand.add_brand_id3, 0)       as add_brand_id3,
    nvl(cart_brand.add_brand_id4, 0)       as add_brand_id4,
    nvl(cart_brand.add_brand_id5, 0)       as add_brand_id5,
    nvl(cart_brand.add_brand_id6, 0)       as add_brand_id6,

    -- paid categories
    nvl(pay_cat.pay_category2_id1, 0)      as pay_category2_id1,
    nvl(pay_cat.pay_category2_id2, 0)      as pay_category2_id2,
    nvl(pay_cat.pay_category2_id3, 0)      as pay_category2_id3,
    nvl(pay_cat.pay_category2_id4, 0)      as pay_category2_id4,
    nvl(pay_cat.pay_category2_id5, 0)      as pay_category2_id5,
    nvl(pay_cat.pay_category2_id6, 0)      as pay_category2_id6,

    -- paid brands
    nvl(pay_brand.pay_brand_id1, 0)        as pay_brand_id1,
    nvl(pay_brand.pay_brand_id2, 0)        as pay_brand_id2,
    nvl(pay_brand.pay_brand_id3, 0)        as pay_brand_id3,
    nvl(pay_brand.pay_brand_id4, 0)        as pay_brand_id4,
    nvl(pay_brand.pay_brand_id5, 0)        as pay_brand_id5,
    nvl(pay_brand.pay_brand_id6, 0)        as pay_brand_id6
from tmp.tmp_user_click_product_category2_id click_cat
left join tmp.tmp_user_click_product_brand_id click_brand
    on click_cat.device_id = click_brand.device_id
left join tmp.tmp_user_add_product_category2_id cart_cat
    on click_cat.device_id = cart_cat.device_id
left join tmp.tmp_user_add_product_brand_id cart_brand
    on click_cat.device_id = cart_brand.device_id
left join tmp.tmp_user_pay_product_category2_id pay_cat
    on click_cat.device_id = pay_cat.device_id
left join tmp.tmp_user_pay_product_brand_id pay_brand
    on click_cat.device_id = pay_brand.device_id
;
\ No newline at end of file
src/main/scripts/user_feature/user_favor_brand_category_feature.sh
0 → 100644
View file @
2ce837d7
# Builds the per-device brand / category preference table for one day.
#
# Usage: user_favor_brand_category_feature.sh [date]
#   date  any string GNU `date -d` accepts (e.g. 2021-08-21); it is shifted
#         back by ${delta_day} days and handed to Hive as the `today` variable.
#
# Exits with a non-zero status as soon as the date cannot be resolved or a Hive
# step fails, so a broken run never reaches the insert with an empty date.
work_dir="/data/zhaoyanchao/java/shell/user_feature/"
delta_day=0          # 0 = use the given date itself
today_param=$1

# Resolve the target day; stop here instead of passing an empty value to Hive.
today=$(date -d "${today_param} -${delta_day} day" "+%Y-%m-%d") || {
    echo "cannot resolve date from '${today_param}'" >&2
    exit 1
}
echo "${today}"

# Make sure the target table exists before loading it.
hive -f "${work_dir}create_user_brand_category_favorite_table.sql" || exit $?

# The SQL uses ${today} as a date literal, hence the embedded single quotes.
hive --hivevar today="'${today}'" \
     -f "${work_dir}insert_user_brand_category_favorite_table.sql" || exit $?
src/main/scripts/user_feature/user_feature_compose.sh
View file @
2ce837d7
...
@@ -3,10 +3,10 @@
...
@@ -3,10 +3,10 @@
# 检查推荐最新表名
# 检查推荐最新表名
work_dir
=
"/data/zhaoyanchao/java/shell/user_feature/"
work_dir
=
"/data/zhaoyanchao/java/shell/user_feature/"
delta_day
=
0
delta_day
=
1
today_param
=
$1
today_param
=
$1
yesterday
=
`
date
-d
"
${
today_param
}
-
$delta_day
day"
"+%Y-%m-%d"
`
yesterday
=
`
date
-d
"
${
today_param
}
-
$delta_day
day"
"+%Y-%m-%d"
`
date_str
=
`
echo
${
yesterday
}
|sed
's/\-/_/g'
`
date_str
=
`
echo
${
today_param
}
|sed
's/\-/_/g'
`
table_name
=
"secoo_rcmd_features.userfeatures_"
${
date_str
}
table_name
=
"secoo_rcmd_features.userfeatures_"
${
date_str
}
echo
${
table_name
}
echo
${
table_name
}
...
@@ -35,81 +35,124 @@ echo "开始导入数据"
...
@@ -35,81 +35,124 @@ echo "开始导入数据"
hive
-e
"insert overwrite table secoo_search.search_data_user_feature partition(p_day=date'
$today_param
')
hive
-e
"insert overwrite table secoo_search.search_data_user_feature partition(p_day=date'
$today_param
')
select
select
device_id as device_id,
T1.device_id as device_id,
1yearorderticketproductcount as 1YearOrderTicketProductCount,
T1.1yearorderticketproductcount as 1YearOrderTicketProductCount,
addcartin7days as addCartIn7Days,
T1.addcartin7days as addCartIn7Days,
1yearcartproductcount as 1YearCartProductCount,
T1.1yearcartproductcount as 1YearCartProductCount,
1yearorderproductmancount as 1YearOrderProductManCount,
T1.1yearorderproductmancount as 1YearOrderProductManCount,
1yearorderproductmanpriceamt as 1YearOrderProductManPriceAmt,
T1.1yearorderproductmanpriceamt as 1YearOrderProductManPriceAmt,
30dayschannelpvjrzk as 30DaysChannelPvJrzk,
T1.30dayschannelpvjrzk as 30DaysChannelPvJrzk,
1yearorderticketcategorycount as 1YearOrderTicketCategoryCount,
T1.1yearorderticketcategorycount as 1YearOrderTicketCategoryCount,
30daysdetailpv as 30DaysDetailPv,
T1.30daysdetailpv as 30DaysDetailPv,
userinterestcategory1_0 as userInterestCategory1_0,
T1.userinterestcategory1_0 as userInterestCategory1_0,
userinterestcategory2_0 as userInterestCategory2_0,
T1.userinterestcategory2_0 as userInterestCategory2_0,
userinterestcategory1_1 as userInterestCategory1_1,
T1.userinterestcategory1_1 as userInterestCategory1_1,
userinterestcategory2_1 as userInterestCategory2_1,
T1.userinterestcategory2_1 as userInterestCategory2_1,
userinterestcategory3_0 as userInterestCategory3_0,
T1.userinterestcategory3_0 as userInterestCategory3_0,
30dayssearchcategorycount as 30DaysSearchCategoryCount,
T1.30dayssearchcategorycount as 30DaysSearchCategoryCount,
1yearorderpaypriceamt as 1YearOrderPayPriceAmt,
T1.1yearorderpaypriceamt as 1YearOrderPayPriceAmt,
userinterestcategory1_2 as userInterestCategory1_2,
T1.userinterestcategory1_2 as userInterestCategory1_2,
30dayssearchbrandcount as 30DaysSearchBrandCount,
T1.30dayssearchbrandcount as 30DaysSearchBrandCount,
userinterestcategory1_3 as userInterestCategory1_3,
T1.userinterestcategory1_3 as userInterestCategory1_3,
userinterestcategory2_2 as userInterestCategory2_2,
T1.userinterestcategory2_2 as userInterestCategory2_2,
userinterestcategory3_1 as userInterestCategory3_1,
T1.userinterestcategory3_1 as userInterestCategory3_1,
1yearcartcategorycount as 1YearCartCategoryCount,
T1.1yearcartcategorycount as 1YearCartCategoryCount,
userinterestcategory2_3 as userInterestCategory2_3,
T1.userinterestcategory2_3 as userInterestCategory2_3,
userinterestcategory1_4 as userInterestCategory1_4,
T1.userinterestcategory1_4 as userInterestCategory1_4,
userinterestcategory3_2 as userInterestCategory3_2,
T1.userinterestcategory3_2 as userInterestCategory3_2,
userinterestcategory1_5 as userInterestCategory1_5,
T1.userinterestcategory1_5 as userInterestCategory1_5,
userinterestcategory3_3 as userInterestCategory3_3,
T1.userinterestcategory3_3 as userInterestCategory3_3,
1yearorderproductothercount as 1YearOrderProductOtherCount,
T1.1yearorderproductothercount as 1YearOrderProductOtherCount,
30daysdetailpva as 30DaysDetailPva,
T1.30daysdetailpva as 30DaysDetailPva,
userinterestcategory2_4 as userInterestCategory2_4,
T1.userinterestcategory2_4 as userInterestCategory2_4,
1yearorderusepointcount as 1YearOrderUsePointCount,
T1.1yearorderusepointcount as 1YearOrderUsePointCount,
30dayschannelpvpaihangbang as 30DaysChannelPvPaiHangBang,
T1.30dayschannelpvpaihangbang as 30DaysChannelPvPaiHangBang,
30dayschannelpvxinpinbang as 30daysChannelPvXinPinBang,
T1.30dayschannelpvxinpinbang as 30daysChannelPvXinPinBang,
1yearaddfavcategorycount as 1YearAddFavCategoryCount,
T1.1yearaddfavcategorycount as 1YearAddFavCategoryCount,
userinterestcategory3_4 as userInterestCategory3_4,
T1.userinterestcategory3_4 as userInterestCategory3_4,
userinterestcategory2_5 as userInterestCategory2_5,
T1.userinterestcategory2_5 as userInterestCategory2_5,
activeinweekends as activeInWeekends,
T1.activeinweekends as activeInWeekends,
1yearordercount as 1YearOrderCount,
T1.1yearordercount as 1YearOrderCount,
30daysdetailpvb as 30DaysDetailPvb,
T1.30daysdetailpvb as 30DaysDetailPvb,
1yearorderpayproductcount as 1YearOrderPayProductCount,
T1.1yearorderpayproductcount as 1YearOrderPayProductCount,
1yearorderusepointpriceamt as 1YearOrderUsePointPriceAmt,
T1.1yearorderusepointpriceamt as 1YearOrderUsePointPriceAmt,
1yearaddfavproductcount as 1YearAddFavProductCount,
T1.1yearaddfavproductcount as 1YearAddFavProductCount,
30daysdetailpvab as 30DaysDetailPvab,
T1.30daysdetailpvab as 30DaysDetailPvab,
userinterestcategory3_5 as userInterestCategory3_5,
T1.userinterestcategory3_5 as userInterestCategory3_5,
devicetype as deviceType,
T1.devicetype as deviceType,
activein7days as activeIn7Days,
T1.activein7days as activeIn7Days,
gender as gender,
T1.gender as gender,
purchasedbrand100 as purchasedBrand100,
T1.purchasedbrand100 as purchasedBrand100,
30dayschannelpvrenqibang as 30DaysChannelPvRenQiBang,
T1.30dayschannelpvrenqibang as 30DaysChannelPvRenQiBang,
1yearcartpriceamt as 1YearCartPriceAmt,
T1.1yearcartpriceamt as 1YearCartPriceAmt,
regularcustomer as regularCustomer,
T1.regularcustomer as regularCustomer,
1yearorderpaybrandcount as 1YearOrderPayBrandCount,
T1.1yearorderpaybrandcount as 1YearOrderPayBrandCount,
1yearorderticketbrandcount as 1YearOrderTicketBrandCount,
T1.1yearorderticketbrandcount as 1YearOrderTicketBrandCount,
1yearorderproductotherpriceamt as 1YearOrderProductOtherPriceAmt,
T1.1yearorderproductotherpriceamt as 1YearOrderProductOtherPriceAmt,
1yearorderproductcountavg as 1YearOrderProductCountAvg,
T1.1yearorderproductcountavg as 1YearOrderProductCountAvg,
favoritein7days as favoriteIn7Days,
T1.favoritein7days as favoriteIn7Days,
30daysdetailpvn as 30DaysDetailPvn,
T1.30daysdetailpvn as 30DaysDetailPvn,
1yearaddfavpriceamt as 1YearAddFavPriceAmt,
T1.1yearaddfavpriceamt as 1YearAddFavPriceAmt,
1yearorderproductwomanpriceamt as 1YearOrderProductWomanPriceAmt,
T1.1yearorderproductwomanpriceamt as 1YearOrderProductWomanPriceAmt,
1yearorderproductwomancount as 1YearOrderProductWomanCount,
T1.1yearorderproductwomancount as 1YearOrderProductWomanCount,
30dayschannelpvxscj as 30DaysChannelPvXscj,
T1.30dayschannelpvxscj as 30DaysChannelPvXscj,
1yearorderticketpriceamt as 1YearOrderTicketPriceAmt,
T1.1yearorderticketpriceamt as 1YearOrderTicketPriceAmt,
userinterestbrand_0 as userInterestBrand_0,
T1.userinterestbrand_0 as userInterestBrand_0,
1yearaddfavbrandcount as 1YearAddFavBrandCount,
T1.1yearaddfavbrandcount as 1YearAddFavBrandCount,
userinterestbrand_1 as userInterestBrand_1,
T1.userinterestbrand_1 as userInterestBrand_1,
userinterestbrand_2 as userInterestBrand_2,
T1.userinterestbrand_2 as userInterestBrand_2,
1yearorderpaycount as 1YearOrderPayCount,
T1.1yearorderpaycount as 1YearOrderPayCount,
30dayschannelpvaolai as 30DaysChannelPvAoLai,
T1.30dayschannelpvaolai as 30DaysChannelPvAoLai,
userinterestbrand_3 as userInterestBrand_3,
T1.userinterestbrand_3 as userInterestBrand_3,
30daysdetailpvs as 30DaysDetailPvs,
T1.30daysdetailpvs as 30DaysDetailPvs,
purchaseditems as purchasedItems,
T1.purchaseditems as purchasedItems,
30dayssearchcount as 30DaysSearchCount,
T1.30dayssearchcount as 30DaysSearchCount,
1yearcartbrandcount as 1YearCartBrandCount,
T1.1yearcartbrandcount as 1YearCartBrandCount,
1yearorderticketcount as 1YearOrderTicketCount,
T1.1yearorderticketcount as 1YearOrderTicketCount,
1yearorderpaycategorycount as 1YearOrderPayCategoryCount
T1.1yearorderpaycategorycount as 1YearOrderPayCategoryCount,
from
$table_name
"
;
nvl(T2.click_category2_id1,0) as click_category2_id1,
nvl(T2.click_category2_id2,0) as click_category2_id2,
nvl(T2.click_category2_id3,0) as click_category2_id3,
nvl(T2.click_category2_id4,0) as click_category2_id4,
nvl(T2.click_category2_id5,0) as click_category2_id5,
nvl(T2.click_category2_id6,0) as click_category2_id6,
nvl(T2.click_brand_id1,0) as click_brand_id1,
nvl(T2.click_brand_id2,0) as click_brand_id2,
nvl(T2.click_brand_id3,0) as click_brand_id3,
nvl(T2.click_brand_id4,0) as click_brand_id4,
nvl(T2.click_brand_id5,0) as click_brand_id5,
nvl(T2.click_brand_id6,0) as click_brand_id6,
nvl(T2.add_category2_id1,0) as add_category2_id1,
nvl(T2.add_category2_id2,0) as add_category2_id2,
nvl(T2.add_category2_id3,0) as add_category2_id3,
nvl(T2.add_category2_id4,0) as add_category2_id4,
nvl(T2.add_category2_id5,0) as add_category2_id5,
nvl(T2.add_category2_id6,0) as add_category2_id6,
nvl(T2.add_brand_id1,0) as add_brand_id1,
nvl(T2.add_brand_id2,0) as add_brand_id2,
nvl(T2.add_brand_id3,0) as add_brand_id3,
nvl(T2.add_brand_id4,0) as add_brand_id4,
nvl(T2.add_brand_id5,0) as add_brand_id5,
nvl(T2.add_brand_id6,0) as add_brand_id6,
nvl(T2.pay_category2_id1,0) as pay_category2_id1,
nvl(T2.pay_category2_id2,0) as pay_category2_id2,
nvl(T2.pay_category2_id3,0) as pay_category2_id3,
nvl(T2.pay_category2_id4,0) as pay_category2_id4,
nvl(T2.pay_category2_id5,0) as pay_category2_id5,
nvl(T2.pay_category2_id6,0) as pay_category2_id6,
nvl(T2.pay_brand_id1,0) as pay_brand_id1,
nvl(T2.pay_brand_id2,0) as pay_brand_id2,
nvl(T2.pay_brand_id3,0) as pay_brand_id3,
nvl(T2.pay_brand_id4,0) as pay_brand_id4,
nvl(T2.pay_brand_id5,0) as pay_brand_id5,
nvl(T2.pay_brand_id6,0) as pay_brand_id6,
nvl(T3.grow_level,0) as grow_level
from
$table_name
T1
left join secoo_search.user_brand_category2_favorite T2 on T1.device_id = T2.device_id and T2.p_day = '
${
today_param
}
'
left join secoo_dim.dim_user_basic_p_day_full T3 on T1.device_id = T3.device_id and T3.p_day = '
${
yesterday
}
'"
;
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment