Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
search-model-data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
田川
search-model-data
Commits
43003b5b
Commit
43003b5b
authored
Jun 15, 2021
by
王玉龙
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'TECH-SEAR-1485' into 'master'
query及用户query特征抽取 See merge request tianchuan/search-model-data!5
parents
80ff1d09
780b0206
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
462 additions
and
0 deletions
+462
-0
create_cross_feature_query_user.sql
src/main/scripts/cross_feature/query_user/create_cross_feature_query_user.sql
+18
-0
insert_cross_feature_query_user.sql
src/main/scripts/cross_feature/query_user/insert_cross_feature_query_user.sql
+281
-0
query_user_compose.sh
src/main/scripts/cross_feature/query_user/query_user_compose.sh
+12
-0
create_query_brand_category_feature_table.sql
src/main/scripts/query_feature/create_query_brand_category_feature_table.sql
+26
-0
insert_query_brand_category_table.sql
src/main/scripts/query_feature/insert_query_brand_category_table.sql
+113
-0
query_feature_brand_category_compose.sh
src/main/scripts/query_feature/query_feature_brand_category_compose.sh
+12
-0
No files found.
src/main/scripts/cross_feature/query_user/create_cross_feature_query_user.sql
0 → 100644
View file @
43003b5b
-- Target table for the query x user cross features.
-- One row per (key_word, device_id), partitioned by day (p_day).
-- The twelve *_cross columns are grouped as: recognised (search) category
-- and brand, add-to-cart category and brand, purchased category and brand.
-- Column names containing thrid are kept as is because readers and writers
-- of the table depend on them.
create external table if not exists secoo_search.search_query_user_cross (
    key_word                              string comment 'query',
    device_id                             string comment 'device_id',
    user_query_first_category_cross       bigint comment '用户一级品类偏好和Query的识别品类的交叉',
    user_query_second_category_cross      bigint comment '用户二级品类偏好和Query的识别品类的交叉',
    user_query_thrid_category_cross       bigint comment '用户三级品类偏好和Query的识别品类的交叉',
    user_query_brand_cross                bigint comment '用户品牌偏好和Query的识别品牌的交叉',
    user_query_cart_first_category_cross  bigint comment '用户一级品类偏好和Query的加购品类的交叉',
    user_query_cart_second_category_cross bigint comment '用户二级品类偏好和Query的加购品类的交叉',
    user_query_cart_thrid_category_cross  bigint comment '用户三级品类偏好和Query的加购品类的交叉',
    user_query_cart_brand_cross           bigint comment '用户品牌偏好和Query的加购品牌的交叉',
    user_query_pay_first_category_cross   bigint comment '用户一级品类偏好和Query的购买品类的交叉',
    user_query_pay_second_category_cross  bigint comment '用户二级品类偏好和Query的购买品类的交叉',
    user_query_pay_thrid_category_cross   bigint comment '用户三级品类偏好和Query的购买品类的交叉',
    user_query_pay_brand_cross            bigint comment '用户品牌偏好和Query的购买品牌的交叉'
)
comment '搜索用户query交叉特征'
partitioned by (
    p_day date comment '分区日期'
)
stored as parquet;
src/main/scripts/cross_feature/query_user/insert_cross_feature_query_user.sql
0 → 100644
View file @
43003b5b
-- User first-level category preference.
-- One row per device_id with the array of preferred first-level category
-- ids, restricted to ids present in feature_category_1_dim.
-- category_1_favor is split on % into entries and each entry on a comma
-- into (category id, score).
-- Review changes:
--   * drop table uses if exists so a run on a clean tmp database does not
--     depend on hive.exec.drop.ignorenonexistent
--   * the profile snapshot is selected with the yesterday hivevar, the same
--     variable that names the output partition, instead of current_date()
--   * the global order by on the string score is replaced with
--     distribute by / sort by on a numeric score, which avoids a total sort
--     and compares scores as numbers
-- NOTE(review): Hive does not guarantee that collect_list preserves input
-- order. The consumers visible in this script only call array_contains.
drop table if exists tmp.tmp_user_first_category_favor_lsh;
create table if not exists tmp.tmp_user_first_category_favor_lsh as
select
    ranked.device_id,
    collect_list(ranked.category_id) as category_ids
from (
    select
        favor.device_id,
        favor.category_id,
        favor.score
    from (
        select
            device_id,
            split(favor_entry, ',')[0] as category_id,
            cast(split(favor_entry, ',')[1] as double) as score
        from secoo_rcmd.seckill_device_favors_profile
        lateral view explode(split(category_1_favor, '%')) ad_table as favor_entry
        where dt = ${yesterday}
    ) favor
    join (
        select category_id_1
        from secoo_rcmd_features.feature_category_1_dim
    ) dim
        on favor.category_id = dim.category_id_1
    distribute by device_id
    sort by device_id, score desc
) ranked
group by ranked.device_id;
-- User second-level category preference.
-- One row per device_id with the array of preferred second-level category
-- ids, restricted to ids present in feature_category_2_dim.
-- category_favor is split on % into entries and each entry on a comma into
-- (category id, score).
-- NOTE(review): the source column here is category_favor while the sibling
-- statements read category_1_favor and category_3_favor. Confirm that it
-- really carries second-level ids.
-- Review changes: drop uses if exists, the snapshot date comes from the
-- yesterday hivevar instead of current_date(), and the global order by on a
-- string score is replaced with distribute by / sort by on a numeric score.
-- NOTE(review): Hive does not guarantee that collect_list preserves input
-- order. The consumers visible in this script only call array_contains.
drop table if exists tmp.tmp_user_second_category_favor_lsh;
create table if not exists tmp.tmp_user_second_category_favor_lsh as
select
    ranked.device_id,
    collect_list(ranked.category_id) as category_ids
from (
    select
        favor.device_id,
        favor.category_id,
        favor.score
    from (
        select
            device_id,
            split(favor_entry, ',')[0] as category_id,
            cast(split(favor_entry, ',')[1] as double) as score
        from secoo_rcmd.seckill_device_favors_profile
        lateral view explode(split(category_favor, '%')) ad_table as favor_entry
        where dt = ${yesterday}
    ) favor
    join (
        select category_id_2
        from secoo_rcmd_features.feature_category_2_dim
    ) dim
        on favor.category_id = dim.category_id_2
    distribute by device_id
    sort by device_id, score desc
) ranked
group by ranked.device_id;
-- User third-level category preference.
-- One row per device_id with the array of preferred third-level category
-- ids, restricted to ids present in feature_category_3_dim.
-- category_3_favor is split on % into entries and each entry on a comma
-- into (category id, score).
-- The table name keeps the historical spelling thrid because later
-- statements reference it.
-- Review changes: drop uses if exists, the snapshot date comes from the
-- yesterday hivevar instead of current_date(), and the global order by on a
-- string score is replaced with distribute by / sort by on a numeric score.
-- NOTE(review): Hive does not guarantee that collect_list preserves input
-- order. The consumers visible in this script only call array_contains.
drop table if exists tmp.tmp_user_thrid_category_favor_lsh;
create table if not exists tmp.tmp_user_thrid_category_favor_lsh as
select
    ranked.device_id,
    collect_list(ranked.category_id) as category_ids
from (
    select
        favor.device_id,
        favor.category_id,
        favor.score
    from (
        select
            device_id,
            split(favor_entry, ',')[0] as category_id,
            cast(split(favor_entry, ',')[1] as double) as score
        from secoo_rcmd.seckill_device_favors_profile
        lateral view explode(split(category_3_favor, '%')) ad_table as favor_entry
        where dt = ${yesterday}
    ) favor
    join (
        select category_id_3
        from secoo_rcmd_features.feature_category_3_dim
    ) dim
        on favor.category_id = dim.category_id_3
    distribute by device_id
    sort by device_id, score desc
) ranked
group by ranked.device_id;
-- User brand preference.
-- One row per device_id with the array of preferred brand ids (column
-- brand_ids), restricted to non-null ids present in feature_brand_dim.
-- brand_favor is split on % into entries and each entry on a comma into
-- (brand id, score).
-- The inner alias is now favor_brand_id. It used to be called category_id
-- although it holds a brand id.
-- Review changes: drop uses if exists, the snapshot date comes from the
-- yesterday hivevar instead of current_date(), and the global order by on a
-- string score is replaced with distribute by / sort by on a numeric score.
-- NOTE(review): Hive does not guarantee that collect_list preserves input
-- order. The consumers visible in this script only call array_contains.
drop table if exists tmp.tmp_user_brand_favor_lsh;
create table if not exists tmp.tmp_user_brand_favor_lsh as
select
    ranked.device_id,
    collect_list(ranked.favor_brand_id) as brand_ids
from (
    select
        favor.device_id,
        favor.favor_brand_id,
        favor.score
    from (
        select
            device_id,
            split(favor_entry, ',')[0] as favor_brand_id,
            cast(split(favor_entry, ',')[1] as double) as score
        from secoo_rcmd.seckill_device_favors_profile
        lateral view explode(split(brand_favor, '%')) ad_table as favor_entry
        where dt = ${yesterday}
    ) favor
    join (
        select brand_id
        from secoo_rcmd_features.feature_brand_dim
        where brand_id is not null
    ) dim
        on favor.favor_brand_id = dim.brand_id
    distribute by device_id
    sort by device_id, score desc
) ranked
group by ranked.device_id;
-- Base rows for the recognised-category cross features.
-- Distinct (key_word, search_device_id, first recognised category) triples
-- from the search detail fact table. search_categorys is split on a comma
-- and only element 0 is kept.
-- Review changes:
--   * the 60 day window now ends at the yesterday hivevar (the partition
--     being written) instead of current_date() minus one, so a rerun for an
--     older date reads the matching window. For a normal daily run the two
--     windows are identical.
--   * drop table uses if exists
drop table if exists tmp.tmp_query_category_base_cross_lsh;
create table if not exists tmp.tmp_query_category_base_cross_lsh as
select distinct
    key_word,
    search_device_id,
    split(search_categorys, ',')[0] as search_categorys
from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
where search_device_id is not null
  and p_day >= date_sub(${yesterday}, 59)
  and p_day <= ${yesterday};
-- Base rows for the recognised-brand cross features.
-- Distinct (key_word, search_device_id, first recognised brand) triples
-- from the search detail fact table. search_brands is split on a comma and
-- only element 0 is kept.
-- Review changes:
--   * the 60 day window now ends at the yesterday hivevar (the partition
--     being written) instead of current_date() minus one. For a normal
--     daily run the two windows are identical.
--   * drop table uses if exists
drop table if exists tmp.tmp_query_brand_base_cross_lsh;
create table if not exists tmp.tmp_query_brand_base_cross_lsh as
select distinct
    key_word,
    search_device_id,
    split(search_brands, ',')[0] as search_brands
from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
where search_device_id is not null
  and p_day >= date_sub(${yesterday}, 59)
  and p_day <= ${yesterday};
-- Cross of the user first-level category preference with the category
-- recognised from the query.
-- The flag is 1 when any recognised category recorded for the
-- (key_word, device_id) pair is contained in the preference array of the
-- device, otherwise 0.
-- Review fix: the flag is aggregated with max and the grouping is on the
-- pair only, so the table holds exactly one row per pair. Grouping by the
-- flag itself allowed both a 0 row and a 1 row for the same pair, which
-- multiplied rows in the final multi-way left join.
drop table if exists tmp.tmp_query_user_first_category_cross_lsh;
create table if not exists tmp.tmp_query_user_first_category_cross_lsh as
select
    base.key_word,
    favor.device_id,
    max(if(array_contains(favor.category_ids, cast(base.search_categorys as string)), 1, 0))
        as user_query_first_category_cross
from tmp.tmp_user_first_category_favor_lsh favor
join tmp.tmp_query_category_base_cross_lsh base
    on favor.device_id = base.search_device_id
group by
    base.key_word,
    favor.device_id;
-- Cross of the user second-level category preference with the category
-- recognised from the query.
-- The flag is 1 when any recognised category recorded for the
-- (key_word, device_id) pair is contained in the preference array of the
-- device, otherwise 0.
-- Review fix: max over the flag with grouping on the pair only gives one
-- row per pair. The previous grouping by the flag could emit a 0 row and a
-- 1 row for the same pair and fan out the final left join.
drop table if exists tmp.tmp_query_user_second_category_cross_lsh;
create table if not exists tmp.tmp_query_user_second_category_cross_lsh as
select
    base.key_word,
    favor.device_id,
    max(if(array_contains(favor.category_ids, cast(base.search_categorys as string)), 1, 0))
        as user_query_second_category_cross
from tmp.tmp_user_second_category_favor_lsh favor
join tmp.tmp_query_category_base_cross_lsh base
    on favor.device_id = base.search_device_id
group by
    base.key_word,
    favor.device_id;
-- Cross of the user third-level category preference with the category
-- recognised from the query.
-- The flag is 1 when any recognised category recorded for the
-- (key_word, device_id) pair is contained in the preference array of the
-- device, otherwise 0.
-- Review fix: max over the flag with grouping on the pair only gives one
-- row per pair. The previous grouping by the flag could emit a 0 row and a
-- 1 row for the same pair and fan out the final left join.
drop table if exists tmp.tmp_query_user_thrid_category_cross_lsh;
create table if not exists tmp.tmp_query_user_thrid_category_cross_lsh as
select
    base.key_word,
    favor.device_id,
    max(if(array_contains(favor.category_ids, cast(base.search_categorys as string)), 1, 0))
        as user_query_thrid_category_cross
from tmp.tmp_user_thrid_category_favor_lsh favor
join tmp.tmp_query_category_base_cross_lsh base
    on favor.device_id = base.search_device_id
group by
    base.key_word,
    favor.device_id;
-- Cross of the user brand preference with the brand recognised from the
-- query.
-- The flag is 1 when any recognised brand recorded for the
-- (key_word, device_id) pair is contained in the brand preference array of
-- the device, otherwise 0.
-- Review fix: max over the flag with grouping on the pair only gives one
-- row per pair. The previous grouping by the flag could emit a 0 row and a
-- 1 row for the same pair and fan out the final left join.
drop table if exists tmp.tmp_query_user_brand_cross_lsh;
create table if not exists tmp.tmp_query_user_brand_cross_lsh as
select
    base.key_word,
    favor.device_id,
    max(if(array_contains(favor.brand_ids, cast(base.search_brands as string)), 1, 0))
        as user_query_brand_cross
from tmp.tmp_user_brand_favor_lsh favor
join tmp.tmp_query_brand_base_cross_lsh base
    on favor.device_id = base.search_device_id
group by
    base.key_word,
    favor.device_id;
-- Base rows for the add-to-cart category cross features.
-- Distinct (key_word, add_cart_device_id, product_category_id) triples from
-- the search detail fact table, for rows that carry an add_cart_device_id.
-- Review changes:
--   * the 60 day window now ends at the yesterday hivevar (the partition
--     being written) instead of current_date() minus one. For a normal
--     daily run the two windows are identical.
--   * drop table uses if exists
drop table if exists tmp.tmp_query_category_cart_base_cross_lsh;
create table if not exists tmp.tmp_query_category_cart_base_cross_lsh as
select distinct
    key_word,
    add_cart_device_id,
    product_category_id
from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
where add_cart_device_id is not null
  and p_day >= date_sub(${yesterday}, 59)
  and p_day <= ${yesterday};
-- Base rows for the add-to-cart brand cross features.
-- Distinct (key_word, add_cart_device_id, product_brand_id) triples from
-- the search detail fact table, for rows that carry an add_cart_device_id.
-- Review changes:
--   * the 60 day window now ends at the yesterday hivevar (the partition
--     being written) instead of current_date() minus one. For a normal
--     daily run the two windows are identical.
--   * drop table uses if exists
drop table if exists tmp.tmp_query_brand_cart_base_cross_lsh;
create table if not exists tmp.tmp_query_brand_cart_base_cross_lsh as
select distinct
    key_word,
    add_cart_device_id,
    product_brand_id
from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
where add_cart_device_id is not null
  and p_day >= date_sub(${yesterday}, 59)
  and p_day <= ${yesterday};
-- Cross of the user first-level category preference with the categories
-- added to cart from the query.
-- The flag is 1 when any product_category_id recorded for the
-- (key_word, device_id) pair is contained in the preference array of the
-- device, otherwise 0.
-- Review fix: max over the flag with grouping on the pair only gives one
-- row per pair. The previous grouping by the flag could emit a 0 row and a
-- 1 row for the same pair and fan out the final left join.
drop table if exists tmp.tmp_query_user_cart_first_category_cross_lsh;
create table if not exists tmp.tmp_query_user_cart_first_category_cross_lsh as
select
    base.key_word,
    favor.device_id,
    max(if(array_contains(favor.category_ids, cast(base.product_category_id as string)), 1, 0))
        as user_query_cart_first_category_cross
from tmp.tmp_user_first_category_favor_lsh favor
join tmp.tmp_query_category_cart_base_cross_lsh base
    on favor.device_id = base.add_cart_device_id
group by
    base.key_word,
    favor.device_id;
-- Cross of the user second-level category preference with the categories
-- added to cart from the query.
-- The flag is 1 when any product_category_id recorded for the
-- (key_word, device_id) pair is contained in the preference array of the
-- device, otherwise 0.
-- Review fix: max over the flag with grouping on the pair only gives one
-- row per pair. The previous grouping by the flag could emit a 0 row and a
-- 1 row for the same pair and fan out the final left join.
drop table if exists tmp.tmp_query_user_cart_second_category_cross_lsh;
create table if not exists tmp.tmp_query_user_cart_second_category_cross_lsh as
select
    base.key_word,
    favor.device_id,
    max(if(array_contains(favor.category_ids, cast(base.product_category_id as string)), 1, 0))
        as user_query_cart_second_category_cross
from tmp.tmp_user_second_category_favor_lsh favor
join tmp.tmp_query_category_cart_base_cross_lsh base
    on favor.device_id = base.add_cart_device_id
group by
    base.key_word,
    favor.device_id;
-- Cross of the user third-level category preference with the categories
-- added to cart from the query.
-- The flag is 1 when any product_category_id recorded for the
-- (key_word, device_id) pair is contained in the preference array of the
-- device, otherwise 0.
-- Review fix: max over the flag with grouping on the pair only gives one
-- row per pair. The previous grouping by the flag could emit a 0 row and a
-- 1 row for the same pair and fan out the final left join.
drop table if exists tmp.tmp_query_user_cart_thrid_category_cross_lsh;
create table if not exists tmp.tmp_query_user_cart_thrid_category_cross_lsh as
select
    base.key_word,
    favor.device_id,
    max(if(array_contains(favor.category_ids, cast(base.product_category_id as string)), 1, 0))
        as user_query_cart_thrid_category_cross
from tmp.tmp_user_thrid_category_favor_lsh favor
join tmp.tmp_query_category_cart_base_cross_lsh base
    on favor.device_id = base.add_cart_device_id
group by
    base.key_word,
    favor.device_id;
-- Cross of the user brand preference with the brands added to cart from
-- the query.
-- The flag is 1 when any product_brand_id recorded for the
-- (key_word, device_id) pair is contained in the brand preference array of
-- the device, otherwise 0.
-- Review fix: max over the flag with grouping on the pair only gives one
-- row per pair. The previous grouping by the flag could emit a 0 row and a
-- 1 row for the same pair and fan out the final left join.
drop table if exists tmp.tmp_query_user_cart_brand_cross_lsh;
create table if not exists tmp.tmp_query_user_cart_brand_cross_lsh as
select
    base.key_word,
    favor.device_id,
    max(if(array_contains(favor.brand_ids, cast(base.product_brand_id as string)), 1, 0))
        as user_query_cart_brand_cross
from tmp.tmp_user_brand_favor_lsh favor
join tmp.tmp_query_brand_cart_base_cross_lsh base
    on favor.device_id = base.add_cart_device_id
group by
    base.key_word,
    favor.device_id;
-- Base rows for the purchased-category cross features.
-- Distinct (key_word, pay_device_id, product_category_id) triples from the
-- search detail fact table, for rows that carry a pay_device_id.
-- Review changes:
--   * the 60 day window now ends at the yesterday hivevar (the partition
--     being written) instead of current_date() minus one. For a normal
--     daily run the two windows are identical.
--   * drop table uses if exists
drop table if exists tmp.tmp_query_category_pay_base_cross_lsh;
create table if not exists tmp.tmp_query_category_pay_base_cross_lsh as
select distinct
    key_word,
    pay_device_id,
    product_category_id
from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
where pay_device_id is not null
  and p_day >= date_sub(${yesterday}, 59)
  and p_day <= ${yesterday};
-- Base rows for the purchased-brand cross features.
-- Distinct (key_word, pay_device_id, product_brand_id) triples from the
-- search detail fact table, for rows that carry a pay_device_id.
-- Review changes:
--   * the 60 day window now ends at the yesterday hivevar (the partition
--     being written) instead of current_date() minus one. For a normal
--     daily run the two windows are identical.
--   * drop table uses if exists
drop table if exists tmp.tmp_query_brand_pay_base_cross_lsh;
create table if not exists tmp.tmp_query_brand_pay_base_cross_lsh as
select distinct
    key_word,
    pay_device_id,
    product_brand_id
from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
where pay_device_id is not null
  and p_day >= date_sub(${yesterday}, 59)
  and p_day <= ${yesterday};
-- Cross of the user first-level category preference with the categories
-- purchased from the query.
-- The flag is 1 when any product_category_id recorded for the
-- (key_word, device_id) pair is contained in the preference array of the
-- device, otherwise 0.
-- Review fix: max over the flag with grouping on the pair only gives one
-- row per pair. The previous grouping by the flag could emit a 0 row and a
-- 1 row for the same pair and fan out the final left join.
drop table if exists tmp.tmp_query_user_pay_first_category_cross_lsh;
create table if not exists tmp.tmp_query_user_pay_first_category_cross_lsh as
select
    base.key_word,
    favor.device_id,
    max(if(array_contains(favor.category_ids, cast(base.product_category_id as string)), 1, 0))
        as user_query_pay_first_category_cross
from tmp.tmp_user_first_category_favor_lsh favor
join tmp.tmp_query_category_pay_base_cross_lsh base
    on favor.device_id = base.pay_device_id
group by
    base.key_word,
    favor.device_id;
-- Cross of the user second-level category preference with the categories
-- purchased from the query.
-- The flag is 1 when any product_category_id recorded for the
-- (key_word, device_id) pair is contained in the preference array of the
-- device, otherwise 0.
-- Review fix: max over the flag with grouping on the pair only gives one
-- row per pair. The previous grouping by the flag could emit a 0 row and a
-- 1 row for the same pair and fan out the final left join.
drop table if exists tmp.tmp_query_user_pay_second_category_cross_lsh;
create table if not exists tmp.tmp_query_user_pay_second_category_cross_lsh as
select
    base.key_word,
    favor.device_id,
    max(if(array_contains(favor.category_ids, cast(base.product_category_id as string)), 1, 0))
        as user_query_pay_second_category_cross
from tmp.tmp_user_second_category_favor_lsh favor
join tmp.tmp_query_category_pay_base_cross_lsh base
    on favor.device_id = base.pay_device_id
group by
    base.key_word,
    favor.device_id;
-- Cross of the user third-level category preference with the categories
-- purchased from the query.
-- The flag is 1 when any product_category_id recorded for the
-- (key_word, device_id) pair is contained in the preference array of the
-- device, otherwise 0.
-- Review fix: max over the flag with grouping on the pair only gives one
-- row per pair. The previous grouping by the flag could emit a 0 row and a
-- 1 row for the same pair and fan out the final left join.
drop table if exists tmp.tmp_query_user_pay_thrid_category_cross_lsh;
create table if not exists tmp.tmp_query_user_pay_thrid_category_cross_lsh as
select
    base.key_word,
    favor.device_id,
    max(if(array_contains(favor.category_ids, cast(base.product_category_id as string)), 1, 0))
        as user_query_pay_thrid_category_cross
from tmp.tmp_user_thrid_category_favor_lsh favor
join tmp.tmp_query_category_pay_base_cross_lsh base
    on favor.device_id = base.pay_device_id
group by
    base.key_word,
    favor.device_id;
-- Cross of the user brand preference with the brands purchased from the
-- query.
-- The flag is 1 when any product_brand_id recorded for the
-- (key_word, device_id) pair is contained in the brand preference array of
-- the device, otherwise 0.
-- Review fix: max over the flag with grouping on the pair only gives one
-- row per pair. The previous grouping by the flag could emit a 0 row and a
-- 1 row for the same pair and fan out the final left join.
drop table if exists tmp.tmp_query_user_pay_brand_cross_lsh;
create table if not exists tmp.tmp_query_user_pay_brand_cross_lsh as
select
    base.key_word,
    favor.device_id,
    max(if(array_contains(favor.brand_ids, cast(base.product_brand_id as string)), 1, 0))
        as user_query_pay_brand_cross
from tmp.tmp_user_brand_favor_lsh favor
join tmp.tmp_query_brand_pay_base_cross_lsh base
    on favor.device_id = base.pay_device_id
group by
    base.key_word,
    favor.device_id;
-- Final load of secoo_search.search_query_user_cross for the partition
-- named by the yesterday hivevar.
-- Step 1 (pairs): the distinct, non-null and non-empty (key_word, device_id)
-- pairs that appear in any of the twelve cross tables.
-- Step 2: left join every cross table back onto the pairs. A feature with
-- no row for a pair stays NULL.
-- Review fix: every flag is aggregated with max and the result is grouped
-- by the pair, so the partition holds exactly one row per pair even when a
-- cross table contains both a 0 row and a 1 row for the same pair. Without
-- this the joins multiplied such rows.
insert overwrite table secoo_search.search_query_user_cross
partition (p_day = ${yesterday})
select
    pairs.key_word,
    pairs.device_id,
    max(sc1.user_query_first_category_cross)       as user_query_first_category_cross,
    max(sc2.user_query_second_category_cross)      as user_query_second_category_cross,
    max(sc3.user_query_thrid_category_cross)       as user_query_thrid_category_cross,
    max(sb.user_query_brand_cross)                 as user_query_brand_cross,
    max(cc1.user_query_cart_first_category_cross)  as user_query_cart_first_category_cross,
    max(cc2.user_query_cart_second_category_cross) as user_query_cart_second_category_cross,
    max(cc3.user_query_cart_thrid_category_cross)  as user_query_cart_thrid_category_cross,
    max(cb.user_query_cart_brand_cross)            as user_query_cart_brand_cross,
    max(pc1.user_query_pay_first_category_cross)   as user_query_pay_first_category_cross,
    max(pc2.user_query_pay_second_category_cross)  as user_query_pay_second_category_cross,
    max(pc3.user_query_pay_thrid_category_cross)   as user_query_pay_thrid_category_cross,
    max(pb.user_query_pay_brand_cross)             as user_query_pay_brand_cross
from (
    select
        key_word,
        device_id
    from (
        select key_word, device_id from tmp.tmp_query_user_first_category_cross_lsh
        union all
        select key_word, device_id from tmp.tmp_query_user_second_category_cross_lsh
        union all
        select key_word, device_id from tmp.tmp_query_user_thrid_category_cross_lsh
        union all
        select key_word, device_id from tmp.tmp_query_user_brand_cross_lsh
        union all
        select key_word, device_id from tmp.tmp_query_user_cart_first_category_cross_lsh
        union all
        select key_word, device_id from tmp.tmp_query_user_cart_second_category_cross_lsh
        union all
        select key_word, device_id from tmp.tmp_query_user_cart_thrid_category_cross_lsh
        union all
        select key_word, device_id from tmp.tmp_query_user_cart_brand_cross_lsh
        union all
        select key_word, device_id from tmp.tmp_query_user_pay_first_category_cross_lsh
        union all
        select key_word, device_id from tmp.tmp_query_user_pay_second_category_cross_lsh
        union all
        select key_word, device_id from tmp.tmp_query_user_pay_thrid_category_cross_lsh
        union all
        select key_word, device_id from tmp.tmp_query_user_pay_brand_cross_lsh
    ) all_pairs
    where key_word is not null
      and device_id is not null
      and key_word <> ''
      and device_id <> ''
    group by
        key_word,
        device_id
) pairs
left join tmp.tmp_query_user_first_category_cross_lsh sc1
    on pairs.key_word = sc1.key_word and pairs.device_id = sc1.device_id
left join tmp.tmp_query_user_second_category_cross_lsh sc2
    on pairs.key_word = sc2.key_word and pairs.device_id = sc2.device_id
left join tmp.tmp_query_user_thrid_category_cross_lsh sc3
    on pairs.key_word = sc3.key_word and pairs.device_id = sc3.device_id
left join tmp.tmp_query_user_brand_cross_lsh sb
    on pairs.key_word = sb.key_word and pairs.device_id = sb.device_id
left join tmp.tmp_query_user_cart_first_category_cross_lsh cc1
    on pairs.key_word = cc1.key_word and pairs.device_id = cc1.device_id
left join tmp.tmp_query_user_cart_second_category_cross_lsh cc2
    on pairs.key_word = cc2.key_word and pairs.device_id = cc2.device_id
left join tmp.tmp_query_user_cart_thrid_category_cross_lsh cc3
    on pairs.key_word = cc3.key_word and pairs.device_id = cc3.device_id
left join tmp.tmp_query_user_cart_brand_cross_lsh cb
    on pairs.key_word = cb.key_word and pairs.device_id = cb.device_id
left join tmp.tmp_query_user_pay_first_category_cross_lsh pc1
    on pairs.key_word = pc1.key_word and pairs.device_id = pc1.device_id
left join tmp.tmp_query_user_pay_second_category_cross_lsh pc2
    on pairs.key_word = pc2.key_word and pairs.device_id = pc2.device_id
left join tmp.tmp_query_user_pay_thrid_category_cross_lsh pc3
    on pairs.key_word = pc3.key_word and pairs.device_id = pc3.device_id
left join tmp.tmp_query_user_pay_brand_cross_lsh pb
    on pairs.key_word = pb.key_word and pairs.device_id = pb.device_id
group by
    pairs.key_word,
    pairs.device_id;
\ No newline at end of file
src/main/scripts/cross_feature/query_user/query_user_compose.sh
0 → 100644
View file @
43003b5b
#!/bin/bash
# Daily driver for the query x user cross feature load.
#
# Usage: query_user_compose.sh [today]
#   today  Optional date understood by GNU date. When omitted, the current
#          date is used, as before.
#
# The script derives yesterday (today minus delta_day days), logs the newest
# existing partition of the target table, then runs the insert script with
# the hivevar yesterday set to the quoted date literal.

work_dir="/data/zhaoyanchao/java/shell/cross_feature/query_user/"
delta_day=1
today_param="${1:-}"

# Stop early when the date cannot be computed. Previously an invalid argument
# left yesterday empty and hive was still started with an empty partition value.
if ! yesterday=$(date -d "${today_param} -${delta_day} day" "+%Y-%m-%d"); then
    echo "cannot derive yesterday from today_param '${today_param}'" >&2
    exit 1
fi

# Informational only: the last line of show partitions, e.g. p_day=2021-06-14.
# Characters 6..15 of that line are the date part.
partition=$(hive -e "show partitions secoo_search.search_query_user_cross" | tail -n 1)
partition_day=${partition:6:10}

echo "today_param is $today_param yesterday is $yesterday and partition_day is $partition_day"

# The exit status of hive is the exit status of this script.
hive --hivevar yesterday="'${yesterday}'" -f "${work_dir}insert_cross_feature_query_user.sql"
src/main/scripts/query_feature/create_query_brand_category_feature_table.sql
0 → 100644
View file @
43003b5b
-- Target table for the per-query brand and category features.
-- One row per key_word, partitioned by day (p_day).
-- Four groups of five int columns: top 5 add-to-cart brands, top 5
-- add-to-cart categories, top 5 purchased brands, top 5 purchased categories.
create external table if not exists secoo_search.search_query_brand_category (
    key_word        string comment 'query',
    cart_brand_1    int    comment 'query最近加购的品牌Top5',
    cart_brand_2    int    comment 'query最近加购的品牌Top5',
    cart_brand_3    int    comment 'query最近加购的品牌Top5',
    cart_brand_4    int    comment 'query最近加购的品牌Top5',
    cart_brand_5    int    comment 'query最近加购的品牌Top5',
    cart_category_1 int    comment 'query最近加购的品类Top5',
    cart_category_2 int    comment 'query最近加购的品类Top5',
    cart_category_3 int    comment 'query最近加购的品类Top5',
    cart_category_4 int    comment 'query最近加购的品类Top5',
    cart_category_5 int    comment 'query最近加购的品类Top5',
    pay_brand_1     int    comment 'query最近购买的品牌Top5',
    pay_brand_2     int    comment 'query最近购买的品牌Top5',
    pay_brand_3     int    comment 'query最近购买的品牌Top5',
    pay_brand_4     int    comment 'query最近购买的品牌Top5',
    pay_brand_5     int    comment 'query最近购买的品牌Top5',
    pay_category_1  int    comment 'query最近购买的品类Top5',
    pay_category_2  int    comment 'query最近购买的品类Top5',
    pay_category_3  int    comment 'query最近购买的品类Top5',
    pay_category_4  int    comment 'query最近购买的品类Top5',
    pay_category_5  int    comment 'query最近购买的品类Top5'
)
comment '搜索query品牌品类特征特征'
partitioned by (
    p_day date comment '分区日期'
)
stored as parquet;
\ No newline at end of file
src/main/scripts/query_feature/insert_query_brand_category_table.sql
0 → 100644
View file @
43003b5b
-- Base rows for the per-query brand and category features: the columns
-- needed downstream from the search detail fact table over a 60 day window.
-- Review changes:
--   * the window now ends at the yesterday hivevar (the partition being
--     written) instead of current_date() minus one. For a normal daily run
--     the two windows are identical.
--   * only add-to-cart or paid rows are kept. The statements that read this
--     table in this script filter on is_action_add_cart = 1 or
--     is_pay_success = 1, so other rows were materialised and never used.
--   * drop table uses if exists
-- NOTE(review): confirm that no job outside this script reads
-- tmp.tmp_query_base_lsh and expects the unfiltered rows.
drop table if exists tmp.tmp_query_base_lsh;
create table if not exists tmp.tmp_query_base_lsh as
select
    key_word,
    product_brand_id,
    product_category_id,
    is_action_add_cart,
    is_pay_success
from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
where p_day >= date_sub(${yesterday}, 59)
  and p_day <= ${yesterday}
  and (is_action_add_cart = 1 or is_pay_success = 1);
-- Top 5 add-to-cart brands per query.
-- Counts add-to-cart rows per (key_word, product_brand_id), ranks the brands
-- of each key_word by that count and pivots ranks 1..5 into cart_brand_1 ..
-- cart_brand_5. A rank that does not exist for a key_word yields 0.
-- Review changes:
--   * product_brand_id is added as a tie-breaker in the ranking so equal
--     counts give the same result on every run
--   * rows with a NULL product_brand_id are excluded. Before, a NULL brand
--     could occupy a rank and show up as 0 while pushing a real brand out.
--   * drop table uses if exists
drop table if exists tmp.tmp_query_cart_brand_lsh;
create table if not exists tmp.tmp_query_cart_brand_lsh as
select
    key_word,
    max(case when rk = 1 then product_brand_id else 0 end) as cart_brand_1,
    max(case when rk = 2 then product_brand_id else 0 end) as cart_brand_2,
    max(case when rk = 3 then product_brand_id else 0 end) as cart_brand_3,
    max(case when rk = 4 then product_brand_id else 0 end) as cart_brand_4,
    max(case when rk = 5 then product_brand_id else 0 end) as cart_brand_5
from (
    select
        key_word,
        product_brand_id,
        row_number() over (
            partition by key_word
            order by nums desc, product_brand_id
        ) as rk
    from (
        select
            key_word,
            product_brand_id,
            count(*) as nums
        from tmp.tmp_query_base_lsh
        where is_action_add_cart = 1
          and product_brand_id is not null
        group by
            key_word,
            product_brand_id
    ) counted
) ranked
where rk <= 5
group by key_word;
-- Top 5 add-to-cart categories per query.
-- Counts add-to-cart rows per (key_word, product_category_id), ranks the
-- categories of each key_word by that count and pivots ranks 1..5 into
-- cart_category_1 .. cart_category_5. A missing rank yields 0.
-- Review changes:
--   * product_category_id is added as a tie-breaker in the ranking so equal
--     counts give the same result on every run
--   * rows with a NULL product_category_id are excluded. Before, a NULL
--     category could occupy a rank and show up as 0.
--   * drop table uses if exists
drop table if exists tmp.tmp_query_cart_category_lsh;
create table if not exists tmp.tmp_query_cart_category_lsh as
select
    key_word,
    max(case when rk = 1 then product_category_id else 0 end) as cart_category_1,
    max(case when rk = 2 then product_category_id else 0 end) as cart_category_2,
    max(case when rk = 3 then product_category_id else 0 end) as cart_category_3,
    max(case when rk = 4 then product_category_id else 0 end) as cart_category_4,
    max(case when rk = 5 then product_category_id else 0 end) as cart_category_5
from (
    select
        key_word,
        product_category_id,
        row_number() over (
            partition by key_word
            order by nums desc, product_category_id
        ) as rk
    from (
        select
            key_word,
            product_category_id,
            count(*) as nums
        from tmp.tmp_query_base_lsh
        where is_action_add_cart = 1
          and product_category_id is not null
        group by
            key_word,
            product_category_id
    ) counted
) ranked
where rk <= 5
group by key_word;
-- Top 5 purchased brands per query.
-- Counts paid rows per (key_word, product_brand_id), ranks the brands of
-- each key_word by that count and pivots ranks 1..5 into pay_brand_1 ..
-- pay_brand_5. A missing rank yields 0.
-- Review changes:
--   * product_brand_id is added as a tie-breaker in the ranking so equal
--     counts give the same result on every run
--   * rows with a NULL product_brand_id are excluded. Before, a NULL brand
--     could occupy a rank and show up as 0.
--   * drop table uses if exists
drop table if exists tmp.tmp_query_pay_brand_lsh;
create table if not exists tmp.tmp_query_pay_brand_lsh as
select
    key_word,
    max(case when rk = 1 then product_brand_id else 0 end) as pay_brand_1,
    max(case when rk = 2 then product_brand_id else 0 end) as pay_brand_2,
    max(case when rk = 3 then product_brand_id else 0 end) as pay_brand_3,
    max(case when rk = 4 then product_brand_id else 0 end) as pay_brand_4,
    max(case when rk = 5 then product_brand_id else 0 end) as pay_brand_5
from (
    select
        key_word,
        product_brand_id,
        row_number() over (
            partition by key_word
            order by nums desc, product_brand_id
        ) as rk
    from (
        select
            key_word,
            product_brand_id,
            count(*) as nums
        from tmp.tmp_query_base_lsh
        where is_pay_success = 1
          and product_brand_id is not null
        group by
            key_word,
            product_brand_id
    ) counted
) ranked
where rk <= 5
group by key_word;
-- Top 5 purchased categories per query.
-- Counts paid rows per (key_word, product_category_id), ranks the
-- categories of each key_word by that count and pivots ranks 1..5 into
-- pay_category_1 .. pay_category_5. A missing rank yields 0.
-- Review changes:
--   * product_category_id is added as a tie-breaker in the ranking so equal
--     counts give the same result on every run
--   * rows with a NULL product_category_id are excluded. Before, a NULL
--     category could occupy a rank and show up as 0.
--   * drop table uses if exists
drop table if exists tmp.tmp_query_pay_category_lsh;
create table if not exists tmp.tmp_query_pay_category_lsh as
select
    key_word,
    max(case when rk = 1 then product_category_id else 0 end) as pay_category_1,
    max(case when rk = 2 then product_category_id else 0 end) as pay_category_2,
    max(case when rk = 3 then product_category_id else 0 end) as pay_category_3,
    max(case when rk = 4 then product_category_id else 0 end) as pay_category_4,
    max(case when rk = 5 then product_category_id else 0 end) as pay_category_5
from (
    select
        key_word,
        product_category_id,
        row_number() over (
            partition by key_word
            order by nums desc, product_category_id
        ) as rk
    from (
        select
            key_word,
            product_category_id,
            count(*) as nums
        from tmp.tmp_query_base_lsh
        where is_pay_success = 1
          and product_category_id is not null
        group by
            key_word,
            product_category_id
    ) counted
) ranked
where rk <= 5
group by key_word;
-- Final load of secoo_search.search_query_brand_category for the partition
-- named by the yesterday hivevar.
-- Step 1 (kw): the distinct key_word values present in any of the four
-- top 5 tables.
-- Step 2: left join each top 5 table back on key_word. Each of them is
-- built with group by key_word, so the joins do not multiply rows.
-- Review fix: NULL and empty key_word values are dropped from the spine,
-- matching the query x user loader. A NULL key_word can never match in the
-- joins and previously produced a row whose features were all NULL.
-- NOTE(review): a key_word missing from one of the top 5 tables gets NULL in
-- those columns, while a missing rank inside a table gets 0. Confirm that
-- consumers treat both as absent.
insert overwrite table secoo_search.search_query_brand_category
partition (p_day = ${yesterday})
select
    kw.key_word,
    cb.cart_brand_1,
    cb.cart_brand_2,
    cb.cart_brand_3,
    cb.cart_brand_4,
    cb.cart_brand_5,
    cc.cart_category_1,
    cc.cart_category_2,
    cc.cart_category_3,
    cc.cart_category_4,
    cc.cart_category_5,
    pb.pay_brand_1,
    pb.pay_brand_2,
    pb.pay_brand_3,
    pb.pay_brand_4,
    pb.pay_brand_5,
    pc.pay_category_1,
    pc.pay_category_2,
    pc.pay_category_3,
    pc.pay_category_4,
    pc.pay_category_5
from (
    select key_word
    from (
        select key_word from tmp.tmp_query_cart_brand_lsh
        union all
        select key_word from tmp.tmp_query_cart_category_lsh
        union all
        select key_word from tmp.tmp_query_pay_brand_lsh
        union all
        select key_word from tmp.tmp_query_pay_category_lsh
    ) all_kw
    where key_word is not null
      and key_word <> ''
    group by key_word
) kw
left join tmp.tmp_query_cart_brand_lsh cb
    on kw.key_word = cb.key_word
left join tmp.tmp_query_cart_category_lsh cc
    on kw.key_word = cc.key_word
left join tmp.tmp_query_pay_brand_lsh pb
    on kw.key_word = pb.key_word
left join tmp.tmp_query_pay_category_lsh pc
    on kw.key_word = pc.key_word;
\ No newline at end of file
src/main/scripts/query_feature/query_feature_brand_category_compose.sh
0 → 100644
View file @
43003b5b
#!/bin/bash
# Daily driver for the per-query brand and category feature load.
#
# Usage: query_feature_brand_category_compose.sh [today]
#   today  Optional date understood by GNU date. When omitted, the current
#          date is used, as before.
#
# The script derives yesterday (today minus delta_day days), logs the newest
# existing partition of the target table, then runs the insert script with
# the hivevar yesterday set to the quoted date literal.

# NOTE(review): this path sits under cross_feature/ although the script lives
# in query_feature/ in the repository - confirm the deployed location.
work_dir="/data/zhaoyanchao/java/shell/cross_feature/query_feature/"
delta_day=1
today_param="${1:-}"

# Stop early when the date cannot be computed. Previously an invalid argument
# left yesterday empty and hive was still started with an empty partition value.
if ! yesterday=$(date -d "${today_param} -${delta_day} day" "+%Y-%m-%d"); then
    echo "cannot derive yesterday from today_param '${today_param}'" >&2
    exit 1
fi

# Informational only: the last line of show partitions, e.g. p_day=2021-06-14.
# Characters 6..15 of that line are the date part.
partition=$(hive -e "show partitions secoo_search.search_query_brand_category" | tail -n 1)
partition_day=${partition:6:10}

echo "today_param is $today_param yesterday is $yesterday and partition_day is $partition_day"

# The exit status of hive is the exit status of this script.
hive --hivevar yesterday="'${yesterday}'" -f "${work_dir}insert_query_brand_category_table.sql"
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment