Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
search-model-data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
田川
search-model-data
Commits
bdf04c7b
Commit
bdf04c7b
authored
Jun 11, 2021
by
lishihang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
'ADD:feature'
parent
80ff1d09
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
181 additions
and
0 deletions
+181
-0
create_cross_feature_query_user.sql
src/main/scripts/cross_feature/query_user/create_cross_feature_query_user.sql
+18
-0
insert_cross_feature_query_user.sql
src/main/scripts/cross_feature/query_user/insert_cross_feature_query_user.sql
+0
-0
query_user_compose.sh
src/main/scripts/cross_feature/query_user/query_user_compose.sh
+12
-0
create_query_brand_category_feature_table.sql
src/main/scripts/query_feature/create_query_brand_category_feature_table.sql
+26
-0
insert_query_brand_category_table.sql
src/main/scripts/query_feature/insert_query_brand_category_table.sql
+113
-0
query_feature_brand_category_compose.sh
src/main/scripts/query_feature/query_feature_brand_category_compose.sh
+12
-0
No files found.
src/main/scripts/cross_feature/query_user/create_cross_feature_query_user.sql
0 → 100644
View file @
bdf04c7b
-- Hive DDL: user x query cross-feature table, one partition per day (p_day).
-- The COMMENT literals are table metadata and are kept verbatim; the English
-- notes on each group below are translations of those literals.
-- NOTE: "thrid" is the spelling used by the existing column names; it is kept
-- unchanged because renaming would alter the table's schema.
CREATE EXTERNAL TABLE IF NOT EXISTS secoo_search.search_query_user_cross (
    key_word                              STRING COMMENT 'query',
    device_id                             STRING COMMENT 'device_id',

    -- user category/brand preference crossed with the category/brand recognized from the query
    user_query_first_category_cross       BIGINT COMMENT '用户一级品类偏好和Query的识别品类的交叉',
    user_query_second_category_cross      BIGINT COMMENT '用户二级品类偏好和Query的识别品类的交叉',
    user_query_thrid_category_cross       BIGINT COMMENT '用户三级品类偏好和Query的识别品类的交叉',
    user_query_brand_cross                BIGINT COMMENT '用户品牌偏好和Query的识别品牌的交叉',

    -- user category/brand preference crossed with the query's add-to-cart category/brand
    user_query_cart_first_category_cross  BIGINT COMMENT '用户一级品类偏好和Query的加购品类的交叉',
    user_query_cart_second_category_cross BIGINT COMMENT '用户二级品类偏好和Query的加购品类的交叉',
    user_query_cart_thrid_category_cross  BIGINT COMMENT '用户三级品类偏好和Query的加购品类的交叉',
    user_query_cart_brand_cross           BIGINT COMMENT '用户品牌偏好和Query的加购品牌的交叉',

    -- user category/brand preference crossed with the query's purchased category/brand
    user_query_pay_first_category_cross   BIGINT COMMENT '用户一级品类偏好和Query的购买品类的交叉',
    user_query_pay_second_category_cross  BIGINT COMMENT '用户二级品类偏好和Query的购买品类的交叉',
    user_query_pay_thrid_category_cross   BIGINT COMMENT '用户三级品类偏好和Query的购买品类的交叉',
    user_query_pay_brand_cross            BIGINT COMMENT '用户品牌偏好和Query的购买品牌的交叉'
)
COMMENT '搜索用户query交叉特征'
PARTITIONED BY (
    p_day DATE COMMENT '分区日期'  -- partition date
)
STORED AS PARQUET;
src/main/scripts/cross_feature/query_user/insert_cross_feature_query_user.sql
0 → 100644
View file @
bdf04c7b
This diff is collapsed.
Click to expand it.
src/main/scripts/cross_feature/query_user/query_user_compose.sh
0 → 100644
View file @
bdf04c7b
#!/bin/bash
# Driver for the query x user cross-feature load.
#
# Usage: query_user_compose.sh [today]
#   today  optional date understood by `date -d` (e.g. 2021-06-11). When it is
#          omitted, `date` resolves the offset relative to the current day.
#
# Computes the target day (today - delta_day), logs the most recent existing
# partition of the target table, then runs the insert script with the target
# day passed to Hive as the quoted hivevar `yesterday`.
# Uses bash-only substring expansion (${var:offset:length}), hence the shebang.

work_dir="/data/zhaoyanchao/java/shell/cross_feature/query_user/"
delta_day=1
today_param="${1:-}"

# Abort instead of launching Hive with an empty partition value when the
# supplied date cannot be parsed.
if ! yesterday=$(date -d "${today_param} -${delta_day} day" "+%Y-%m-%d"); then
    echo "cannot derive the target day from today_param '${today_param}'" >&2
    exit 1
fi

# Last line of the partition listing; used for logging only.
partition=$(hive -e "show partitions secoo_search.search_query_user_cross" | tail -n 1)
# Listing lines look like "p_day=YYYY-MM-DD": skip the 6-char "p_day=" prefix
# and keep the 10-char date.
partition_day=${partition:6:10}

echo "today_param is $today_param yesterday is $yesterday and partition_day is $partition_day"

# The value is wrapped in single quotes so ${yesterday} expands to a SQL string
# literal inside the script.
hive --hivevar yesterday="'${yesterday}'" -f "${work_dir}insert_cross_feature_query_user.sql"
src/main/scripts/query_feature/create_query_brand_category_feature_table.sql
0 → 100644
View file @
bdf04c7b
-- Hive DDL: per-query brand/category feature table, one partition per day (p_day).
-- Each feature family has five slots (_1 .. _5). The COMMENT literals are table
-- metadata and are kept verbatim; the English notes are translations of them.
CREATE EXTERNAL TABLE IF NOT EXISTS secoo_search.search_query_brand_category (
    key_word        STRING COMMENT 'query',

    -- top-5 brands recently added to cart for the query
    cart_brand_1    INT COMMENT 'query最近加购的品牌Top5',
    cart_brand_2    INT COMMENT 'query最近加购的品牌Top5',
    cart_brand_3    INT COMMENT 'query最近加购的品牌Top5',
    cart_brand_4    INT COMMENT 'query最近加购的品牌Top5',
    cart_brand_5    INT COMMENT 'query最近加购的品牌Top5',

    -- top-5 categories recently added to cart for the query
    cart_category_1 INT COMMENT 'query最近加购的品类Top5',
    cart_category_2 INT COMMENT 'query最近加购的品类Top5',
    cart_category_3 INT COMMENT 'query最近加购的品类Top5',
    cart_category_4 INT COMMENT 'query最近加购的品类Top5',
    cart_category_5 INT COMMENT 'query最近加购的品类Top5',

    -- top-5 brands recently purchased for the query
    pay_brand_1     INT COMMENT 'query最近购买的品牌Top5',
    pay_brand_2     INT COMMENT 'query最近购买的品牌Top5',
    pay_brand_3     INT COMMENT 'query最近购买的品牌Top5',
    pay_brand_4     INT COMMENT 'query最近购买的品牌Top5',
    pay_brand_5     INT COMMENT 'query最近购买的品牌Top5',

    -- top-5 categories recently purchased for the query
    pay_category_1  INT COMMENT 'query最近购买的品类Top5',
    pay_category_2  INT COMMENT 'query最近购买的品类Top5',
    pay_category_3  INT COMMENT 'query最近购买的品类Top5',
    pay_category_4  INT COMMENT 'query最近购买的品类Top5',
    pay_category_5  INT COMMENT 'query最近购买的品类Top5'
)
COMMENT '搜索query品牌品类特征特征'
PARTITIONED BY (
    p_day DATE COMMENT '分区日期'  -- partition date
)
STORED AS PARQUET;
\ No newline at end of file
src/main/scripts/query_feature/insert_query_brand_category_table.sql
0 → 100644
View file @
bdf04c7b
-- Stage the search-detail rows that every top-5 aggregation below reads from.
--
-- The 15-day window ends on ${yesterday}, the same hivevar that names the
-- output partition of this script, so a re-run or backfill for a past day uses
-- that day's window rather than one anchored on the wall-clock date.
-- ${yesterday} arrives as a quoted 'YYYY-MM-DD' literal from the driver script.
--
-- The table is dropped and recreated unconditionally: IF EXISTS keeps the first
-- run from depending on the table being present, and the CTAS has no
-- IF NOT EXISTS so it can never silently keep a stale copy.
drop table if exists tmp.tmp_query_base_lsh;

create table tmp.tmp_query_base_lsh as
select
    key_word,
    product_brand_id,
    product_category_id,
    is_action_add_cart,
    is_pay_success
from secoo_fact_hour.fact_search_detail_union_p_hour_inrc
where p_day >= date_sub(${yesterday}, 14)  -- 15 days inclusive, ending on ${yesterday}
  and p_day <= ${yesterday};
-- Top-5 add-to-cart brands per query.
--
-- Steps: count staged rows with is_action_add_cart = 1 per (key_word, brand),
-- rank brands inside each key_word by that count, then pivot ranks 1..5 into
-- the columns cart_brand_1 .. cart_brand_5. A slot with no brand at that rank
-- is 0.
-- The ranking adds product_brand_id as a tie-breaker so equal counts always
-- resolve the same way; ordering by the count alone let row_number() pick an
-- arbitrary winner, making re-runs on identical data differ.
drop table if exists tmp.tmp_query_cart_brand_lsh;

create table tmp.tmp_query_cart_brand_lsh as
select
    key_word,
    max(case when rk = 1 then product_brand_id else 0 end) as cart_brand_1,
    max(case when rk = 2 then product_brand_id else 0 end) as cart_brand_2,
    max(case when rk = 3 then product_brand_id else 0 end) as cart_brand_3,
    max(case when rk = 4 then product_brand_id else 0 end) as cart_brand_4,
    max(case when rk = 5 then product_brand_id else 0 end) as cart_brand_5
from (
    select
        key_word,
        product_brand_id,
        row_number() over (
            partition by key_word
            order by nums desc, product_brand_id
        ) as rk
    from (
        select
            key_word,
            product_brand_id,
            count(*) as nums
        from tmp.tmp_query_base_lsh
        where is_action_add_cart = 1
        group by
            key_word,
            product_brand_id
    ) t0
) t
where rk <= 5
group by key_word;
-- Top-5 add-to-cart categories per query.
--
-- Steps: count staged rows with is_action_add_cart = 1 per (key_word,
-- category), rank categories inside each key_word by that count, then pivot
-- ranks 1..5 into cart_category_1 .. cart_category_5. A slot with no category
-- at that rank is 0.
-- The ranking adds product_category_id as a tie-breaker so equal counts always
-- resolve the same way; ordering by the count alone let row_number() pick an
-- arbitrary winner, making re-runs on identical data differ.
drop table if exists tmp.tmp_query_cart_category_lsh;

create table tmp.tmp_query_cart_category_lsh as
select
    key_word,
    max(case when rk = 1 then product_category_id else 0 end) as cart_category_1,
    max(case when rk = 2 then product_category_id else 0 end) as cart_category_2,
    max(case when rk = 3 then product_category_id else 0 end) as cart_category_3,
    max(case when rk = 4 then product_category_id else 0 end) as cart_category_4,
    max(case when rk = 5 then product_category_id else 0 end) as cart_category_5
from (
    select
        key_word,
        product_category_id,
        row_number() over (
            partition by key_word
            order by nums desc, product_category_id
        ) as rk
    from (
        select
            key_word,
            product_category_id,
            count(*) as nums
        from tmp.tmp_query_base_lsh
        where is_action_add_cart = 1
        group by
            key_word,
            product_category_id
    ) t0
) t
where rk <= 5
group by key_word;
-- Top-5 purchased brands per query.
--
-- Steps: count staged rows with is_pay_success = 1 per (key_word, brand), rank
-- brands inside each key_word by that count, then pivot ranks 1..5 into
-- pay_brand_1 .. pay_brand_5. A slot with no brand at that rank is 0.
-- The ranking adds product_brand_id as a tie-breaker so equal counts always
-- resolve the same way; ordering by the count alone let row_number() pick an
-- arbitrary winner, making re-runs on identical data differ.
drop table if exists tmp.tmp_query_pay_brand_lsh;

create table tmp.tmp_query_pay_brand_lsh as
select
    key_word,
    max(case when rk = 1 then product_brand_id else 0 end) as pay_brand_1,
    max(case when rk = 2 then product_brand_id else 0 end) as pay_brand_2,
    max(case when rk = 3 then product_brand_id else 0 end) as pay_brand_3,
    max(case when rk = 4 then product_brand_id else 0 end) as pay_brand_4,
    max(case when rk = 5 then product_brand_id else 0 end) as pay_brand_5
from (
    select
        key_word,
        product_brand_id,
        row_number() over (
            partition by key_word
            order by nums desc, product_brand_id
        ) as rk
    from (
        select
            key_word,
            product_brand_id,
            count(*) as nums
        from tmp.tmp_query_base_lsh
        where is_pay_success = 1
        group by
            key_word,
            product_brand_id
    ) t0
) t
where rk <= 5
group by key_word;
-- Top-5 purchased categories per query.
--
-- Steps: count staged rows with is_pay_success = 1 per (key_word, category),
-- rank categories inside each key_word by that count, then pivot ranks 1..5
-- into pay_category_1 .. pay_category_5. A slot with no category at that rank
-- is 0.
-- The ranking adds product_category_id as a tie-breaker so equal counts always
-- resolve the same way; ordering by the count alone let row_number() pick an
-- arbitrary winner, making re-runs on identical data differ.
drop table if exists tmp.tmp_query_pay_category_lsh;

create table tmp.tmp_query_pay_category_lsh as
select
    key_word,
    max(case when rk = 1 then product_category_id else 0 end) as pay_category_1,
    max(case when rk = 2 then product_category_id else 0 end) as pay_category_2,
    max(case when rk = 3 then product_category_id else 0 end) as pay_category_3,
    max(case when rk = 4 then product_category_id else 0 end) as pay_category_4,
    max(case when rk = 5 then product_category_id else 0 end) as pay_category_5
from (
    select
        key_word,
        product_category_id,
        row_number() over (
            partition by key_word
            order by nums desc, product_category_id
        ) as rk
    from (
        select
            key_word,
            product_category_id,
            count(*) as nums
        from tmp.tmp_query_base_lsh
        where is_pay_success = 1
        group by
            key_word,
            product_category_id
    ) t0
) t
where rk <= 5
group by key_word;
-- Publish the per-query brand/category features into partition ${yesterday}.
--
-- t0 is the distinct set of key_words that appear in any of the four top-5
-- staging tables; each staging table is then left-joined back onto it, so a
-- query present in only some of them still produces one output row.
--
-- The staging tables already use 0 for "no value at this rank". A key_word that
-- is missing from a whole staging table would otherwise surface as NULL through
-- the left join, giving two different encodings of "no value" in one column;
-- coalesce(..., 0) makes the sentinel uniform.
-- NOTE(review): confirm downstream readers do not rely on NULL to distinguish
-- "no cart/pay activity at all" from "fewer than five distinct values".
--
-- The select list is positional against the target table's column order, so
-- every column is qualified with the staging table it comes from.
insert overwrite table secoo_search.search_query_brand_category
partition (p_day = ${yesterday})
select
    t0.key_word,
    coalesce(t1.cart_brand_1, 0)    as cart_brand_1,
    coalesce(t1.cart_brand_2, 0)    as cart_brand_2,
    coalesce(t1.cart_brand_3, 0)    as cart_brand_3,
    coalesce(t1.cart_brand_4, 0)    as cart_brand_4,
    coalesce(t1.cart_brand_5, 0)    as cart_brand_5,
    coalesce(t2.cart_category_1, 0) as cart_category_1,
    coalesce(t2.cart_category_2, 0) as cart_category_2,
    coalesce(t2.cart_category_3, 0) as cart_category_3,
    coalesce(t2.cart_category_4, 0) as cart_category_4,
    coalesce(t2.cart_category_5, 0) as cart_category_5,
    coalesce(t3.pay_brand_1, 0)     as pay_brand_1,
    coalesce(t3.pay_brand_2, 0)     as pay_brand_2,
    coalesce(t3.pay_brand_3, 0)     as pay_brand_3,
    coalesce(t3.pay_brand_4, 0)     as pay_brand_4,
    coalesce(t3.pay_brand_5, 0)     as pay_brand_5,
    coalesce(t4.pay_category_1, 0)  as pay_category_1,
    coalesce(t4.pay_category_2, 0)  as pay_category_2,
    coalesce(t4.pay_category_3, 0)  as pay_category_3,
    coalesce(t4.pay_category_4, 0)  as pay_category_4,
    coalesce(t4.pay_category_5, 0)  as pay_category_5
from (
    select key_word
    from (
        select key_word from tmp.tmp_query_cart_brand_lsh
        union all
        select key_word from tmp.tmp_query_cart_category_lsh
        union all
        select key_word from tmp.tmp_query_pay_brand_lsh
        union all
        select key_word from tmp.tmp_query_pay_category_lsh
    ) s0
    group by key_word
) t0
left join tmp.tmp_query_cart_brand_lsh t1
    on t0.key_word = t1.key_word
left join tmp.tmp_query_cart_category_lsh t2
    on t0.key_word = t2.key_word
left join tmp.tmp_query_pay_brand_lsh t3
    on t0.key_word = t3.key_word
left join tmp.tmp_query_pay_category_lsh t4
    on t0.key_word = t4.key_word;
\ No newline at end of file
src/main/scripts/query_feature/query_feature_brand_category_compose.sh
0 → 100644
View file @
bdf04c7b
#!/bin/bash
# Driver for the per-query brand/category feature load.
#
# Usage: query_feature_brand_category_compose.sh [today]
#   today  optional date understood by `date -d` (e.g. 2021-06-11). When it is
#          omitted, `date` resolves the offset relative to the current day.
#
# Computes the target day (today - delta_day), logs the most recent existing
# partition of the target table, then runs the insert script with the target
# day passed to Hive as the quoted hivevar `yesterday`.
# Uses bash-only substring expansion (${var:offset:length}), hence the shebang.

work_dir="/data/zhaoyanchao/java/shell/cross_feature/query_feature/"
delta_day=1
today_param="${1:-}"

# Abort instead of launching Hive with an empty partition value when the
# supplied date cannot be parsed.
if ! yesterday=$(date -d "${today_param} -${delta_day} day" "+%Y-%m-%d"); then
    echo "cannot derive the target day from today_param '${today_param}'" >&2
    exit 1
fi

# Last line of the partition listing; used for logging only.
partition=$(hive -e "show partitions secoo_search.search_query_brand_category" | tail -n 1)
# Listing lines look like "p_day=YYYY-MM-DD": skip the 6-char "p_day=" prefix
# and keep the 10-char date.
partition_day=${partition:6:10}

echo "today_param is $today_param yesterday is $yesterday and partition_day is $partition_day"

# The value is wrapped in single quotes so ${yesterday} expands to a SQL string
# literal inside the script.
hive --hivevar yesterday="'${yesterday}'" -f "${work_dir}insert_query_brand_category_table.sql"
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment