Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
search-model-data
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
田川
search-model-data
Commits
5c1d537f
Commit
5c1d537f
authored
Jun 17, 2021
by
王玉龙
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'FEATURE-SEARCH-903-queryFeature' into 'master'
1.query特征数据优化 See merge request tianchuan/search-model-data!6
parents
d37803c9
2a9c67b8
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
120 additions
and
22 deletions
+120
-22
KeywordFeatureExtractJob.java
src/main/java/com/secoo/search/job/keyword/KeywordFeatureExtractJob.java
+1
-1
create_query_feature_table.sql
src/main/scripts/query_feature/create_query_feature_table.sql
+33
-10
create_query_semantic_feature_table.sql
src/main/scripts/query_feature/create_query_semantic_feature_table.sql
+23
-0
insert_query_feature_table.sql
src/main/scripts/query_feature/insert_query_feature_table.sql
+42
-0
query_feature_extract.sh
src/main/scripts/query_feature/query_feature_extract.sh
+3
-11
query_feature_semantic_extract.sh
src/main/scripts/query_feature/query_feature_semantic_extract.sh
+18
-0
No files found.
src/main/java/com/secoo/search/job/keyword/KeywordFeatureExtractJob.java
View file @
5c1d537f
...
...
@@ -58,7 +58,7 @@ public class KeywordFeatureExtractJob extends Configured implements Tool {
job
.
setReducerClass
(
KeywordFeatureExtractReduce
.
class
);
job
.
setOutputFormatClass
(
TextOutputFormat
.
class
);
FileOutputFormat
.
setOutputPath
(
job
,
new
Path
(
"hdfs://tesla-cluster/apps/hive/warehouse/secoo_search.db/search_data_query_
original
_feature"
));
FileOutputFormat
.
setOutputPath
(
job
,
new
Path
(
"hdfs://tesla-cluster/apps/hive/warehouse/secoo_search.db/search_data_query_
semantic
_feature"
));
boolean
success
=
job
.
waitForCompletion
(
true
);
if
(!
success
)
{
...
...
src/main/scripts/query_feature/create_query_feature_table.sql
View file @
5c1d537f
...
...
@@ -2,21 +2,43 @@ create external table if not exists secoo_search.search_data_query_original_fea
(
keyword
string
comment
'query词'
,
query_cat_1
bigint
comment
'识别类目1'
,
query_cat_2
bigint
comment
'识别类目2'
,
query_cat_3
bigint
comment
'识别类目3'
,
query_cat_4
bigint
comment
'识别类目4'
,
query_cat_5
bigint
comment
'识别类目5'
,
query_cat_1
bigint
comment
'识别类目1'
,
query_cat_2
bigint
comment
'识别类目2'
,
query_cat_3
bigint
comment
'识别类目3'
,
query_cat_4
bigint
comment
'识别类目4'
,
query_cat_5
bigint
comment
'识别类目5'
,
query_brand_1
bigint
comment
'识别品牌1'
,
query_brand_2
bigint
comment
'识别品牌2'
,
query_brand_3
bigint
comment
'识别品牌3'
,
query_brand_1
bigint
comment
'识别品牌1'
,
query_brand_2
bigint
comment
'识别品牌2'
,
query_brand_3
bigint
comment
'识别品牌3'
,
query_gender
tinyint
comment
'识别性别,1是男,2是女, 0是没有'
,
query_gender
tinyint
comment
'识别性别,1是男,2是女, 0是没有'
,
query_contains_other_word
tinyint
comment
'是否含其他词'
,
query_word_size
tinyint
comment
'query分词个数'
,
query_search_pv
bigint
comment
'query搜索次数'
,
query_search_uv
bigint
comment
'query搜索人数'
query_search_uv
bigint
comment
'query搜索人数'
,
cart_brand_1
bigint
comment
'query最近加购的品牌Top5'
,
cart_brand_2
bigint
comment
'query最近加购的品牌Top5'
,
cart_brand_3
bigint
comment
'query最近加购的品牌Top5'
,
cart_brand_4
bigint
comment
'query最近加购的品牌Top5'
,
cart_brand_5
bigint
comment
'query最近加购的品牌Top5'
,
cart_category_1
bigint
comment
'query最近加购的品类Top5'
,
cart_category_2
bigint
comment
'query最近加购的品类Top5'
,
cart_category_3
bigint
comment
'query最近加购的品类Top5'
,
cart_category_4
bigint
comment
'query最近加购的品类Top5'
,
cart_category_5
bigint
comment
'query最近加购的品类Top5'
,
pay_brand_1
bigint
comment
'query最近购买的品牌Top5'
,
pay_brand_2
bigint
comment
'query最近购买的品牌Top5'
,
pay_brand_3
bigint
comment
'query最近购买的品牌Top5'
,
pay_brand_4
bigint
comment
'query最近购买的品牌Top5'
,
pay_brand_5
bigint
comment
'query最近购买的品牌Top5'
,
pay_category_1
bigint
comment
'query最近购买的品类Top5'
,
pay_category_2
bigint
comment
'query最近购买的品类Top5'
,
pay_category_3
bigint
comment
'query最近购买的品类Top5'
,
pay_category_4
bigint
comment
'query最近购买的品类Top5'
,
pay_category_5
bigint
comment
'query最近购买的品类Top5'
)
comment
'query原始特征'
row
format
delimited
fields
terminated
by
'
\t
'
stored
as
textfile
;
\ No newline at end of file
src/main/scripts/query_feature/create_query_semantic_feature_table.sql
0 → 100644
View file @
5c1d537f
-- Hive external table holding the per-query semantic features
-- (recognized categories/brands/gender plus search volume), one row per query keyword.
-- Rows are stored as tab-delimited text. No LOCATION clause is given, so the table
-- lives in the database's default warehouse directory.
create external table if not exists secoo_search.search_data_query_semantic_feature
(
    keyword                   string  comment 'query词',
    query_cat_1               bigint  comment '识别类目1',
    query_cat_2               bigint  comment '识别类目2',
    query_cat_3               bigint  comment '识别类目3',
    query_cat_4               bigint  comment '识别类目4',
    query_cat_5               bigint  comment '识别类目5',
    query_brand_1             bigint  comment '识别品牌1',
    query_brand_2             bigint  comment '识别品牌2',
    query_brand_3             bigint  comment '识别品牌3',
    query_gender              tinyint comment '识别性别,1是男,2是女, 0是没有',
    query_contains_other_word tinyint comment '是否含其他词',
    query_word_size           tinyint comment 'query分词个数',
    query_search_pv           bigint  comment 'query搜索次数',
    query_search_uv           bigint  comment 'query搜索人数'
)
-- Table comment describes this table (semantic features), not the original-feature table
-- it was copied from.
comment 'query语义特征'
row format delimited fields terminated by '\t'
stored as textfile;
\ No newline at end of file
src/main/scripts/query_feature/insert_query_feature_table.sql
0 → 100644
View file @
5c1d537f
-- Overwrites secoo_search.search_data_query_original_feature with every row of the
-- semantic-feature table, enriched with the cart/pay brand and category columns of
-- search_query_brand_category for the partition day passed in as ${yesterday}.
-- The left join keeps keywords that have no matching brand/category row; their
-- cart_* / pay_* columns are then NULL.
insert overwrite table secoo_search.search_data_query_original_feature
select
    sem.keyword,
    sem.query_cat_1,
    sem.query_cat_2,
    sem.query_cat_3,
    sem.query_cat_4,
    sem.query_cat_5,
    sem.query_brand_1,
    sem.query_brand_2,
    sem.query_brand_3,
    sem.query_gender,
    sem.query_contains_other_word,
    sem.query_word_size,
    sem.query_search_pv,
    sem.query_search_uv,
    bc.cart_brand_1,
    bc.cart_brand_2,
    bc.cart_brand_3,
    bc.cart_brand_4,
    bc.cart_brand_5,
    bc.cart_category_1,
    bc.cart_category_2,
    bc.cart_category_3,
    bc.cart_category_4,
    bc.cart_category_5,
    bc.pay_brand_1,
    bc.pay_brand_2,
    bc.pay_brand_3,
    bc.pay_brand_4,
    bc.pay_brand_5,
    bc.pay_category_1,
    bc.pay_category_2,
    bc.pay_category_3,
    bc.pay_category_4,
    bc.pay_category_5
from secoo_search.search_data_query_semantic_feature sem
left join secoo_search.search_query_brand_category bc
    on  sem.keyword = bc.key_word
    -- the day filter stays in the ON clause so unmatched keywords are not dropped
    and bc.p_day = ${yesterday}
\ No newline at end of file
src/main/scripts/query_feature/query_feature_extract.sh
View file @
5c1d537f
work_dir
=
"/data/zhaoyanchao/java/shell/query_feature/"
#搜索query源数据建表
hive
-e
"drop table secoo_search.search_data_
original_query_last_year
;"
hive
-f
"
$work_dir
"
create_query_
original
_table.sql
hive
-e
"drop table secoo_search.search_data_
query_original_feature
;"
hive
-f
"
$work_dir
"
create_query_
feature
_table.sql
#搜索query源数据表数据写入
today_param
=
$1
delta_day
=
1
yesterday
=
`
date
-d
"
${
today_param
}
-
$delta_day
day"
"+%Y-%m-%d"
`
echo
${
yesterday
}
hive
--hivevar
yesterday
=
"'
$yesterday
'"
-f
"
$work_dir
"
insert_query_original_table.sql
# 删除原输出文件
hdfs dfs
-rm
-r
hdfs://tesla-cluster/apps/hive/warehouse/secoo_search.db/search_data_query_original_feature
# 提取特征到输出表
work_jar_dir
=
"/data/soft/data-warehouse_jar/"
yarn jar
"
${
work_jar_dir
}
"
search-model-data-1.0-SNAPSHOT-jar-with-dependencies.jar com.secoo.search.job.keyword.KeywordFeatureExtractJob
yarn jar /data/soft/data-warehouse_jar/search-model-data-1.0-SNAPSHOT-jar-with-dependencies.jar com.secoo.search.job.keyword.KeywordFeatureExtractJob
hive
--hivevar
yesterday
=
"'
$yesterday
'"
-f
"
$work_dir
"
insert_query_feature_table.sql
src/main/scripts/query_feature/query_feature_semantic_extract.sh
0 → 100644
View file @
5c1d537f
# Directory that holds the HiveQL scripts invoked below.
work_dir="/data/zhaoyanchao/java/shell/query_feature/"

# Build the table for the search-query source data: drop the old one, then run the create script.
hive -e "drop table secoo_search.search_data_original_query_last_year;"
hive -f "${work_dir}create_query_original_table.sql"

# Write data into the search-query source table.
# $1 is the reference date; the day before it is passed to Hive as the quoted hivevar "yesterday".
today_param=$1
delta_day=1
yesterday=$(date -d "${today_param} -${delta_day} day" "+%Y-%m-%d")
echo ${yesterday}
hive --hivevar yesterday="'${yesterday}'" -f "${work_dir}insert_query_original_table.sql"

# Delete the previous output files.
hdfs dfs -rm -r hdfs://tesla-cluster/apps/hive/warehouse/secoo_search.db/search_data_query_semantic_feature

# Extract the features into the output table.
work_jar_dir="/data/soft/data-warehouse_jar/"
yarn jar "${work_jar_dir}search-model-data-1.0-SNAPSHOT-jar-with-dependencies.jar" com.secoo.search.job.keyword.KeywordFeatureExtractJob
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment