Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
suggest-task
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
田川
suggest-task
Commits
5b449033
Commit
5b449033
authored
Aug 02, 2022
by
王明范
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
merge tag
parent
c31088f6
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
93 additions
and
2 deletions
+93
-2
SuggestTask.java
suggest-task/src/main/java/com/secoo/so/suggest/task/SuggestTask.java
+68
-1
StringUtils.java
suggest-task/src/main/java/com/secoo/so/suggest/util/StringUtils.java
+25
-1
No files found.
suggest-task/src/main/java/com/secoo/so/suggest/task/SuggestTask.java
View file @
5b449033
...
...
@@ -243,7 +243,7 @@ public class SuggestTask {
esSuggestKeywordMap
.
put
(
fillKeyword
,
buildDefaultEsSuggestKeywordInfo
(
fillKeyword
));
}
}
mergeKeywordTag
(
esSuggestKeywordMap
);
// 处理部分keyword,合并为其他词的tag
// 过滤词
List
<
EsSuggestKeywordInfo
>
suggestKeywordInfoList
=
new
ArrayList
<>();
int
processCount
=
0
;
...
...
@@ -277,6 +277,73 @@ public class SuggestTask {
saveSuggestKeywordToEs
(
suggestKeywordInfoList
);
}
}
/**
 * Derives suggest tags for short keywords from the longer keywords that extend them.
 *
 * <p>The keys are sorted so that every key starting with a given word follows that word
 * contiguously. For each word whose byte length (as reported by
 * {@code StringUtils.getByteLength}) is in (3, 15] and whose suggest tags are blank, the
 * keys that extend it are collected. A candidate is accepted when both the raw and the
 * trimmed remainder after the word have a byte length in (3, 12]. The remainders of the
 * three best-ranked candidates ({@code wordABRank} descending, {@code null} ranks last)
 * are joined with {@code ","} and stored as the word's suggest tags. Entries of the map
 * are modified in place; no entry is added or removed.
 *
 * @param esSuggestKeywordMap keyword to keyword-info map to enrich
 */
private static void mergeKeywordTag(ConcurrentHashMap<String, EsSuggestKeywordInfo> esSuggestKeywordMap) {
    final int maxCount = 100; // upper bound on candidates collected per word
    final int maxTags = 3;    // number of remainders written into the tag string

    List<String> keyList = new ArrayList<>(esSuggestKeywordMap.keySet());
    // Sort lexicographically so that every extension of a word directly follows the word.
    Collections.sort(keyList);

    for (int i = 0; i < keyList.size(); i++) {
        String word = keyList.get(i);
        EsSuggestKeywordInfo suggestInfo = esSuggestKeywordMap.get(word);
        int wordLen = StringUtils.getByteLength(word);
        if (wordLen <= 3 || wordLen > 15 || !StringUtils.isBlank(suggestInfo.getSuggestTags())) {
            continue;
        }

        int length = word.length();
        boolean isEN = StringUtils.isEnAndMidSpaceStr(word);
        List<EsSuggestKeywordInfo> suggestList = new ArrayList<>();

        for (int j = i + 1; j < keyList.size() && suggestList.size() < maxCount; j++) {
            String fulWord = keyList.get(j);
            if (!fulWord.startsWith(word)) {
                // Sorted order: once the prefix stops matching, no later key can extend this word.
                break;
            }
            // A candidate that shares the prefix but is unsuitable must not hide later candidates,
            // so every rejection below skips the candidate instead of ending the scan.
            int extraLen = StringUtils.getByteLength(fulWord) - wordLen;
            if (extraLen <= 3 || extraLen > 12) {
                continue;
            }
            // For an English word, skip candidates where the next character is rejected by
            // StringUtils.isEnStr (presumably an English letter, i.e. a longer word rather than
            // "word + tag" -- confirm against isEnStr).
            if (isEN && StringUtils.isEnStr(fulWord.substring(length, length + 1))) {
                continue;
            }
            // rightLen is already the byte length of the remainder, so it is compared directly;
            // subtracting wordLen again would reject nearly every candidate.
            String rightWord = fulWord.substring(length).trim();
            int rightLen = StringUtils.getByteLength(rightWord);
            if (rightLen <= 3 || rightLen > 12) {
                continue;
            }
            suggestList.add(esSuggestKeywordMap.get(fulWord));
        }

        if (suggestList.isEmpty()) {
            continue;
        }

        // Highest wordABRank first; entries without a rank go last.
        Collections.sort(suggestList, (t1, t2) -> {
            Double score1 = t1.getWordABRank();
            Double score2 = t2.getWordABRank();
            if (score1 == null) {
                return score2 == null ? 0 : 1;
            }
            if (score2 == null) {
                return -1;
            }
            return score2.compareTo(score1);
        });

        StringBuilder sb = new StringBuilder();
        for (int k = 0; k < suggestList.size() && k < maxTags; k++) {
            String fulWord = suggestList.get(k).getKeyword();
            if (k > 0) {
                sb.append(",");
            }
            sb.append(fulWord.substring(length).trim());
        }
        if (sb.length() > 0) {
            suggestInfo.setSuggestTags(sb.toString());
        }
    }
}
private
static
EsSuggestKeywordInfo
buildDefaultEsSuggestKeywordInfo
(
String
keyword
)
{
EsSuggestKeywordInfo
esSuggestKeywordInfo
=
new
EsSuggestKeywordInfo
();
...
...
suggest-task/src/main/java/com/secoo/so/suggest/util/StringUtils.java
View file @
5b449033
...
...
@@ -2067,6 +2067,17 @@ public abstract class StringUtils {
}
/**
 * Tells whether {@code word} is an English string: it must consist only of ASCII letters
 * and whitespace, and must neither start nor end with a space. Spaces in the middle are
 * allowed. {@code null} and the empty string are not English.
 *
 * <pre>
 * StringUtils.isEnAndMidSpaceStr(null)    = false
 * StringUtils.isEnAndMidSpaceStr("")      = false
 * StringUtils.isEnAndMidSpaceStr("ab c")  = true
 * StringUtils.isEnAndMidSpaceStr("ab c ") = false
 * StringUtils.isEnAndMidSpaceStr(" ab c") = false
 * </pre>
 *
 * @param word the string to check, may be {@code null}
 * @return {@code true} if {@code word} is English in the sense described above
 */
public static boolean isEnAndMidSpaceStr(String word) {
    if (word == null || word.startsWith(" ") || word.endsWith(" ")) {
        return false;
    }
    return word.trim().matches("[a-zA-Z\\s]+");
}
/**
* 判断是否包含中文
*/
public
static
boolean
isContainChStr
(
String
word
)
{
...
...
@@ -2109,6 +2120,19 @@ public abstract class StringUtils {
return
0
;
}
/**
 * Ad-hoc manual check of {@link #isEnAndMidSpaceStr(String)} and of splitting a keyword
 * into a known prefix and the remainder that follows it. Prints the results to standard
 * output.
 *
 * @param arg ignored
 */
public static void main(String[] arg) {
    String word = "ab c ";
    String word1 = "ab c 中文";
    // Index by character count: a byte length is only a valid String index while the
    // prefix is pure ASCII.
    int prefixChars = word.length();
    String aaa = word1.substring(prefixChars, prefixChars + 1);
    String bbb = word1.substring(prefixChars);
    System.out.println(isEnAndMidSpaceStr(word));
    System.out.println(isEnAndMidSpaceStr(bbb));
    System.out.println(aaa);
    System.out.println(bbb);
}
/**
* 32位md5加密
*/
...
...
@@ -2153,7 +2177,7 @@ public abstract class StringUtils {
*
* <p>If the stripChars String is {@code null}, whitespace is
* stripped as defined by {@link Character#isWhitespace(char)}.
* Alternatively use {@link #strip(String)}.</p>
* Alternatively use {@link #strip(String
, String
)}.</p>
*
* <pre>
* StringUtils.strip(null, *) = null
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment