Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
suggest-task
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
田川
suggest-task
Commits
c70de180
Commit
c70de180
authored
Dec 11, 2019
by
zhaoyanchao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
过滤掉特殊字符 '\ufffc', 全角输入的词转为半角
parent
78e27da4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
55 additions
and
67 deletions
+55
-67
.gitignore
.gitignore
+2
-0
suggest-task-dependency.go
main/suggest-task-dependency.go
+21
-32
suggest-task.go
main/suggest-task.go
+1
-27
test.go
main/test.go
+31
-8
No files found.
.gitignore
View file @
c70de180
.idea/
.idea/
main/info.log
main/info.log
main/test.go
\ No newline at end of file
main/suggest-task-dependency.go
View file @
c70de180
package
main
package
main
import
(
import
(
"container/list"
"strings"
"strings"
"github.com/mozillazg/go-pinyin"
"github.com/mozillazg/go-pinyin"
"fmt"
"fmt"
...
@@ -41,36 +40,6 @@ var RUN_ENV = prod_env
...
@@ -41,36 +40,6 @@ var RUN_ENV = prod_env
/************************* 下面是 util 方法 *****************************/
/************************* 下面是 util 方法 *****************************/
// factorial returns n! (the product 1 * 2 * ... * n).
//
// Any n <= 1 yields 1. This covers 0! == 1 and guarantees termination for zero
// or negative input, which previously recursed without ever reaching a base case.
// The result wraps around silently once the product no longer fits in an int64
// (n > 20).
func factorial(n int64) int64 {
	result := int64(1)
	// Start at 2: multiplying by 1 is a no-op, and the loop is skipped for n <= 1.
	for i := int64(2); i <= n; i++ {
		result *= i
	}
	return result
}
// permutation generates every ordering of the elements arr[begin:], keeping
// arr[:begin] fixed, and appends each complete arrangement to lst as a single
// string with the elements joined by one space.
//
// arr is permuted in place while the recursion runs, but every swap is undone
// afterwards, so arr holds its original order again when the call returns.
func permutation(arr []string, begin int, lst *list.List) {
	n := len(arr)
	if begin == n {
		// All positions are fixed: record the current arrangement.
		lst.PushBack(strings.Join(arr, " "))
		return
	}
	for idx := begin; idx < n; idx++ {
		// Put each remaining candidate into position begin in turn.
		arr[begin], arr[idx] = arr[idx], arr[begin]
		permutation(arr, begin+1, lst)
		// Undo the swap so the next candidate starts from the same state.
		arr[begin], arr[idx] = arr[idx], arr[begin]
	}
}
//func main() {
// var h = []string{"a","b","c","d"}
// lst := list.New()
// permutation(h,1,lst)
// for p := lst.Front(); p != nil; p = p.Next() {
// fmt.Println(p.Value)
// }
//}
func
convertToPinyin
(
str
string
)
string
{
func
convertToPinyin
(
str
string
)
string
{
var
ret
string
var
ret
string
...
@@ -91,7 +60,7 @@ func convertToPinyin(str string) string {
...
@@ -91,7 +60,7 @@ func convertToPinyin(str string) string {
func
cleanKeyword
(
keyword
string
)
string
{
func
cleanKeyword
(
keyword
string
)
string
{
out
,
err
:=
t2s
.
Convert
(
keyword
)
out
,
err
:=
t2s
.
Convert
(
keyword
)
if
err
!=
nil
{
fmt
.
Println
(
err
)
}
if
err
!=
nil
{
fmt
.
Println
(
err
)
}
keyword
=
strings
.
T
rimSpace
(
strings
.
ToLower
(
out
))
keyword
=
strings
.
T
oLower
(
strings
.
Trim
(
DBC2SBC
(
strings
.
TrimSpace
(
out
)),
"
\ufffc
|,"
))
return
strings
.
Join
(
strings
.
Fields
(
keyword
),
" "
)
return
strings
.
Join
(
strings
.
Fields
(
keyword
),
" "
)
}
}
...
@@ -101,3 +70,22 @@ func strToInt(str string) int32 {
...
@@ -101,3 +70,22 @@ func strToInt(str string) int32 {
if
err
!=
nil
{
fmt
.
Println
(
err
)
}
if
err
!=
nil
{
fmt
.
Println
(
err
)
}
return
int32
(
v
)
return
int32
(
v
)
}
}
// DBC2SBC converts full-width (double-byte) characters in s to their half-width
// ASCII counterparts and returns the converted string.
//
// The ideographic space U+3000 becomes an ASCII space. Any other rune whose code
// point minus 0xFEE0 falls in the printable ASCII range 0x20-0x7E is replaced by
// that ASCII character. Every remaining rune is copied through unchanged.
func DBC2SBC(s string) string {
	const (
		ideographicSpace = 0x3000 // 12288, the full-width space
		fullWidthOffset  = 0xFEE0 // 65248, distance from a full-width form to its ASCII twin
	)

	// Build the result in one buffer instead of allocating a string per rune.
	// The output is never longer than the input, so len(s) is a safe capacity.
	var b strings.Builder
	b.Grow(len(s))
	for _, r := range s {
		if r == ideographicSpace {
			b.WriteByte(' ')
			continue
		}
		if half := r - fullWidthOffset; half >= ' ' && half <= '~' {
			b.WriteRune(half)
		} else {
			b.WriteRune(r)
		}
	}
	return b.String()
}
\ No newline at end of file
main/suggest-task.go
View file @
c70de180
...
@@ -19,7 +19,6 @@ import (
...
@@ -19,7 +19,6 @@ import (
"time"
"time"
"unicode"
"unicode"
"unicode/utf8"
"unicode/utf8"
"container/list"
)
)
type
Word
struct
{
type
Word
struct
{
...
@@ -109,7 +108,7 @@ func main() {
...
@@ -109,7 +108,7 @@ func main() {
if
err
!=
nil
{
log
.
Print
(
err
.
Error
())
}
if
err
!=
nil
{
log
.
Print
(
err
.
Error
())
}
defer
datawareDB
.
Close
()
defer
datawareDB
.
Close
()
fmt
.
Printf
(
"
Cost %d ms
\n
"
,
time
.
Since
(
startTime
)
.
Nanoseconds
()
/
1e6
)
fmt
.
Printf
(
"
%s task finish Cost %d ms
\n
"
,
dateStr
,
time
.
Since
(
startTime
)
.
Nanoseconds
()
/
1e6
)
}
}
func
checkUnusedData
(
bulkProcessor
*
elastic
.
BulkProcessor
)
{
func
checkUnusedData
(
bulkProcessor
*
elastic
.
BulkProcessor
)
{
...
@@ -274,9 +273,6 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
...
@@ -274,9 +273,6 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
if
v
,
isExist
:=
wordMap
.
Load
(
key
);
isExist
{
if
v
,
isExist
:=
wordMap
.
Load
(
key
);
isExist
{
merge
(
w
,
v
)
merge
(
w
,
v
)
}
else
if
v
,
isExist
:=
existSameWord
(
key
);
isExist
{
fmt
.
Println
(
"find same word, now is:"
+
w
.
Keyword
+
" exist is:"
+
v
.
(
*
Word
)
.
Keyword
)
merge
(
w
,
v
)
}
}
addWord
(
w
,
bulkProcessor
)
addWord
(
w
,
bulkProcessor
)
...
@@ -288,28 +284,6 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
...
@@ -288,28 +284,6 @@ func queryIndex(idFlag int, db *sql.DB, bulkProcessor *elastic.BulkProcessor, wg
}
}
// 以空格为分隔符分开的词,如果第一个词相同,其余的词 只有顺序差异,则视为相同的记录,需要合并
// 如 nike 男 鞋, nike 鞋 男。 第一个词相同,且总体词的集合相同,则合并
// 而 nike 男鞋 和 男鞋 nike 则不能合并(nike 男鞋 合并到 男鞋 nike 上,则减少了输入nike 时的可能提示 )
func
existSameWord
(
keyword
string
)
(
value
interface
{},
ok
bool
)
{
fields
:=
strings
.
Fields
(
keyword
)
length
:=
len
(
fields
)
if
length
==
1
{
return
wordMap
.
Load
(
keyword
)
}
if
length
>
5
{
return
nil
,
false
}
lst
:=
list
.
New
()
permutation
(
fields
,
1
,
lst
)
for
p
:=
lst
.
Front
();
p
!=
nil
;
p
=
p
.
Next
()
{
if
v
,
isExist
:=
wordMap
.
Load
(
p
.
Value
);
isExist
{
return
v
,
true
}
}
return
nil
,
false
}
func
merge
(
word
*
Word
,
v
interface
{})
{
func
merge
(
word
*
Word
,
v
interface
{})
{
...
...
main/test.go
View file @
c70de180
...
@@ -3,12 +3,14 @@ package main
...
@@ -3,12 +3,14 @@ package main
import
(
import
(
"sync"
"sync"
"fmt"
"fmt"
"github.com/liuzl/gocc"
)
)
var
tmap
sync
.
Map
var
tmap
sync
.
Map
var
t2s1
,
_
=
gocc
.
New
(
"t2s"
)
func
main
()
{
func
main
()
{
//var t2s, _ = gocc.New("t2s")
//var _, err = t2s.Convert("中國")
//var _, err = t2s.Convert("中國")
//if err != nil { fmt.Println("succ")}
//if err != nil { fmt.Println("succ")}
...
@@ -31,9 +33,9 @@ func main() {
...
@@ -31,9 +33,9 @@ func main() {
//}
//}
add
()
var
val
,
_
=
tmap
.
Load
(
"a
"
)
var
s
=
cleanKeyword
(
"zhong
"
)
fmt
.
Print
(
val
)
fmt
.
Print
(
s
)
}
}
func
add
()
{
func
add
()
{
tmap
.
Store
(
"a"
,
"b"
)
tmap
.
Store
(
"a"
,
"b"
)
...
@@ -41,11 +43,32 @@ func add() {
...
@@ -41,11 +43,32 @@ func add() {
fmt
.
Print
(
val
)
fmt
.
Print
(
val
)
}
}
//// 求阶乘
//func cleanKeyword(keyword string) string {
//func factorial(n int64) int64 {
// out, err := t2s1.Convert(keyword)
// if n == 1 { return 1}
// if err != nil { fmt.Println(err) }
// return n * factorial(n-1)
// keyword = strings.ToLower(strings.Trim(DBC2SBC(strings.TrimSpace(out)),"\ufffc|,"))
// return strings.Join(strings.Fields(keyword)," ")
//}
//}
//
//
//// 全角转半角
//func DBC2SBC(s string) string {
// var strLst []string
// for _, i := range s {
// insideCode := i
// if insideCode == 12288 {
// insideCode = 32
// } else {
// insideCode -= 65248
// }
// if insideCode < 32 || insideCode > 126 {
// strLst = append(strLst, string(i))
// } else {
// strLst = append(strLst, string(insideCode))
// }
// }
// return strings.Join(strLst, "")
//}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment