This commit is contained in:
Aliaksandr Valialkin 2024-05-09 20:25:20 +02:00
parent 28db8795ee
commit 6da2f28d36
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB

View file

@ -10,16 +10,12 @@ import (
// the order of returned tokens is unspecified. // the order of returned tokens is unspecified.
func tokenizeStrings(dst, a []string) []string { func tokenizeStrings(dst, a []string) []string {
t := getTokenizer() t := getTokenizer()
m := t.m
for i, s := range a { for i, s := range a {
if i > 0 && s == a[i-1] { if i > 0 && s == a[i-1] {
// This string has been already tokenized // This string has been already tokenized
continue continue
} }
tokenizeString(m, s) dst = t.tokenizeString(dst, s)
}
for k := range t.m {
dst = append(dst, k)
} }
putTokenizer(t) putTokenizer(t)
@ -34,7 +30,8 @@ func (t *tokenizer) reset() {
clear(t.m) clear(t.m)
} }
func tokenizeString(dst map[string]struct{}, s string) { func (t *tokenizer) tokenizeString(dst []string, s string) []string {
m := t.m
for len(s) > 0 { for len(s) > 0 {
// Search for the next token. // Search for the next token.
nextIdx := len(s) nextIdx := len(s)
@ -55,10 +52,14 @@ func tokenizeString(dst map[string]struct{}, s string) {
} }
token := s[:nextIdx] token := s[:nextIdx]
if len(token) > 0 { if len(token) > 0 {
dst[token] = struct{}{} if _, ok := m[token]; !ok {
m[token] = struct{}{}
dst = append(dst, token)
}
} }
s = s[nextIdx:] s = s[nextIdx:]
} }
return dst
} }
func isTokenRune(c rune) bool { func isTokenRune(c rune) bool {