This commit is contained in:
Aliaksandr Valialkin 2024-05-09 21:24:43 +02:00
parent 713172308e
commit b2ff38c41e
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB

View file

@ -79,38 +79,31 @@ func (t *tokenizer) tokenizeString(dst []string, s string) []string {
func (t *tokenizer) tokenizeStringUnicode(dst []string, s string) []string {
m := t.m
i := 0
for i < len(s) {
for len(s) > 0 {
// Search for the next token.
start := len(s)
for i < len(s) {
r, size := utf8.DecodeRuneInString(s[i:])
if !isTokenRune(r) {
i += size
continue
}
start = i
i += size
break
}
// Search for the end of the token.
end := len(s)
for i < len(s) {
r, size := utf8.DecodeRuneInString(s[i:])
n := len(s)
for offset, r := range s {
if isTokenRune(r) {
i += size
continue
}
end = i
i += size
n = offset
break
}
if end <= start {
}
s = s[n:]
// Search for the end of the token.
n = len(s)
for offset, r := range s {
if !isTokenRune(r) {
n = offset
break
}
}
if n == 0 {
break
}
// Register the token
token := s[start:end]
token := s[:n]
s = s[n:]
if _, ok := m[token]; !ok {
m[token] = struct{}{}
dst = append(dst, token)