mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-31 15:06:26 +00:00
wip
This commit is contained in:
parent
3008c58ac8
commit
0f31264e83
10 changed files with 470 additions and 22 deletions
|
@ -213,11 +213,11 @@ func (p *parser) parseMetricExprOrFuncCall() (Expr, error) {
|
|||
// Metric expression or bool expression or None.
|
||||
if isBool(ident) {
|
||||
be := &BoolExpr{
|
||||
B: strings.ToLower(ident) == "true",
|
||||
B: strings.EqualFold(ident, "true"),
|
||||
}
|
||||
return be, nil
|
||||
}
|
||||
if strings.ToLower(ident) == "none" {
|
||||
if strings.EqualFold(ident, "none") {
|
||||
nne := &NoneExpr{}
|
||||
return nne, nil
|
||||
}
|
||||
|
|
|
@ -112,7 +112,7 @@ func binaryOpNeqFunc(bfa *binaryOpFuncArg) ([]*timeseries, error) {
|
|||
}
|
||||
|
||||
func isUnionFunc(e metricsql.Expr) bool {
|
||||
if fe, ok := e.(*metricsql.FuncExpr); ok && (fe.Name == "" || strings.ToLower(fe.Name) == "union") {
|
||||
if fe, ok := e.(*metricsql.FuncExpr); ok && (fe.Name == "" || strings.EqualFold(fe.Name, "union")) {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
@ -303,7 +303,7 @@ func ensureSingleTimeseries(side string, be *metricsql.BinaryOpExpr, tss []*time
|
|||
func groupJoin(singleTimeseriesSide string, be *metricsql.BinaryOpExpr, rvsLeft, rvsRight, tssLeft, tssRight []*timeseries) ([]*timeseries, []*timeseries, error) {
|
||||
joinTags := be.JoinModifier.Args
|
||||
var skipTags []string
|
||||
if strings.ToLower(be.GroupModifier.Op) == "on" {
|
||||
if strings.EqualFold(be.GroupModifier.Op, "on") {
|
||||
skipTags = be.GroupModifier.Args
|
||||
}
|
||||
joinPrefix := ""
|
||||
|
|
|
@ -542,7 +542,7 @@ func execBinaryOpArgs(qt *querytracer.Tracer, ec *EvalConfig, exprFirst, exprSec
|
|||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
if len(tssFirst) == 0 && strings.ToLower(be.Op) != "or" {
|
||||
if len(tssFirst) == 0 && !strings.EqualFold(be.Op, "or") {
|
||||
// Fast path: there is no sense in executing the exprSecond when exprFirst returns an empty result,
|
||||
// since the "exprFirst op exprSecond" would return an empty result in any case.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3349
|
||||
|
@ -1168,7 +1168,7 @@ func evalInstantRollup(qt *querytracer.Tracer, ec *EvalConfig, funcName string,
|
|||
return evalExpr(qt, ec, be)
|
||||
case "rate":
|
||||
if iafc != nil {
|
||||
if strings.ToLower(iafc.ae.Name) != "sum" {
|
||||
if !strings.EqualFold(iafc.ae.Name, "sum") {
|
||||
qt.Printf("do not apply instant rollup optimization for incremental aggregate %s()", iafc.ae.Name)
|
||||
return evalAt(qt, timestamp, window)
|
||||
}
|
||||
|
@ -1214,7 +1214,7 @@ func evalInstantRollup(qt *querytracer.Tracer, ec *EvalConfig, funcName string,
|
|||
return evalExpr(qt, ec, be)
|
||||
case "max_over_time":
|
||||
if iafc != nil {
|
||||
if strings.ToLower(iafc.ae.Name) != "max" {
|
||||
if !strings.EqualFold(iafc.ae.Name, "max") {
|
||||
qt.Printf("do not apply instant rollup optimization for non-max incremental aggregate %s()", iafc.ae.Name)
|
||||
return evalAt(qt, timestamp, window)
|
||||
}
|
||||
|
@ -1276,7 +1276,7 @@ func evalInstantRollup(qt *querytracer.Tracer, ec *EvalConfig, funcName string,
|
|||
return tss, nil
|
||||
case "min_over_time":
|
||||
if iafc != nil {
|
||||
if strings.ToLower(iafc.ae.Name) != "min" {
|
||||
if !strings.EqualFold(iafc.ae.Name, "min") {
|
||||
qt.Printf("do not apply instant rollup optimization for non-min incremental aggregate %s()", iafc.ae.Name)
|
||||
return evalAt(qt, timestamp, window)
|
||||
}
|
||||
|
@ -1345,7 +1345,7 @@ func evalInstantRollup(qt *querytracer.Tracer, ec *EvalConfig, funcName string,
|
|||
"increase",
|
||||
"increase_pure",
|
||||
"sum_over_time":
|
||||
if iafc != nil && strings.ToLower(iafc.ae.Name) != "sum" {
|
||||
if iafc != nil && !strings.EqualFold(iafc.ae.Name, "sum") {
|
||||
qt.Printf("do not apply instant rollup optimization for non-sum incremental aggregate %s()", iafc.ae.Name)
|
||||
return evalAt(qt, timestamp, window)
|
||||
}
|
||||
|
|
|
@ -122,7 +122,7 @@ func maySortResults(e metricsql.Expr) bool {
|
|||
return false
|
||||
}
|
||||
case *metricsql.BinaryOpExpr:
|
||||
if strings.ToLower(v.Op) == "or" {
|
||||
if strings.EqualFold(v.Op, "or") {
|
||||
// Do not sort results for `a or b` in the same way as Prometheus does.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4763
|
||||
return false
|
||||
|
|
|
@ -13,6 +13,7 @@ import (
|
|||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil"
|
||||
)
|
||||
|
||||
func getFilterBitmap(bitsLen int) *filterBitmap {
|
||||
|
@ -1276,6 +1277,9 @@ type anyCasePrefixFilter struct {
|
|||
fieldName string
|
||||
prefix string
|
||||
|
||||
prefixLowercaseOnce sync.Once
|
||||
prefixLowercase string
|
||||
|
||||
tokensOnce sync.Once
|
||||
tokens []string
|
||||
}
|
||||
|
@ -1296,9 +1300,18 @@ func (pf *anyCasePrefixFilter) initTokens() {
|
|||
pf.tokens = getTokensSkipLast(pf.prefix)
|
||||
}
|
||||
|
||||
func (pf *anyCasePrefixFilter) getPrefixLowercase() string {
|
||||
pf.prefixLowercaseOnce.Do(pf.initPrefixLowercase)
|
||||
return pf.prefixLowercase
|
||||
}
|
||||
|
||||
func (pf *anyCasePrefixFilter) initPrefixLowercase() {
|
||||
pf.prefixLowercase = strings.ToLower(pf.prefix)
|
||||
}
|
||||
|
||||
func (pf *anyCasePrefixFilter) apply(bs *blockSearch, bm *filterBitmap) {
|
||||
fieldName := pf.fieldName
|
||||
prefixLowercase := strings.ToLower(pf.prefix)
|
||||
prefixLowercase := pf.getPrefixLowercase()
|
||||
|
||||
// Verify whether pf matches const column
|
||||
v := bs.csh.getConstColumnValue(fieldName)
|
||||
|
@ -1427,6 +1440,9 @@ type anyCasePhraseFilter struct {
|
|||
fieldName string
|
||||
phrase string
|
||||
|
||||
phraseLowercaseOnce sync.Once
|
||||
phraseLowercase string
|
||||
|
||||
tokensOnce sync.Once
|
||||
tokens []string
|
||||
}
|
||||
|
@ -1444,9 +1460,18 @@ func (pf *anyCasePhraseFilter) initTokens() {
|
|||
pf.tokens = tokenizeStrings(nil, []string{pf.phrase})
|
||||
}
|
||||
|
||||
func (pf *anyCasePhraseFilter) getPhraseLowercase() string {
|
||||
pf.phraseLowercaseOnce.Do(pf.initPhraseLowercase)
|
||||
return pf.phraseLowercase
|
||||
}
|
||||
|
||||
func (pf *anyCasePhraseFilter) initPhraseLowercase() {
|
||||
pf.phraseLowercase = strings.ToLower(pf.phrase)
|
||||
}
|
||||
|
||||
func (pf *anyCasePhraseFilter) apply(bs *blockSearch, bm *filterBitmap) {
|
||||
fieldName := pf.fieldName
|
||||
phraseLowercase := strings.ToLower(pf.phrase)
|
||||
phraseLowercase := pf.getPhraseLowercase()
|
||||
|
||||
// Verify whether pf matches const column
|
||||
v := bs.csh.getConstColumnValue(fieldName)
|
||||
|
@ -2787,15 +2812,52 @@ func visitValues(bs *blockSearch, ch *columnHeader, bm *filterBitmap, f func(val
|
|||
}
|
||||
|
||||
func matchAnyCasePrefix(s, prefixLowercase string) bool {
|
||||
sLowercase := strings.ToLower(s)
|
||||
return matchPrefix(sLowercase, prefixLowercase)
|
||||
if len(prefixLowercase) == 0 {
|
||||
// Special case - empty prefix matches any non-empty string.
|
||||
return len(s) > 0
|
||||
}
|
||||
if len(prefixLowercase) > len(s) {
|
||||
return false
|
||||
}
|
||||
|
||||
if isASCIILowercase(s) {
|
||||
// Fast path - s is in lowercase
|
||||
return matchPrefix(s, prefixLowercase)
|
||||
}
|
||||
|
||||
// Slow path - convert s to lowercase before matching
|
||||
bb := bbPool.Get()
|
||||
bb.B = stringsutil.AppendLowercase(bb.B, s)
|
||||
sLowercase := bytesutil.ToUnsafeString(bb.B)
|
||||
ok := matchPrefix(sLowercase, prefixLowercase)
|
||||
bbPool.Put(bb)
|
||||
|
||||
return ok
|
||||
}
|
||||
|
||||
// isASCIILowercase reports whether s consists solely of ASCII bytes
// and contains no uppercase ASCII letters ('A'..'Z').
//
// It returns true for the empty string and false as soon as a non-ASCII
// byte is seen, even if that byte belongs to a lowercase Unicode letter.
func isASCIILowercase(s string) bool {
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c >= utf8.RuneSelf || (c >= 'A' && c <= 'Z') {
			return false
		}
	}
	return true
}
|
||||
|
||||
func matchPrefix(s, prefix string) bool {
|
||||
if len(prefix) == 0 {
|
||||
// Special case - empty prefix matches any non-empty string.
|
||||
return len(s) > 0
|
||||
}
|
||||
r, _ := utf8.DecodeRuneInString(prefix)
|
||||
if len(prefix) > len(s) {
|
||||
return false
|
||||
}
|
||||
|
||||
r := rune(prefix[0])
|
||||
if r >= utf8.RuneSelf {
|
||||
r, _ = utf8.DecodeRuneInString(prefix)
|
||||
}
|
||||
startsWithToken := isTokenRune(r)
|
||||
offset := 0
|
||||
for {
|
||||
|
@ -2806,7 +2868,10 @@ func matchPrefix(s, prefix string) bool {
|
|||
offset += n
|
||||
// Make sure that the found phrase contains non-token chars at the beginning
|
||||
if startsWithToken && offset > 0 {
|
||||
r, _ := utf8.DecodeLastRuneInString(s[:offset])
|
||||
r := rune(s[offset-1])
|
||||
if r >= utf8.RuneSelf {
|
||||
r, _ = utf8.DecodeLastRuneInString(s[:offset])
|
||||
}
|
||||
if r == utf8.RuneError || isTokenRune(r) {
|
||||
offset++
|
||||
continue
|
||||
|
@ -2853,8 +2918,27 @@ func matchSequence(s string, phrases []string) bool {
|
|||
}
|
||||
|
||||
func matchAnyCasePhrase(s, phraseLowercase string) bool {
|
||||
sLowercase := strings.ToLower(s)
|
||||
return matchPhrase(sLowercase, phraseLowercase)
|
||||
if len(phraseLowercase) == 0 {
|
||||
// Special case - empty phrase matches only empty string.
|
||||
return len(s) == 0
|
||||
}
|
||||
if len(phraseLowercase) > len(s) {
|
||||
return false
|
||||
}
|
||||
|
||||
if isASCIILowercase(s) {
|
||||
// Fast path - s is in lowercase
|
||||
return matchPhrase(s, phraseLowercase)
|
||||
}
|
||||
|
||||
// Slow path - convert s to lowercase before matching
|
||||
bb := bbPool.Get()
|
||||
bb.B = stringsutil.AppendLowercase(bb.B, s)
|
||||
sLowercase := bytesutil.ToUnsafeString(bb.B)
|
||||
ok := matchPhrase(sLowercase, phraseLowercase)
|
||||
bbPool.Put(bb)
|
||||
|
||||
return ok
|
||||
}
|
||||
|
||||
func matchExactPrefix(s, prefix string) bool {
|
||||
|
@ -2863,6 +2947,7 @@ func matchExactPrefix(s, prefix string) bool {
|
|||
|
||||
func matchPhrase(s, phrase string) bool {
|
||||
if len(phrase) == 0 {
|
||||
// Special case - empty phrase matches only empty string.
|
||||
return len(s) == 0
|
||||
}
|
||||
n := getPhrasePos(s, phrase)
|
||||
|
@ -2870,10 +2955,25 @@ func matchPhrase(s, phrase string) bool {
|
|||
}
|
||||
|
||||
func getPhrasePos(s, phrase string) int {
|
||||
r, _ := utf8.DecodeRuneInString(phrase)
|
||||
if len(phrase) == 0 {
|
||||
return 0
|
||||
}
|
||||
if len(phrase) > len(s) {
|
||||
return -1
|
||||
}
|
||||
|
||||
r := rune(phrase[0])
|
||||
if r >= utf8.RuneSelf {
|
||||
r, _ = utf8.DecodeRuneInString(phrase)
|
||||
}
|
||||
startsWithToken := isTokenRune(r)
|
||||
r, _ = utf8.DecodeLastRuneInString(phrase)
|
||||
|
||||
r = rune(phrase[len(phrase)-1])
|
||||
if r >= utf8.RuneSelf {
|
||||
r, _ = utf8.DecodeLastRuneInString(phrase)
|
||||
}
|
||||
endsWithToken := isTokenRune(r)
|
||||
|
||||
pos := 0
|
||||
for {
|
||||
n := strings.Index(s[pos:], phrase)
|
||||
|
@ -2883,14 +2983,20 @@ func getPhrasePos(s, phrase string) int {
|
|||
pos += n
|
||||
// Make sure that the found phrase contains non-token chars at the beginning and at the end
|
||||
if startsWithToken && pos > 0 {
|
||||
r, _ := utf8.DecodeLastRuneInString(s[:pos])
|
||||
r := rune(s[pos-1])
|
||||
if r >= utf8.RuneSelf {
|
||||
r, _ = utf8.DecodeLastRuneInString(s[:pos])
|
||||
}
|
||||
if r == utf8.RuneError || isTokenRune(r) {
|
||||
pos++
|
||||
continue
|
||||
}
|
||||
}
|
||||
if endsWithToken && pos+len(phrase) < len(s) {
|
||||
r, _ := utf8.DecodeRuneInString(s[pos+len(phrase):])
|
||||
r := rune(s[pos+len(phrase)])
|
||||
if r >= utf8.RuneSelf {
|
||||
r, _ = utf8.DecodeRuneInString(s[pos+len(phrase):])
|
||||
}
|
||||
if r == utf8.RuneError || isTokenRune(r) {
|
||||
pos++
|
||||
continue
|
||||
|
|
|
@ -10,6 +10,74 @@ import (
|
|||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
)
|
||||
|
||||
func TestMatchAnyCasePrefix(t *testing.T) {
|
||||
f := func(s, prefixLowercase string, resultExpected bool) {
|
||||
t.Helper()
|
||||
result := matchAnyCasePrefix(s, prefixLowercase)
|
||||
if result != resultExpected {
|
||||
t.Fatalf("unexpected result; got %v; want %v", result, resultExpected)
|
||||
}
|
||||
}
|
||||
|
||||
// empty prefix matches non-empty strings
|
||||
f("", "", false)
|
||||
f("foo", "", true)
|
||||
f("тест", "", true)
|
||||
|
||||
// empty string doesn't match non-empty prefix
|
||||
f("", "foo", false)
|
||||
f("", "тест", false)
|
||||
|
||||
// full match
|
||||
f("foo", "foo", true)
|
||||
f("FOo", "foo", true)
|
||||
f("Test ТЕСт 123", "test тест 123", true)
|
||||
|
||||
// prefix match
|
||||
f("foo", "f", true)
|
||||
f("foo тест bar", "те", true)
|
||||
f("foo ТЕСТ bar", "те", true)
|
||||
|
||||
// mismatch
|
||||
f("foo", "o", false)
|
||||
f("тест", "foo", false)
|
||||
f("Тест", "ест", false)
|
||||
}
|
||||
|
||||
func TestMatchAnyCasePhrase(t *testing.T) {
|
||||
f := func(s, phraseLowercase string, resultExpected bool) {
|
||||
t.Helper()
|
||||
result := matchAnyCasePhrase(s, phraseLowercase)
|
||||
if result != resultExpected {
|
||||
t.Fatalf("unexpected result; got %v; want %v", result, resultExpected)
|
||||
}
|
||||
}
|
||||
|
||||
// empty phrase matches only empty string
|
||||
f("", "", true)
|
||||
f("foo", "", false)
|
||||
f("тест", "", false)
|
||||
|
||||
// empty string doesn't match non-empty phrase
|
||||
f("", "foo", false)
|
||||
f("", "тест", false)
|
||||
|
||||
// full match
|
||||
f("foo", "foo", true)
|
||||
f("FOo", "foo", true)
|
||||
f("Test ТЕСт 123", "test тест 123", true)
|
||||
|
||||
// phrase match
|
||||
f("a foo", "foo", true)
|
||||
f("foo тест bar", "тест", true)
|
||||
f("foo ТЕСТ bar", "тест bar", true)
|
||||
|
||||
// mismatch
|
||||
f("foo", "fo", false)
|
||||
f("тест", "foo", false)
|
||||
f("Тест", "ест", false)
|
||||
}
|
||||
|
||||
func TestMatchLenRange(t *testing.T) {
|
||||
f := func(s string, minLen, maxLen uint64, resultExpected bool) {
|
||||
t.Helper()
|
||||
|
|
128
lib/logstorage/filters_timing_test.go
Normal file
128
lib/logstorage/filters_timing_test.go
Normal file
|
@ -0,0 +1,128 @@
|
|||
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func BenchmarkMatchAnyCasePrefix(b *testing.B) {
|
||||
b.Run("match-ascii-lowercase", func(b *testing.B) {
|
||||
benchmarkMatchAnyCasePrefix(b, "err", []string{"error here", "another error here", "foo bar baz error"}, true)
|
||||
})
|
||||
b.Run("match-ascii-mixcase", func(b *testing.B) {
|
||||
benchmarkMatchAnyCasePrefix(b, "err", []string{"Error here", "another eRROr here", "foo BAR Baz error"}, true)
|
||||
})
|
||||
b.Run("match-unicode-lowercase", func(b *testing.B) {
|
||||
benchmarkMatchAnyCasePrefix(b, "err", []string{"error здесь", "another error здесь", "тест bar baz error"}, true)
|
||||
})
|
||||
b.Run("match-unicode-mixcase", func(b *testing.B) {
|
||||
benchmarkMatchAnyCasePrefix(b, "err", []string{"error Здесь", "another Error здесь", "тEст bar baz ErRor"}, true)
|
||||
})
|
||||
|
||||
b.Run("mismatch-partial-ascii-lowercase", func(b *testing.B) {
|
||||
benchmarkMatchAnyCasePrefix(b, "rror", []string{"error here", "another error here", "foo bar baz error"}, false)
|
||||
})
|
||||
b.Run("mismatch-partial-ascii-mixcase", func(b *testing.B) {
|
||||
benchmarkMatchAnyCasePrefix(b, "rror", []string{"Error here", "another eRROr here", "foo BAR Baz error"}, false)
|
||||
})
|
||||
b.Run("mismatch-partial-unicode-lowercase", func(b *testing.B) {
|
||||
benchmarkMatchAnyCasePrefix(b, "rror", []string{"error здесь", "another error здесь", "тест bar baz error"}, false)
|
||||
})
|
||||
b.Run("mismatch-partial-unicode-mixcase", func(b *testing.B) {
|
||||
benchmarkMatchAnyCasePrefix(b, "rror", []string{"error Здесь", "another Error здесь", "тEст bar baz ErRor"}, false)
|
||||
})
|
||||
|
||||
b.Run("mismatch-full-lowercase", func(b *testing.B) {
|
||||
benchmarkMatchAnyCasePrefix(b, "warning", []string{"error here", "another error here", "foo bar baz error"}, false)
|
||||
})
|
||||
b.Run("mismatch-full-mixcase", func(b *testing.B) {
|
||||
benchmarkMatchAnyCasePrefix(b, "warning", []string{"Error here", "another eRROr here", "foo BAR Baz error"}, false)
|
||||
})
|
||||
b.Run("mismatch-full-unicode-lowercase", func(b *testing.B) {
|
||||
benchmarkMatchAnyCasePrefix(b, "warning", []string{"error здесь", "another error здесь", "тест bar baz error"}, false)
|
||||
})
|
||||
b.Run("mismatch-full-unicode-mixcase", func(b *testing.B) {
|
||||
benchmarkMatchAnyCasePrefix(b, "warning", []string{"error Здесь", "another Error здесь", "тEст bar baz ErRor"}, false)
|
||||
})
|
||||
}
|
||||
|
||||
func benchmarkMatchAnyCasePrefix(b *testing.B, phraseLowercase string, a []string, resultExpected bool) {
|
||||
n := 0
|
||||
for _, s := range a {
|
||||
n += len(s)
|
||||
}
|
||||
|
||||
b.ReportAllocs()
|
||||
b.SetBytes(int64(n))
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
for _, s := range a {
|
||||
result := matchAnyCasePrefix(s, phraseLowercase)
|
||||
if result != resultExpected {
|
||||
panic(fmt.Errorf("unexpected result for matchAnyCasePrefix(%q, %q); got %v; want %v", s, phraseLowercase, result, resultExpected))
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkMatchAnyCasePhrase(b *testing.B) {
|
||||
b.Run("match-ascii-lowercase", func(b *testing.B) {
|
||||
benchmarkMatchAnyCasePhrase(b, "error", []string{"error here", "another error here", "foo bar baz error"}, true)
|
||||
})
|
||||
b.Run("match-ascii-mixcase", func(b *testing.B) {
|
||||
benchmarkMatchAnyCasePhrase(b, "error", []string{"Error here", "another eRROr here", "foo BAR Baz error"}, true)
|
||||
})
|
||||
b.Run("match-unicode-lowercase", func(b *testing.B) {
|
||||
benchmarkMatchAnyCasePhrase(b, "error", []string{"error здесь", "another error здесь", "тест bar baz error"}, true)
|
||||
})
|
||||
b.Run("match-unicode-mixcase", func(b *testing.B) {
|
||||
benchmarkMatchAnyCasePhrase(b, "error", []string{"error Здесь", "another Error здесь", "тEст bar baz ErRor"}, true)
|
||||
})
|
||||
|
||||
b.Run("mismatch-partial-ascii-lowercase", func(b *testing.B) {
|
||||
benchmarkMatchAnyCasePhrase(b, "rror", []string{"error here", "another error here", "foo bar baz error"}, false)
|
||||
})
|
||||
b.Run("mismatch-partial-ascii-mixcase", func(b *testing.B) {
|
||||
benchmarkMatchAnyCasePhrase(b, "rror", []string{"Error here", "another eRROr here", "foo BAR Baz error"}, false)
|
||||
})
|
||||
b.Run("mismatch-partial-unicode-lowercase", func(b *testing.B) {
|
||||
benchmarkMatchAnyCasePhrase(b, "rror", []string{"error здесь", "another error здесь", "тест bar baz error"}, false)
|
||||
})
|
||||
b.Run("mismatch-partial-unicode-mixcase", func(b *testing.B) {
|
||||
benchmarkMatchAnyCasePhrase(b, "rror", []string{"error Здесь", "another Error здесь", "тEст bar baz ErRor"}, false)
|
||||
})
|
||||
|
||||
b.Run("mismatch-full-lowercase", func(b *testing.B) {
|
||||
benchmarkMatchAnyCasePhrase(b, "warning", []string{"error here", "another error here", "foo bar baz error"}, false)
|
||||
})
|
||||
b.Run("mismatch-full-mixcase", func(b *testing.B) {
|
||||
benchmarkMatchAnyCasePhrase(b, "warning", []string{"Error here", "another eRROr here", "foo BAR Baz error"}, false)
|
||||
})
|
||||
b.Run("mismatch-full-unicode-lowercase", func(b *testing.B) {
|
||||
benchmarkMatchAnyCasePhrase(b, "warning", []string{"error здесь", "another error здесь", "тест bar baz error"}, false)
|
||||
})
|
||||
b.Run("mismatch-full-unicode-mixcase", func(b *testing.B) {
|
||||
benchmarkMatchAnyCasePhrase(b, "warning", []string{"error Здесь", "another Error здесь", "тEст bar baz ErRor"}, false)
|
||||
})
|
||||
}
|
||||
|
||||
func benchmarkMatchAnyCasePhrase(b *testing.B, phraseLowercase string, a []string, resultExpected bool) {
|
||||
n := 0
|
||||
for _, s := range a {
|
||||
n += len(s)
|
||||
}
|
||||
|
||||
b.ReportAllocs()
|
||||
b.SetBytes(int64(n))
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
for _, s := range a {
|
||||
result := matchAnyCasePhrase(s, phraseLowercase)
|
||||
if result != resultExpected {
|
||||
panic(fmt.Errorf("unexpected result for matchAnyCasePhrase(%q, %q); got %v; want %v", s, phraseLowercase, result, resultExpected))
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
|
@ -1,5 +1,10 @@
|
|||
package stringsutil
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// LimitStringLen limits the length of s with maxLen.
|
||||
//
|
||||
// If len(s) > maxLen, then s is replaced with "s_prefix..s_suffix",
|
||||
|
@ -14,3 +19,33 @@ func LimitStringLen(s string, maxLen int) string {
|
|||
n := (maxLen / 2) - 1
|
||||
return s[:n] + ".." + s[len(s)-n:]
|
||||
}
|
||||
|
||||
// AppendLowercase appends lowercase s to dst and returns the result.
//
// The existing contents of dst are left intact.
// Invalid UTF-8 bytes in s are appended as U+FFFD.
//
// It is a faster alternative to strings.ToLower.
func AppendLowercase(dst []byte, s string) []byte {
	// Fast path - lowercase the leading ASCII bytes one by one.
	i := 0
	for i < len(s) {
		c := s[i]
		if c >= utf8.RuneSelf {
			break
		}
		if c >= 'A' && c <= 'Z' {
			c += 'a' - 'A'
		}
		dst = append(dst, c)
		i++
	}

	// Slow path - s[i:] starts with a non-ASCII byte (or is empty).
	// The ASCII prefix converted above consists of complete runes,
	// so it is kept as is and only the remaining runes are decoded.
	for _, r := range s[i:] {
		dst = utf8.AppendRune(dst, unicode.ToLower(r))
	}
	return dst
}
|
||||
|
|
|
@ -22,3 +22,19 @@ func TestLimitStringLen(t *testing.T) {
|
|||
f("abcde", 4, "a..e")
|
||||
f("abcde", 5, "abcde")
|
||||
}
|
||||
|
||||
func TestAppendLowercase(t *testing.T) {
|
||||
f := func(s, resultExpected string) {
|
||||
t.Helper()
|
||||
|
||||
result := AppendLowercase(nil, s)
|
||||
if string(result) != resultExpected {
|
||||
t.Fatalf("unexpected result; got %q; want %q", result, resultExpected)
|
||||
}
|
||||
}
|
||||
|
||||
f("", "")
|
||||
f("foo", "foo")
|
||||
f("FOO", "foo")
|
||||
f("foo БаР baz 123", "foo бар baz 123")
|
||||
}
|
||||
|
|
95
lib/stringsutil/stringsutil_timing_test.go
Normal file
95
lib/stringsutil/stringsutil_timing_test.go
Normal file
|
@ -0,0 +1,95 @@
|
|||
package stringsutil
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func BenchmarkAppendLowercase(b *testing.B) {
|
||||
b.Run("ascii-all-lowercase", func(b *testing.B) {
|
||||
benchmarkAppendLowercase(b, []string{"foo bar baz abc def", "23k umlkds", "lq, poweri2349)"})
|
||||
})
|
||||
b.Run("ascii-some-uppercase", func(b *testing.B) {
|
||||
benchmarkAppendLowercase(b, []string{"Foo Bar baz ABC def", "23k umlKDs", "lq, Poweri2349)"})
|
||||
})
|
||||
b.Run("ascii-all-uppercase", func(b *testing.B) {
|
||||
benchmarkAppendLowercase(b, []string{"FOO BAR BAZ ABC DEF", "23K UMLKDS", "LQ, POWERI2349)"})
|
||||
})
|
||||
b.Run("unicode-all-lowercase", func(b *testing.B) {
|
||||
benchmarkAppendLowercase(b, []string{"хщцукодл длобючф дл", "23и юбывлц", "лф, длощшу2349)"})
|
||||
})
|
||||
b.Run("unicode-some-uppercase", func(b *testing.B) {
|
||||
benchmarkAppendLowercase(b, []string{"Хщцукодл Длобючф ДЛ", "23и юбыВЛц", "лф, Длощшу2349)"})
|
||||
})
|
||||
b.Run("unicode-all-uppercase", func(b *testing.B) {
|
||||
benchmarkAppendLowercase(b, []string{"ХЩЦУКОДЛ ДЛОБЮЧФ ДЛ", "23И ЮБЫВЛЦ", "ЛФ, ДЛОЩШУ2349)"})
|
||||
})
|
||||
}
|
||||
|
||||
func benchmarkAppendLowercase(b *testing.B, a []string) {
|
||||
n := 0
|
||||
for _, s := range a {
|
||||
n += len(s)
|
||||
}
|
||||
|
||||
b.ReportAllocs()
|
||||
b.SetBytes(int64(n))
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
var buf []byte
|
||||
var n uint64
|
||||
for pb.Next() {
|
||||
buf = buf[:0]
|
||||
for _, s := range a {
|
||||
buf = AppendLowercase(buf, s)
|
||||
}
|
||||
n += uint64(len(buf))
|
||||
}
|
||||
GlobalSink.Add(n)
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkStringsToLower(b *testing.B) {
|
||||
b.Run("ascii-all-lowercase", func(b *testing.B) {
|
||||
benchmarkStringsToLower(b, []string{"foo bar baz abc def", "23k umlkds", "lq, poweri2349)"})
|
||||
})
|
||||
b.Run("ascii-some-uppercase", func(b *testing.B) {
|
||||
benchmarkStringsToLower(b, []string{"Foo Bar baz ABC def", "23k umlKDs", "lq, Poweri2349)"})
|
||||
})
|
||||
b.Run("ascii-all-uppercase", func(b *testing.B) {
|
||||
benchmarkStringsToLower(b, []string{"FOO BAR BAZ ABC DEF", "23K UMLKDS", "LQ, POWERI2349)"})
|
||||
})
|
||||
b.Run("unicode-all-lowercase", func(b *testing.B) {
|
||||
benchmarkStringsToLower(b, []string{"хщцукодл длобючф дл", "23и юбывлц", "лф, длощшу2349)"})
|
||||
})
|
||||
b.Run("unicode-some-uppercase", func(b *testing.B) {
|
||||
benchmarkStringsToLower(b, []string{"Хщцукодл Длобючф ДЛ", "23и юбыВЛц", "лф, Длощшу2349)"})
|
||||
})
|
||||
b.Run("unicode-all-uppercase", func(b *testing.B) {
|
||||
benchmarkStringsToLower(b, []string{"ХЩЦУКОДЛ ДЛОБЮЧФ ДЛ", "23И ЮБЫВЛЦ", "ЛФ, ДЛОЩШУ2349)"})
|
||||
})
|
||||
}
|
||||
|
||||
func benchmarkStringsToLower(b *testing.B, a []string) {
|
||||
n := 0
|
||||
for _, s := range a {
|
||||
n += len(s)
|
||||
}
|
||||
|
||||
b.ReportAllocs()
|
||||
b.SetBytes(int64(n))
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
var buf []byte
|
||||
var n uint64
|
||||
for pb.Next() {
|
||||
for _, s := range a {
|
||||
sLower := strings.ToLower(s)
|
||||
buf = append(buf, sLower...)
|
||||
}
|
||||
n += uint64(len(buf))
|
||||
}
|
||||
GlobalSink.Add(n)
|
||||
})
|
||||
}
|
||||
|
||||
// GlobalSink accumulates the byte counters, which are added by the benchmarks in this file.
//
// NOTE(review): presumably it exists in order to prevent the compiler
// from eliminating the benchmarked code as unused - confirm.
var GlobalSink atomic.Uint64
|
Loading…
Reference in a new issue