From 0f31264e837817eb5b3b9be73b806b7c8cb89b04 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Sun, 28 Apr 2024 16:21:08 +0200 Subject: [PATCH] wip --- app/vmselect/graphiteql/parser.go | 4 +- app/vmselect/promql/binary_op.go | 4 +- app/vmselect/promql/eval.go | 10 +- app/vmselect/promql/exec.go | 2 +- lib/logstorage/filters.go | 130 +++++++++++++++++++-- lib/logstorage/filters_test.go | 68 +++++++++++ lib/logstorage/filters_timing_test.go | 128 ++++++++++++++++++++ lib/stringsutil/stringsutil.go | 35 ++++++ lib/stringsutil/stringsutil_test.go | 16 +++ lib/stringsutil/stringsutil_timing_test.go | 95 +++++++++++++++ 10 files changed, 470 insertions(+), 22 deletions(-) create mode 100644 lib/logstorage/filters_timing_test.go create mode 100644 lib/stringsutil/stringsutil_timing_test.go diff --git a/app/vmselect/graphiteql/parser.go b/app/vmselect/graphiteql/parser.go index b827c0f0e..a56bf74e6 100644 --- a/app/vmselect/graphiteql/parser.go +++ b/app/vmselect/graphiteql/parser.go @@ -213,11 +213,11 @@ func (p *parser) parseMetricExprOrFuncCall() (Expr, error) { // Metric epxression or bool expression or None. 
if isBool(ident) { be := &BoolExpr{ - B: strings.ToLower(ident) == "true", + B: strings.EqualFold(ident, "true"), } return be, nil } - if strings.ToLower(ident) == "none" { + if strings.EqualFold(ident, "none") { nne := &NoneExpr{} return nne, nil } diff --git a/app/vmselect/promql/binary_op.go b/app/vmselect/promql/binary_op.go index 0cf2f7528..ac347e504 100644 --- a/app/vmselect/promql/binary_op.go +++ b/app/vmselect/promql/binary_op.go @@ -112,7 +112,7 @@ func binaryOpNeqFunc(bfa *binaryOpFuncArg) ([]*timeseries, error) { } func isUnionFunc(e metricsql.Expr) bool { - if fe, ok := e.(*metricsql.FuncExpr); ok && (fe.Name == "" || strings.ToLower(fe.Name) == "union") { + if fe, ok := e.(*metricsql.FuncExpr); ok && (fe.Name == "" || strings.EqualFold(fe.Name, "union")) { return true } return false @@ -303,7 +303,7 @@ func ensureSingleTimeseries(side string, be *metricsql.BinaryOpExpr, tss []*time func groupJoin(singleTimeseriesSide string, be *metricsql.BinaryOpExpr, rvsLeft, rvsRight, tssLeft, tssRight []*timeseries) ([]*timeseries, []*timeseries, error) { joinTags := be.JoinModifier.Args var skipTags []string - if strings.ToLower(be.GroupModifier.Op) == "on" { + if strings.EqualFold(be.GroupModifier.Op, "on") { skipTags = be.GroupModifier.Args } joinPrefix := "" diff --git a/app/vmselect/promql/eval.go b/app/vmselect/promql/eval.go index e4ab2f42e..d7cffb08b 100644 --- a/app/vmselect/promql/eval.go +++ b/app/vmselect/promql/eval.go @@ -542,7 +542,7 @@ func execBinaryOpArgs(qt *querytracer.Tracer, ec *EvalConfig, exprFirst, exprSec if err != nil { return nil, nil, err } - if len(tssFirst) == 0 && strings.ToLower(be.Op) != "or" { + if len(tssFirst) == 0 && !strings.EqualFold(be.Op, "or") { // Fast path: there is no sense in executing the exprSecond when exprFirst returns an empty result, // since the "exprFirst op exprSecond" would return an empty result in any case. 
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3349 @@ -1168,7 +1168,7 @@ func evalInstantRollup(qt *querytracer.Tracer, ec *EvalConfig, funcName string, return evalExpr(qt, ec, be) case "rate": if iafc != nil { - if strings.ToLower(iafc.ae.Name) != "sum" { + if !strings.EqualFold(iafc.ae.Name, "sum") { qt.Printf("do not apply instant rollup optimization for incremental aggregate %s()", iafc.ae.Name) return evalAt(qt, timestamp, window) } @@ -1214,7 +1214,7 @@ func evalInstantRollup(qt *querytracer.Tracer, ec *EvalConfig, funcName string, return evalExpr(qt, ec, be) case "max_over_time": if iafc != nil { - if strings.ToLower(iafc.ae.Name) != "max" { + if !strings.EqualFold(iafc.ae.Name, "max") { qt.Printf("do not apply instant rollup optimization for non-max incremental aggregate %s()", iafc.ae.Name) return evalAt(qt, timestamp, window) } @@ -1276,7 +1276,7 @@ func evalInstantRollup(qt *querytracer.Tracer, ec *EvalConfig, funcName string, return tss, nil case "min_over_time": if iafc != nil { - if strings.ToLower(iafc.ae.Name) != "min" { + if !strings.EqualFold(iafc.ae.Name, "min") { qt.Printf("do not apply instant rollup optimization for non-min incremental aggregate %s()", iafc.ae.Name) return evalAt(qt, timestamp, window) } @@ -1345,7 +1345,7 @@ func evalInstantRollup(qt *querytracer.Tracer, ec *EvalConfig, funcName string, "increase", "increase_pure", "sum_over_time": - if iafc != nil && strings.ToLower(iafc.ae.Name) != "sum" { + if iafc != nil && !strings.EqualFold(iafc.ae.Name, "sum") { qt.Printf("do not apply instant rollup optimization for non-sum incremental aggregate %s()", iafc.ae.Name) return evalAt(qt, timestamp, window) } diff --git a/app/vmselect/promql/exec.go b/app/vmselect/promql/exec.go index 81ae717b7..c62170a75 100644 --- a/app/vmselect/promql/exec.go +++ b/app/vmselect/promql/exec.go @@ -122,7 +122,7 @@ func maySortResults(e metricsql.Expr) bool { return false } case *metricsql.BinaryOpExpr: - if strings.ToLower(v.Op) == "or" 
{ + if strings.EqualFold(v.Op, "or") { // Do not sort results for `a or b` in the same way as Prometheus does. // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4763 return false diff --git a/lib/logstorage/filters.go b/lib/logstorage/filters.go index 48b8c0fec..6c17d9cfe 100644 --- a/lib/logstorage/filters.go +++ b/lib/logstorage/filters.go @@ -13,6 +13,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil" ) func getFilterBitmap(bitsLen int) *filterBitmap { @@ -1276,6 +1277,9 @@ type anyCasePrefixFilter struct { fieldName string prefix string + prefixLowercaseOnce sync.Once + prefixLowercase string + tokensOnce sync.Once tokens []string } @@ -1296,9 +1300,18 @@ func (pf *anyCasePrefixFilter) initTokens() { pf.tokens = getTokensSkipLast(pf.prefix) } +func (pf *anyCasePrefixFilter) getPrefixLowercase() string { + pf.prefixLowercaseOnce.Do(pf.initPrefixLowercase) + return pf.prefixLowercase +} + +func (pf *anyCasePrefixFilter) initPrefixLowercase() { + pf.prefixLowercase = strings.ToLower(pf.prefix) +} + func (pf *anyCasePrefixFilter) apply(bs *blockSearch, bm *filterBitmap) { fieldName := pf.fieldName - prefixLowercase := strings.ToLower(pf.prefix) + prefixLowercase := pf.getPrefixLowercase() // Verify whether pf matches const column v := bs.csh.getConstColumnValue(fieldName) @@ -1427,6 +1440,9 @@ type anyCasePhraseFilter struct { fieldName string phrase string + phraseLowercaseOnce sync.Once + phraseLowercase string + tokensOnce sync.Once tokens []string } @@ -1444,9 +1460,18 @@ func (pf *anyCasePhraseFilter) initTokens() { pf.tokens = tokenizeStrings(nil, []string{pf.phrase}) } +func (pf *anyCasePhraseFilter) getPhraseLowercase() string { + pf.phraseLowercaseOnce.Do(pf.initPhraseLowercase) + return pf.phraseLowercase +} + +func (pf *anyCasePhraseFilter) 
initPhraseLowercase() { + pf.phraseLowercase = strings.ToLower(pf.phrase) +} + func (pf *anyCasePhraseFilter) apply(bs *blockSearch, bm *filterBitmap) { fieldName := pf.fieldName - phraseLowercase := strings.ToLower(pf.phrase) + phraseLowercase := pf.getPhraseLowercase() // Verify whether pf matches const column v := bs.csh.getConstColumnValue(fieldName) @@ -2787,15 +2812,52 @@ func visitValues(bs *blockSearch, ch *columnHeader, bm *filterBitmap, f func(val } func matchAnyCasePrefix(s, prefixLowercase string) bool { - sLowercase := strings.ToLower(s) - return matchPrefix(sLowercase, prefixLowercase) + if len(prefixLowercase) == 0 { + // Special case - empty prefix matches any non-empty string. + return len(s) > 0 + } + if len(prefixLowercase) > len(s) { + return false + } + + if isASCIILowercase(s) { + // Fast path - s is in lowercase + return matchPrefix(s, prefixLowercase) + } + + // Slow path - convert s to lowercase before matching + bb := bbPool.Get() + bb.B = stringsutil.AppendLowercase(bb.B, s) + sLowercase := bytesutil.ToUnsafeString(bb.B) + ok := matchPrefix(sLowercase, prefixLowercase) + bbPool.Put(bb) + + return ok +} + +func isASCIILowercase(s string) bool { + for i := 0; i < len(s); i++ { + c := s[i] + if c >= utf8.RuneSelf || (c >= 'A' && c <= 'Z') { + return false + } + } + return true } func matchPrefix(s, prefix string) bool { if len(prefix) == 0 { + // Special case - empty prefix matches any non-empty string. 
return len(s) > 0 } - r, _ := utf8.DecodeRuneInString(prefix) + if len(prefix) > len(s) { + return false + } + + r := rune(prefix[0]) + if r >= utf8.RuneSelf { + r, _ = utf8.DecodeRuneInString(prefix) + } startsWithToken := isTokenRune(r) offset := 0 for { @@ -2806,7 +2868,10 @@ func matchPrefix(s, prefix string) bool { offset += n // Make sure that the found phrase contains non-token chars at the beginning if startsWithToken && offset > 0 { - r, _ := utf8.DecodeLastRuneInString(s[:offset]) + r := rune(s[offset-1]) + if r >= utf8.RuneSelf { + r, _ = utf8.DecodeLastRuneInString(s[:offset]) + } if r == utf8.RuneError || isTokenRune(r) { offset++ continue @@ -2853,8 +2918,27 @@ func matchSequence(s string, phrases []string) bool { } func matchAnyCasePhrase(s, phraseLowercase string) bool { - sLowercase := strings.ToLower(s) - return matchPhrase(sLowercase, phraseLowercase) + if len(phraseLowercase) == 0 { + // Special case - empty phrase matches only empty string. + return len(s) == 0 + } + if len(phraseLowercase) > len(s) { + return false + } + + if isASCIILowercase(s) { + // Fast path - s is in lowercase + return matchPhrase(s, phraseLowercase) + } + + // Slow path - convert s to lowercase before matching + bb := bbPool.Get() + bb.B = stringsutil.AppendLowercase(bb.B, s) + sLowercase := bytesutil.ToUnsafeString(bb.B) + ok := matchPhrase(sLowercase, phraseLowercase) + bbPool.Put(bb) + + return ok } func matchExactPrefix(s, prefix string) bool { @@ -2863,6 +2947,7 @@ func matchExactPrefix(s, prefix string) bool { func matchPhrase(s, phrase string) bool { if len(phrase) == 0 { + // Special case - empty phrase matches only empty string. 
return len(s) == 0 } n := getPhrasePos(s, phrase) @@ -2870,10 +2955,25 @@ func matchPhrase(s, phrase string) bool { } func getPhrasePos(s, phrase string) int { - r, _ := utf8.DecodeRuneInString(phrase) + if len(phrase) == 0 { + return 0 + } + if len(phrase) > len(s) { + return -1 + } + + r := rune(phrase[0]) + if r >= utf8.RuneSelf { + r, _ = utf8.DecodeRuneInString(phrase) + } startsWithToken := isTokenRune(r) - r, _ = utf8.DecodeLastRuneInString(phrase) + + r = rune(phrase[len(phrase)-1]) + if r >= utf8.RuneSelf { + r, _ = utf8.DecodeLastRuneInString(phrase) + } endsWithToken := isTokenRune(r) + pos := 0 for { n := strings.Index(s[pos:], phrase) @@ -2883,14 +2983,20 @@ func getPhrasePos(s, phrase string) int { pos += n // Make sure that the found phrase contains non-token chars at the beginning and at the end if startsWithToken && pos > 0 { - r, _ := utf8.DecodeLastRuneInString(s[:pos]) + r := rune(s[pos-1]) + if r >= utf8.RuneSelf { + r, _ = utf8.DecodeLastRuneInString(s[:pos]) + } if r == utf8.RuneError || isTokenRune(r) { pos++ continue } } if endsWithToken && pos+len(phrase) < len(s) { - r, _ := utf8.DecodeRuneInString(s[pos+len(phrase):]) + r := rune(s[pos+len(phrase)]) + if r >= utf8.RuneSelf { + r, _ = utf8.DecodeRuneInString(s[pos+len(phrase):]) + } if r == utf8.RuneError || isTokenRune(r) { pos++ continue diff --git a/lib/logstorage/filters_test.go b/lib/logstorage/filters_test.go index a565e3838..e06093683 100644 --- a/lib/logstorage/filters_test.go +++ b/lib/logstorage/filters_test.go @@ -10,6 +10,74 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs" ) +func TestMatchAnyCasePrefix(t *testing.T) { + f := func(s, prefixLowercase string, resultExpected bool) { + t.Helper() + result := matchAnyCasePrefix(s, prefixLowercase) + if result != resultExpected { + t.Fatalf("unexpected result; got %v; want %v", result, resultExpected) + } + } + + // empty prefix matches non-empty strings + f("", "", false) + f("foo", "", true) + f("тест", "", true) + 
+ // empty string doesn't match non-empty prefix + f("", "foo", false) + f("", "тест", false) + + // full match + f("foo", "foo", true) + f("FOo", "foo", true) + f("Test ТЕСт 123", "test тест 123", true) + + // prefix match + f("foo", "f", true) + f("foo тест bar", "те", true) + f("foo ТЕСТ bar", "те", true) + + // mismatch + f("foo", "o", false) + f("тест", "foo", false) + f("Тест", "ест", false) +} + +func TestMatchAnyCasePhrase(t *testing.T) { + f := func(s, phraseLowercase string, resultExpected bool) { + t.Helper() + result := matchAnyCasePhrase(s, phraseLowercase) + if result != resultExpected { + t.Fatalf("unexpected result; got %v; want %v", result, resultExpected) + } + } + + // empty phrase matches only empty string + f("", "", true) + f("foo", "", false) + f("тест", "", false) + + // empty string doesn't match non-empty phrase + f("", "foo", false) + f("", "тест", false) + + // full match + f("foo", "foo", true) + f("FOo", "foo", true) + f("Test ТЕСт 123", "test тест 123", true) + + // phrase match + f("a foo", "foo", true) + f("foo тест bar", "тест", true) + f("foo ТЕСТ bar", "тест bar", true) + + // mismatch + f("foo", "fo", false) + f("тест", "foo", false) + f("Тест", "ест", false) +} + func TestMatchLenRange(t *testing.T) { f := func(s string, minLen, maxLen uint64, resultExpected bool) { t.Helper() diff --git a/lib/logstorage/filters_timing_test.go b/lib/logstorage/filters_timing_test.go new file mode 100644 index 000000000..19502248d --- /dev/null +++ b/lib/logstorage/filters_timing_test.go @@ -0,0 +1,128 @@ +package logstorage + +import ( + "fmt" + "testing" +) + +func BenchmarkMatchAnyCasePrefix(b *testing.B) { + b.Run("match-ascii-lowercase", func(b *testing.B) { + benchmarkMatchAnyCasePrefix(b, "err", []string{"error here", "another error here", "foo bar baz error"}, true) + }) + b.Run("match-ascii-mixcase", func(b *testing.B) { + benchmarkMatchAnyCasePrefix(b, "err", []string{"Error here", "another eRROr here", "foo BAR Baz error"}, true) + }) 
+ b.Run("match-unicode-lowercase", func(b *testing.B) { + benchmarkMatchAnyCasePrefix(b, "err", []string{"error здесь", "another error здесь", "тест bar baz error"}, true) + }) + b.Run("match-unicode-mixcase", func(b *testing.B) { + benchmarkMatchAnyCasePrefix(b, "err", []string{"error Здесь", "another Error здесь", "тEст bar baz ErRor"}, true) + }) + + b.Run("mismatch-partial-ascii-lowercase", func(b *testing.B) { + benchmarkMatchAnyCasePrefix(b, "rror", []string{"error here", "another error here", "foo bar baz error"}, false) + }) + b.Run("mismatch-partial-ascii-mixcase", func(b *testing.B) { + benchmarkMatchAnyCasePrefix(b, "rror", []string{"Error here", "another eRROr here", "foo BAR Baz error"}, false) + }) + b.Run("mismatch-partial-unicode-lowercase", func(b *testing.B) { + benchmarkMatchAnyCasePrefix(b, "rror", []string{"error здесь", "another error здесь", "тест bar baz error"}, false) + }) + b.Run("mismatch-partial-unicode-mixcase", func(b *testing.B) { + benchmarkMatchAnyCasePrefix(b, "rror", []string{"error Здесь", "another Error здесь", "тEст bar baz ErRor"}, false) + }) + + b.Run("mismatch-full-lowercase", func(b *testing.B) { + benchmarkMatchAnyCasePrefix(b, "warning", []string{"error here", "another error here", "foo bar baz error"}, false) + }) + b.Run("mismatch-full-mixcase", func(b *testing.B) { + benchmarkMatchAnyCasePrefix(b, "warning", []string{"Error here", "another eRROr here", "foo BAR Baz error"}, false) + }) + b.Run("mismatch-full-unicode-lowercase", func(b *testing.B) { + benchmarkMatchAnyCasePrefix(b, "warning", []string{"error здесь", "another error здесь", "тест bar baz error"}, false) + }) + b.Run("mismatch-full-unicode-mixcase", func(b *testing.B) { + benchmarkMatchAnyCasePrefix(b, "warning", []string{"error Здесь", "another Error здесь", "тEст bar baz ErRor"}, false) + }) +} + +func benchmarkMatchAnyCasePrefix(b *testing.B, phraseLowercase string, a []string, resultExpected bool) { + n := 0 + for _, s := range a { + n += len(s) + } 
+ + b.ReportAllocs() + b.SetBytes(int64(n)) + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + for _, s := range a { + result := matchAnyCasePrefix(s, phraseLowercase) + if result != resultExpected { + panic(fmt.Errorf("unexpected result for matchAnyCasePrefix(%q, %q); got %v; want %v", s, phraseLowercase, result, resultExpected)) + } + } + } + }) +} + +func BenchmarkMatchAnyCasePhrase(b *testing.B) { + b.Run("match-ascii-lowercase", func(b *testing.B) { + benchmarkMatchAnyCasePhrase(b, "error", []string{"error here", "another error here", "foo bar baz error"}, true) + }) + b.Run("match-ascii-mixcase", func(b *testing.B) { + benchmarkMatchAnyCasePhrase(b, "error", []string{"Error here", "another eRROr here", "foo BAR Baz error"}, true) + }) + b.Run("match-unicode-lowercase", func(b *testing.B) { + benchmarkMatchAnyCasePhrase(b, "error", []string{"error здесь", "another error здесь", "тест bar baz error"}, true) + }) + b.Run("match-unicode-mixcase", func(b *testing.B) { + benchmarkMatchAnyCasePhrase(b, "error", []string{"error Здесь", "another Error здесь", "тEст bar baz ErRor"}, true) + }) + + b.Run("mismatch-partial-ascii-lowercase", func(b *testing.B) { + benchmarkMatchAnyCasePhrase(b, "rror", []string{"error here", "another error here", "foo bar baz error"}, false) + }) + b.Run("mismatch-partial-ascii-mixcase", func(b *testing.B) { + benchmarkMatchAnyCasePhrase(b, "rror", []string{"Error here", "another eRROr here", "foo BAR Baz error"}, false) + }) + b.Run("mismatch-partial-unicode-lowercase", func(b *testing.B) { + benchmarkMatchAnyCasePhrase(b, "rror", []string{"error здесь", "another error здесь", "тест bar baz error"}, false) + }) + b.Run("mismatch-partial-unicode-mixcase", func(b *testing.B) { + benchmarkMatchAnyCasePhrase(b, "rror", []string{"error Здесь", "another Error здесь", "тEст bar baz ErRor"}, false) + }) + + b.Run("mismatch-full-lowercase", func(b *testing.B) { + benchmarkMatchAnyCasePhrase(b, "warning", []string{"error here", "another 
error here", "foo bar baz error"}, false) + }) + b.Run("mismatch-full-mixcase", func(b *testing.B) { + benchmarkMatchAnyCasePhrase(b, "warning", []string{"Error here", "another eRROr here", "foo BAR Baz error"}, false) + }) + b.Run("mismatch-full-unicode-lowercase", func(b *testing.B) { + benchmarkMatchAnyCasePhrase(b, "warning", []string{"error здесь", "another error здесь", "тест bar baz error"}, false) + }) + b.Run("mismatch-full-unicode-mixcase", func(b *testing.B) { + benchmarkMatchAnyCasePhrase(b, "warning", []string{"error Здесь", "another Error здесь", "тEст bar baz ErRor"}, false) + }) +} + +func benchmarkMatchAnyCasePhrase(b *testing.B, phraseLowercase string, a []string, resultExpected bool) { + n := 0 + for _, s := range a { + n += len(s) + } + + b.ReportAllocs() + b.SetBytes(int64(n)) + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + for _, s := range a { + result := matchAnyCasePhrase(s, phraseLowercase) + if result != resultExpected { + panic(fmt.Errorf("unexpected result for matchAnyCasePhrase(%q, %q); got %v; want %v", s, phraseLowercase, result, resultExpected)) + } + } + } + }) +} diff --git a/lib/stringsutil/stringsutil.go b/lib/stringsutil/stringsutil.go index 034612292..faa5126b9 100644 --- a/lib/stringsutil/stringsutil.go +++ b/lib/stringsutil/stringsutil.go @@ -1,5 +1,10 @@ package stringsutil +import ( + "unicode" + "unicode/utf8" +) + // LimitStringLen limits the length of s with maxLen. // // If len(s) > maxLen, then s is replaced with "s_prefix..s_suffix", @@ -14,3 +19,33 @@ func LimitStringLen(s string, maxLen int) string { n := (maxLen / 2) - 1 return s[:n] + ".." + s[len(s)-n:] } + +// AppendLowercase appends lowercase s to dst and returns the result. +// +// It is a faster alternative to strings.ToLower. +func AppendLowercase(dst []byte, s string) []byte { + dstLen := len(dst) + + // Try the fast path first by assuming that s contains only ASCII chars. 
+ hasUnicodeChars := false + for i := 0; i < len(s); i++ { + c := s[i] + if c >= utf8.RuneSelf { + hasUnicodeChars = true + break + } + if c >= 'A' && c <= 'Z' { + c += 'a' - 'A' + } + dst = append(dst, c) + } + if hasUnicodeChars { + // Slow path - s contains non-ASCII chars. Use Unicode encoding. + dst = dst[:dstLen] + for _, r := range s { + r = unicode.ToLower(r) + dst = utf8.AppendRune(dst, r) + } + } + return dst +} diff --git a/lib/stringsutil/stringsutil_test.go b/lib/stringsutil/stringsutil_test.go index bda93fe4f..c0809197c 100644 --- a/lib/stringsutil/stringsutil_test.go +++ b/lib/stringsutil/stringsutil_test.go @@ -22,3 +22,19 @@ func TestLimitStringLen(t *testing.T) { f("abcde", 4, "a..e") f("abcde", 5, "abcde") } + +func TestAppendLowercase(t *testing.T) { + f := func(s, resultExpected string) { + t.Helper() + + result := AppendLowercase(nil, s) + if string(result) != resultExpected { + t.Fatalf("unexpected result; got %q; want %q", result, resultExpected) + } + } + + f("", "") + f("foo", "foo") + f("FOO", "foo") + f("foo БаР baz 123", "foo бар baz 123") +} diff --git a/lib/stringsutil/stringsutil_timing_test.go b/lib/stringsutil/stringsutil_timing_test.go new file mode 100644 index 000000000..c2eddc454 --- /dev/null +++ b/lib/stringsutil/stringsutil_timing_test.go @@ -0,0 +1,95 @@ +package stringsutil + +import ( + "strings" + "sync/atomic" + "testing" +) + +func BenchmarkAppendLowercase(b *testing.B) { + b.Run("ascii-all-lowercase", func(b *testing.B) { + benchmarkAppendLowercase(b, []string{"foo bar baz abc def", "23k umlkds", "lq, poweri2349)"}) + }) + b.Run("ascii-some-uppercase", func(b *testing.B) { + benchmarkAppendLowercase(b, []string{"Foo Bar baz ABC def", "23k umlKDs", "lq, Poweri2349)"}) + }) + b.Run("ascii-all-uppercase", func(b *testing.B) { + benchmarkAppendLowercase(b, []string{"FOO BAR BAZ ABC DEF", "23K UMLKDS", "LQ, POWERI2349)"}) + }) + b.Run("unicode-all-lowercase", func(b *testing.B) { + benchmarkAppendLowercase(b, 
[]string{"хщцукодл длобючф дл", "23и юбывлц", "лф, длощшу2349)"}) + }) + b.Run("unicode-some-uppercase", func(b *testing.B) { + benchmarkAppendLowercase(b, []string{"Хщцукодл Длобючф ДЛ", "23и юбыВЛц", "лф, Длощшу2349)"}) + }) + b.Run("unicode-all-uppercase", func(b *testing.B) { + benchmarkAppendLowercase(b, []string{"ХЩЦУКОДЛ ДЛОБЮЧФ ДЛ", "23И ЮБЫВЛЦ", "ЛФ, ДЛОЩШУ2349)"}) + }) +} + +func benchmarkAppendLowercase(b *testing.B, a []string) { + n := 0 + for _, s := range a { + n += len(s) + } + + b.ReportAllocs() + b.SetBytes(int64(n)) + b.RunParallel(func(pb *testing.PB) { + var buf []byte + var n uint64 + for pb.Next() { + buf = buf[:0] + for _, s := range a { + buf = AppendLowercase(buf, s) + } + n += uint64(len(buf)) + } + GlobalSink.Add(n) + }) +} + +func BenchmarkStringsToLower(b *testing.B) { + b.Run("ascii-all-lowercase", func(b *testing.B) { + benchmarkStringsToLower(b, []string{"foo bar baz abc def", "23k umlkds", "lq, poweri2349)"}) + }) + b.Run("ascii-some-uppercase", func(b *testing.B) { + benchmarkStringsToLower(b, []string{"Foo Bar baz ABC def", "23k umlKDs", "lq, Poweri2349)"}) + }) + b.Run("ascii-all-uppercase", func(b *testing.B) { + benchmarkStringsToLower(b, []string{"FOO BAR BAZ ABC DEF", "23K UMLKDS", "LQ, POWERI2349)"}) + }) + b.Run("unicode-all-lowercase", func(b *testing.B) { + benchmarkStringsToLower(b, []string{"хщцукодл длобючф дл", "23и юбывлц", "лф, длощшу2349)"}) + }) + b.Run("unicode-some-uppercase", func(b *testing.B) { + benchmarkStringsToLower(b, []string{"Хщцукодл Длобючф ДЛ", "23и юбыВЛц", "лф, Длощшу2349)"}) + }) + b.Run("unicode-all-uppercase", func(b *testing.B) { + benchmarkStringsToLower(b, []string{"ХЩЦУКОДЛ ДЛОБЮЧФ ДЛ", "23И ЮБЫВЛЦ", "ЛФ, ДЛОЩШУ2349)"}) + }) +} + +func benchmarkStringsToLower(b *testing.B, a []string) { + n := 0 + for _, s := range a { + n += len(s) + } + + b.ReportAllocs() + b.SetBytes(int64(n)) + b.RunParallel(func(pb *testing.PB) { + var buf []byte + var n uint64 + for pb.Next() { + for _, s := range a 
{ + sLower := strings.ToLower(s) + buf = append(buf, sLower...) + } + n += uint64(len(buf)) + } + GlobalSink.Add(n) + }) +} + +var GlobalSink atomic.Uint64