lib/promrelabel: optimize action: {labeldrop,labelkeep,keep,drop} with regex containing alternate values

For example, the following relabeling rule must work much faster now:

- action: labeldrop
  regex: "foo|bar|baz"
This commit is contained in:
Aliaksandr Valialkin 2022-08-24 17:54:26 +03:00
parent 0d46e24af5
commit 0d4ea03a73
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
9 changed files with 308 additions and 128 deletions

View file

@ -23,6 +23,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
* FEATURE: return shorter error messages to Grafana and to other clients requesting [/api/v1/query](https://docs.victoriametrics.com/keyConcepts.html#instant-query) and [/api/v1/query_range](https://docs.victoriametrics.com/keyConcepts.html#range-query) endpoints. This should simplify reading these errors by humans. The long error message with full context is still written to logs. * FEATURE: return shorter error messages to Grafana and to other clients requesting [/api/v1/query](https://docs.victoriametrics.com/keyConcepts.html#instant-query) and [/api/v1/query_range](https://docs.victoriametrics.com/keyConcepts.html#range-query) endpoints. This should simplify reading these errors by humans. The long error message with full context is still written to logs.
* FEATURE: add the ability to fine-tune the number of points, which can be generated per each matching time series during [subquery](https://docs.victoriametrics.com/MetricsQL.html#subqueries) evaluation. This can be done with the `-search.maxPointsSubqueryPerTimeseries` command-line flag. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2922). * FEATURE: add the ability to fine-tune the number of points, which can be generated per each matching time series during [subquery](https://docs.victoriametrics.com/MetricsQL.html#subqueries) evaluation. This can be done with the `-search.maxPointsSubqueryPerTimeseries` command-line flag. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2922).
* FEATURE: improve the performance of `action: keep`, `action: drop`, `action: labelkeep` and `action: labeldrop` relabeling rules when `regex` is a plain list of alternative values. For example, `regex: "foo|bar|baz"`.
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add ability to accept [multitenant](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy) data via OpenTSDB `/api/put` protocol at `/insert/<tenantID>/opentsdb/api/put` http endpoint if [multitenant support](https://docs.victoriametrics.com/vmagent.html#multitenancy) is enabled at `vmagent`. Thanks to @chengjianyun for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3015). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add ability to accept [multitenant](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy) data via OpenTSDB `/api/put` protocol at `/insert/<tenantID>/opentsdb/api/put` http endpoint if [multitenant support](https://docs.victoriametrics.com/vmagent.html#multitenancy) is enabled at `vmagent`. Thanks to @chengjianyun for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3015).
* FEATURE: [monitoring](https://docs.victoriametrics.com/#monitoring): expose `vm_hourly_series_limit_max_series`, `vm_hourly_series_limit_current_series`, `vm_daily_series_limit_max_series` and `vm_daily_series_limit_current_series` metrics when `-search.maxHourlySeries` or `-search.maxDailySeries` limits are set. This allows alerting when the number of unique series reaches the configured limits. See [these docs](https://docs.victoriametrics.com/#cardinality-limiter) for details. * FEATURE: [monitoring](https://docs.victoriametrics.com/#monitoring): expose `vm_hourly_series_limit_max_series`, `vm_hourly_series_limit_current_series`, `vm_daily_series_limit_max_series` and `vm_daily_series_limit_current_series` metrics when `-search.maxHourlySeries` or `-search.maxDailySeries` limits are set. This allows alerting when the number of unique series reaches the configured limits. See [these docs](https://docs.victoriametrics.com/#cardinality-limiter) for details.
* FEATURE: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): reduce the amounts of logging at `vmstorage` when `vmselect` connects/disconnects to `vmstorage`. * FEATURE: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): reduce the amounts of logging at `vmstorage` when `vmselect` connects/disconnects to `vmstorage`.

View file

@ -8,6 +8,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envtemplate" "github.com/VictoriaMetrics/VictoriaMetrics/lib/envtemplate"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs" "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
"gopkg.in/yaml.v2" "gopkg.in/yaml.v2"
) )
@ -199,6 +200,7 @@ func parseRelabelConfig(rc *RelabelConfig) (*parsedRelabelConfig, error) {
targetLabel := rc.TargetLabel targetLabel := rc.TargetLabel
regexCompiled := defaultRegexForRelabelConfig regexCompiled := defaultRegexForRelabelConfig
regexOriginalCompiled := defaultOriginalRegexForRelabelConfig regexOriginalCompiled := defaultOriginalRegexForRelabelConfig
var regexOrValues []string
if rc.Regex != nil { if rc.Regex != nil {
regex := rc.Regex.S regex := rc.Regex.S
regexOrig := regex regexOrig := regex
@ -215,6 +217,7 @@ func parseRelabelConfig(rc *RelabelConfig) (*parsedRelabelConfig, error) {
return nil, fmt.Errorf("cannot parse `regex` %q: %w", regexOrig, err) return nil, fmt.Errorf("cannot parse `regex` %q: %w", regexOrig, err)
} }
regexOriginalCompiled = reOriginal regexOriginalCompiled = reOriginal
regexOrValues = regexutil.GetOrValues(regexOrig)
} }
modulus := rc.Modulus modulus := rc.Modulus
replacement := "$1" replacement := "$1"
@ -344,6 +347,7 @@ func parseRelabelConfig(rc *RelabelConfig) (*parsedRelabelConfig, error) {
graphiteLabelRules: graphiteLabelRules, graphiteLabelRules: graphiteLabelRules,
regexOriginal: regexOriginalCompiled, regexOriginal: regexOriginalCompiled,
regexOrValues: regexOrValues,
hasCaptureGroupInTargetLabel: strings.Contains(targetLabel, "$"), hasCaptureGroupInTargetLabel: strings.Contains(targetLabel, "$"),
hasCaptureGroupInReplacement: strings.Contains(replacement, "$"), hasCaptureGroupInReplacement: strings.Contains(replacement, "$"),

View file

@ -29,6 +29,7 @@ type parsedRelabelConfig struct {
graphiteLabelRules []graphiteLabelRule graphiteLabelRules []graphiteLabelRule
regexOriginal *regexp.Regexp regexOriginal *regexp.Regexp
regexOrValues []string
hasCaptureGroupInTargetLabel bool hasCaptureGroupInTargetLabel bool
hasCaptureGroupInReplacement bool hasCaptureGroupInReplacement bool
@ -413,6 +414,14 @@ func (prc *parsedRelabelConfig) replaceStringSubmatches(s, replacement string, h
} }
func (prc *parsedRelabelConfig) matchString(s string) bool { func (prc *parsedRelabelConfig) matchString(s string) bool {
if len(prc.regexOrValues) > 0 {
for _, orValue := range prc.regexOrValues {
if s == orValue {
return true
}
}
return false
}
prefix, complete := prc.regexOriginal.LiteralPrefix() prefix, complete := prc.regexOriginal.LiteralPrefix()
if complete { if complete {
return prefix == s return prefix == s

View file

@ -726,3 +726,61 @@ func TestFillLabelReferences(t *testing.T) {
f(`{{bar}}-aa`, `foo{bar="baz"}`, `baz-aa`) f(`{{bar}}-aa`, `foo{bar="baz"}`, `baz-aa`)
f(`{{bar}}-aa{{__name__}}.{{bar}}{{non-existing-label}}`, `foo{bar="baz"}`, `baz-aafoo.baz`) f(`{{bar}}-aa{{__name__}}.{{bar}}{{non-existing-label}}`, `foo{bar="baz"}`, `baz-aafoo.baz`)
} }
// TestRegexpMatchStringSuccess checks that matchString returns true for every
// (pattern, input) pair listed below when the pattern is used as the regex of
// a "labeldrop" relabeling rule.
func TestRegexpMatchStringSuccess(t *testing.T) {
	mustMatch := func(pattern, s string) {
		t.Helper()
		cfg := RelabelConfig{
			Action: "labeldrop",
			Regex:  &MultiLineRegex{S: pattern},
		}
		parsed, err := parseRelabelConfig(&cfg)
		if err != nil {
			t.Fatalf("unexpected error in parseRelabelConfig: %s", err)
		}
		if matched := parsed.matchString(s); !matched {
			t.Fatalf("unexpected matchString(%q) result; got false; want true", s)
		}
	}

	mustMatch("", "")
	mustMatch("foo", "foo")
	mustMatch(".*", "")
	mustMatch(".*", "foo")
	mustMatch("foo.*", "foobar")
	mustMatch("foo.+", "foobar")
	mustMatch("f.+o", "foo")
	mustMatch("foo|bar", "bar")
	mustMatch("^(foo|bar)$", "foo")
	mustMatch("foo.+", "foobar")
	mustMatch("^foo$", "foo")
}
// TestRegexpMatchStringFailure checks that matchString returns false for every
// (pattern, input) pair listed below when the pattern is used as the regex of
// a "labeldrop" relabeling rule.
func TestRegexpMatchStringFailure(t *testing.T) {
	mustNotMatch := func(pattern, s string) {
		t.Helper()
		cfg := RelabelConfig{
			Action: "labeldrop",
			Regex:  &MultiLineRegex{S: pattern},
		}
		parsed, err := parseRelabelConfig(&cfg)
		if err != nil {
			t.Fatalf("unexpected error in parseRelabelConfig: %s", err)
		}
		if matched := parsed.matchString(s); matched {
			t.Fatalf("unexpected matchString(%q) result; got true; want false", s)
		}
	}

	mustNotMatch("", "foo")
	mustNotMatch("foo", "")
	mustNotMatch("foo.*", "foa")
	mustNotMatch("foo.+", "foo")
	mustNotMatch("f.+o", "foor")
	mustNotMatch("foo|bar", "barz")
	mustNotMatch("^(foo|bar)$", "xfoo")
	mustNotMatch("foo.+", "foo")
	mustNotMatch("^foo$", "foobar")
}

View file

@ -2,11 +2,90 @@ package promrelabel
import ( import (
"fmt" "fmt"
"regexp"
"testing" "testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal" "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
) )
func BenchmarkMatchRegexOrValuesMatchOptimized(b *testing.B) {
const pattern = "foo|bar|baz|abc"
const s = "foo"
rc := &RelabelConfig{
Action: "labeldrop",
Regex: &MultiLineRegex{
S: pattern,
},
}
prc, err := parseRelabelConfig(rc)
if err != nil {
panic(fmt.Errorf("unexpected error in parseRelabelConfig: %s", err))
}
b.ReportAllocs()
b.SetBytes(1)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
if !prc.matchString(s) {
panic(fmt.Errorf("unexpected string mismatch for pattern=%q, s=%q", pattern, s))
}
}
})
}
func BenchmarkMatchRegexOrValuesMismatchOptimized(b *testing.B) {
const pattern = "foo|bar|baz|abc"
const s = "qwert"
rc := &RelabelConfig{
Action: "labeldrop",
Regex: &MultiLineRegex{
S: pattern,
},
}
prc, err := parseRelabelConfig(rc)
if err != nil {
panic(fmt.Errorf("unexpected error in parseRelabelConfig: %s", err))
}
b.ReportAllocs()
b.SetBytes(1)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
if prc.matchString(s) {
panic(fmt.Errorf("unexpected string match for pattern=%q, s=%q", pattern, s))
}
}
})
}
// BenchmarkMatchRegexOrValuesMatchUnoptimized measures plain
// regexp.Regexp.MatchString for the "foo|bar|baz|abc" regex on an input that matches.
//
// It panics if the input does not match.
func BenchmarkMatchRegexOrValuesMatchUnoptimized(b *testing.B) {
	const (
		pattern = "foo|bar|baz|abc"
		s       = "foo"
	)
	compiled := regexp.MustCompile(pattern)

	b.ReportAllocs()
	b.SetBytes(1)
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			if matched := compiled.MatchString(s); !matched {
				panic(fmt.Errorf("unexpected string mismatch for pattern=%q, s=%q", pattern, s))
			}
		}
	})
}
// BenchmarkMatchRegexOrValuesMismatchUnoptimized measures plain
// regexp.Regexp.MatchString for the "foo|bar|baz|abc" regex on an input that
// does not match.
//
// It panics if the input unexpectedly matches.
func BenchmarkMatchRegexOrValuesMismatchUnoptimized(b *testing.B) {
	const (
		pattern = "foo|bar|baz|abc"
		s       = "qwert"
	)
	compiled := regexp.MustCompile(pattern)

	b.ReportAllocs()
	b.SetBytes(1)
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			if matched := compiled.MatchString(s); matched {
				panic(fmt.Errorf("unexpected string match for pattern=%q, s=%q", pattern, s))
			}
		}
	})
}
func BenchmarkApplyRelabelConfigs(b *testing.B) { func BenchmarkApplyRelabelConfigs(b *testing.B) {
b.Run("replace-label-copy", func(b *testing.B) { b.Run("replace-label-copy", func(b *testing.B) {
pcs := mustParseRelabelConfigs(` pcs := mustParseRelabelConfigs(`

107
lib/regexutil/regexutil.go Normal file
View file

@ -0,0 +1,107 @@
package regexutil
import (
"regexp/syntax"
"sort"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
// GetOrValues extracts the sorted list of alternative ("or") values from the regexp expr.
//
// For example, "foo|bar" yields ["bar", "foo"] and the "" regexp yields [""].
// An empty list is returned when the values cannot be extracted from expr.
//
// expr is expected to be an already verified regexp: a parse error is reported
// via logger.Panicf.
func GetOrValues(expr string) []string {
	parsed, err := syntax.Parse(expr, syntax.Perl)
	if err != nil {
		logger.Panicf("BUG: unexpected error when parsing verified expr=%q: %s", expr, err)
	}
	values := getOrValuesExt(parsed)
	// Keep the values ordered for faster index seek later.
	sort.Strings(values)
	return values
}
// getOrValuesExt returns the list of strings matched by sre.
//
// An empty list is returned if the strings cannot be enumerated: sre contains
// an unsupported operation or a case-insensitive literal, or the enumeration
// exceeds maxOrValues values.
//
// Begin-of-text and end-of-text anchors are converted to the empty string
// wherever they appear, so anchors located in the middle of the expression
// are effectively ignored.
//
// sre is only read and is never modified, so it can be safely reused by the caller.
func getOrValuesExt(sre *syntax.Regexp) []string {
	switch sre.Op {
	case syntax.OpCapture:
		return getOrValuesExt(sre.Sub[0])
	case syntax.OpLiteral:
		if !isLiteral(sre) {
			return nil
		}
		return []string{string(sre.Rune)}
	case syntax.OpEmptyMatch, syntax.OpBeginText, syntax.OpEndText:
		return []string{""}
	case syntax.OpAlternate:
		a := make([]string, 0, len(sre.Sub))
		for _, reSub := range sre.Sub {
			ca := getOrValuesExt(reSub)
			if len(ca) == 0 {
				// A single non-enumerable branch makes the whole alternation non-enumerable.
				return nil
			}
			a = append(a, ca...)
			if len(a) > maxOrValues {
				// It is cheaper to use regexp here.
				return nil
			}
		}
		return a
	case syntax.OpCharClass:
		// sre.Rune holds pairs of inclusive range bounds.
		a := make([]string, 0, len(sre.Rune)/2)
		for i := 0; i < len(sre.Rune); i += 2 {
			start := sre.Rune[i]
			end := sre.Rune[i+1]
			for start <= end {
				a = append(a, string(start))
				start++
				if len(a) > maxOrValues {
					// It is cheaper to use regexp here.
					return nil
				}
			}
		}
		return a
	case syntax.OpConcat:
		return getOrValuesConcat(sre.Sub)
	default:
		return nil
	}
}

// getOrValuesConcat returns all the strings matched by the concatenation of subs.
//
// It walks subs via sub-slicing instead of re-assigning the Sub field of the
// parent node, so the regexp tree passed to getOrValuesExt stays intact.
func getOrValuesConcat(subs []*syntax.Regexp) []string {
	if len(subs) == 0 {
		return []string{""}
	}
	prefixes := getOrValuesExt(subs[0])
	if len(prefixes) == 0 {
		return nil
	}
	suffixes := getOrValuesConcat(subs[1:])
	if len(suffixes) == 0 {
		return nil
	}
	if len(prefixes)*len(suffixes) > maxOrValues {
		// It is cheaper to use regexp here.
		return nil
	}
	a := make([]string, 0, len(prefixes)*len(suffixes))
	for _, prefix := range prefixes {
		for _, suffix := range suffixes {
			a = append(a, prefix+suffix)
		}
	}
	return a
}

// isLiteral returns true if sre is a case-sensitive literal,
// optionally wrapped into capture groups.
func isLiteral(sre *syntax.Regexp) bool {
	if sre.Op == syntax.OpCapture {
		return isLiteral(sre.Sub[0])
	}
	return sre.Op == syntax.OpLiteral && sre.Flags&syntax.FoldCase == 0
}

// maxOrValues is the maximum number of values getOrValuesExt enumerates.
// Above this limit it gives up, since it is cheaper to use regexp then.
const maxOrValues = 100

View file

@ -0,0 +1,48 @@
package regexutil
import (
"reflect"
"testing"
)
// TestGetOrValues checks the values returned by GetOrValues for a set of regexps.
// A nil expectation means that no values can be extracted from the regexp.
func TestGetOrValues(t *testing.T) {
	check := func(s string, valuesExpected []string) {
		t.Helper()
		got := GetOrValues(s)
		if reflect.DeepEqual(got, valuesExpected) {
			return
		}
		t.Fatalf("unexpected values for s=%q; got %q; want %q", s, got, valuesExpected)
	}

	check("", []string{""})
	check("foo", []string{"foo"})
	check("^foo$", []string{"foo"})
	check("|foo", []string{"", "foo"})
	check("|foo|", []string{"", "", "foo"})
	check("foo.+", nil)
	check("foo.*", nil)
	check(".*", nil)
	check("foo|.*", nil)
	check("foobar", []string{"foobar"})
	check("z|x|c", []string{"c", "x", "z"})
	check("foo|bar", []string{"bar", "foo"})
	check("(foo|bar)", []string{"bar", "foo"})
	check("(foo|bar)baz", []string{"barbaz", "foobaz"})
	check("[a-z][a-z]", nil)
	check("[a-d]", []string{"a", "b", "c", "d"})
	check("x[a-d]we", []string{"xawe", "xbwe", "xcwe", "xdwe"})
	check("foo(bar|baz)", []string{"foobar", "foobaz"})
	check("foo(ba[rz]|(xx|o))", []string{"foobar", "foobaz", "fooo", "fooxx"})
	check("foo(?:bar|baz)x(qwe|rt)", []string{"foobarxqwe", "foobarxrt", "foobazxqwe", "foobazxrt"})
	check("foo(bar||baz)", []string{"foo", "foobar", "foobaz"})
	check("(a|b|c)(d|e|f|0|1|2)(g|h|k|x|y|z)", nil)
	check("(?i)foo", nil)
	check("(?i)(foo|bar)", nil)
	check("^foo|bar$", []string{"bar", "foo"})
	check("^(foo|bar)$", []string{"bar", "foo"})
	check("^a(foo|b(?:a|r))$", []string{"aba", "abr", "afoo"})

	// The conversion below is incorrect, since the regexp matches nothing.
	// This is tolerated for now, because such regexps are uncommon in practice.
	// TODO: properly handle this case.
	check("^a(^foo|bar$)z$", []string{"abarz", "afooz"})
}

View file

@ -15,6 +15,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/lrucache" "github.com/VictoriaMetrics/VictoriaMetrics/lib/lrucache"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory" "github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
) )
// convertToCompositeTagFilterss converts tfss to composite filters. // convertToCompositeTagFilterss converts tfss to composite filters.
@ -523,7 +524,7 @@ func getRegexpFromCache(expr []byte) (*regexpCacheValue, error) {
} }
sExpr := string(expr) sExpr := string(expr)
orValues := getOrValues(sExpr) orValues := regexutil.GetOrValues(sExpr)
var reMatch func(b []byte) bool var reMatch func(b []byte) bool
var reCost uint64 var reCost uint64
var literalSuffix string var literalSuffix string
@ -787,93 +788,6 @@ func isLiteral(sre *syntax.Regexp) bool {
return sre.Op == syntax.OpLiteral && sre.Flags&syntax.FoldCase == 0 return sre.Op == syntax.OpLiteral && sre.Flags&syntax.FoldCase == 0
} }
// getOrValues extracts the sorted list of alternative ("or") values from the regexp expr.
//
// expr is expected to be an already verified regexp: a parse error is reported
// via logger.Panicf.
func getOrValues(expr string) []string {
	parsed, err := syntax.Parse(expr, syntax.Perl)
	if err != nil {
		logger.Panicf("BUG: unexpected error when parsing verified expr=%q: %s", expr, err)
	}
	values := getOrValuesExt(parsed)
	// Keep the values ordered for faster index seek later.
	sort.Strings(values)
	return values
}
// getOrValuesExt returns the list of strings matched by sre.
//
// An empty list is returned if the strings cannot be enumerated: sre contains
// an unsupported operation or a case-insensitive literal, or the enumeration
// exceeds maxOrValues values.
//
// Begin-of-text and end-of-text anchors are converted to the empty string
// wherever they appear, so anchors located in the middle of the expression
// are effectively ignored.
//
// sre is only read and is never modified, so it can be safely reused by the caller.
func getOrValuesExt(sre *syntax.Regexp) []string {
	switch sre.Op {
	case syntax.OpCapture:
		return getOrValuesExt(sre.Sub[0])
	case syntax.OpLiteral:
		if !isLiteral(sre) {
			return nil
		}
		return []string{string(sre.Rune)}
	case syntax.OpEmptyMatch, syntax.OpBeginText, syntax.OpEndText:
		return []string{""}
	case syntax.OpAlternate:
		a := make([]string, 0, len(sre.Sub))
		for _, reSub := range sre.Sub {
			ca := getOrValuesExt(reSub)
			if len(ca) == 0 {
				// A single non-enumerable branch makes the whole alternation non-enumerable.
				return nil
			}
			a = append(a, ca...)
			if len(a) > maxOrValues {
				// It is cheaper to use regexp here.
				return nil
			}
		}
		return a
	case syntax.OpCharClass:
		// sre.Rune holds pairs of inclusive range bounds.
		a := make([]string, 0, len(sre.Rune)/2)
		for i := 0; i < len(sre.Rune); i += 2 {
			start := sre.Rune[i]
			end := sre.Rune[i+1]
			for start <= end {
				a = append(a, string(start))
				start++
				if len(a) > maxOrValues {
					// It is cheaper to use regexp here.
					return nil
				}
			}
		}
		return a
	case syntax.OpConcat:
		return getOrValuesConcat(sre.Sub)
	default:
		return nil
	}
}

// getOrValuesConcat returns all the strings matched by the concatenation of subs.
//
// It walks subs via sub-slicing instead of re-assigning the Sub field of the
// parent node, so the regexp tree passed to getOrValuesExt stays intact.
func getOrValuesConcat(subs []*syntax.Regexp) []string {
	if len(subs) == 0 {
		return []string{""}
	}
	prefixes := getOrValuesExt(subs[0])
	if len(prefixes) == 0 {
		return nil
	}
	suffixes := getOrValuesConcat(subs[1:])
	if len(suffixes) == 0 {
		return nil
	}
	if len(prefixes)*len(suffixes) > maxOrValues {
		// It is cheaper to use regexp here.
		return nil
	}
	a := make([]string, 0, len(prefixes)*len(suffixes))
	for _, prefix := range prefixes {
		for _, suffix := range suffixes {
			a = append(a, prefix+suffix)
		}
	}
	return a
}

// maxOrValues is the maximum number of values getOrValuesExt enumerates.
// Above this limit it gives up, since it is cheaper to use regexp then.
const maxOrValues = 100
var tagCharsRegexpEscaper = strings.NewReplacer( var tagCharsRegexpEscaper = strings.NewReplacer(
"\\x00", "\\x000", // escapeChar "\\x00", "\\x000", // escapeChar
"\x00", "\\x000", // escapeChar "\x00", "\\x000", // escapeChar

View file

@ -1145,46 +1145,6 @@ func TestTagFilterMatchSuffix(t *testing.T) {
}) })
} }
// TestGetOrValues checks the values returned by getOrValues for a set of regexps.
// A nil expectation means that no values can be extracted from the regexp.
func TestGetOrValues(t *testing.T) {
	check := func(s string, valuesExpected []string) {
		t.Helper()
		got := getOrValues(s)
		if reflect.DeepEqual(got, valuesExpected) {
			return
		}
		t.Fatalf("unexpected values for s=%q; got %q; want %q", s, got, valuesExpected)
	}

	check("", []string{""})
	check("|foo", []string{"", "foo"})
	check("|foo|", []string{"", "", "foo"})
	check("foo.+", nil)
	check("foo.*", nil)
	check(".*", nil)
	check("foo|.*", nil)
	check("foobar", []string{"foobar"})
	check("z|x|c", []string{"c", "x", "z"})
	check("foo|bar", []string{"bar", "foo"})
	check("(foo|bar)", []string{"bar", "foo"})
	check("(foo|bar)baz", []string{"barbaz", "foobaz"})
	check("[a-z][a-z]", nil)
	check("[a-d]", []string{"a", "b", "c", "d"})
	check("x[a-d]we", []string{"xawe", "xbwe", "xcwe", "xdwe"})
	check("foo(bar|baz)", []string{"foobar", "foobaz"})
	check("foo(ba[rz]|(xx|o))", []string{"foobar", "foobaz", "fooo", "fooxx"})
	check("foo(?:bar|baz)x(qwe|rt)", []string{"foobarxqwe", "foobarxrt", "foobazxqwe", "foobazxrt"})
	check("foo(bar||baz)", []string{"foo", "foobar", "foobaz"})
	check("(a|b|c)(d|e|f|0|1|2)(g|h|k|x|y|z)", nil)
	check("(?i)foo", nil)
	check("(?i)(foo|bar)", nil)
	check("^foo|bar$", []string{"bar", "foo"})
	check("^(foo|bar)$", []string{"bar", "foo"})
	check("^a(foo|b(?:a|r))$", []string{"aba", "abr", "afoo"})

	// The conversion below is incorrect, since the regexp matches nothing.
	// This is tolerated for now, because such regexps are uncommon in practice.
	// TODO: properly handle this case.
	check("^a(^foo|bar$)z$", []string{"abarz", "afooz"})
}
func TestGetRegexpPrefix(t *testing.T) { func TestGetRegexpPrefix(t *testing.T) {
f := func(t *testing.T, s, expectedPrefix, expectedSuffix string) { f := func(t *testing.T, s, expectedPrefix, expectedSuffix string) {
t.Helper() t.Helper()