lib/promrelabel: optimize action: {drop,keep,labeldrop,labelkeep} with anchored regex prefix

The following commonly used relabeling rules should now work faster:

- action: labeldrop
  regex: "^foo.+$"

- action: labeldrop
  regex: "^bar.*"
This commit is contained in:
Aliaksandr Valialkin 2022-08-25 23:22:03 +03:00
parent d60654eb0a
commit 909e681024
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
4 changed files with 226 additions and 52 deletions

View file

@ -202,7 +202,7 @@ func parseRelabelConfig(rc *RelabelConfig) (*parsedRelabelConfig, error) {
regexOriginalCompiled := defaultOriginalRegexForRelabelConfig
var regexOrValues []string
if rc.Regex != nil {
regex := rc.Regex.S
regex := regexutil.RemoveStartEndAnchors(rc.Regex.S)
regexOrig := regex
if rc.Action != "replace_all" && rc.Action != "labelmap_all" {
regex = "^(?:" + regex + ")$"

View file

@ -1,6 +1,7 @@
package promrelabel
import (
"fmt"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
@ -730,16 +731,7 @@ func TestFillLabelReferences(t *testing.T) {
func TestRegexpMatchStringSuccess(t *testing.T) {
f := func(pattern, s string) {
t.Helper()
rc := &RelabelConfig{
Action: "labeldrop",
Regex: &MultiLineRegex{
S: pattern,
},
}
prc, err := parseRelabelConfig(rc)
if err != nil {
t.Fatalf("unexpected error in parseRelabelConfig: %s", err)
}
prc := newTestRegexRelabelConfig(pattern)
if !prc.matchString(s) {
t.Fatalf("unexpected matchString(%q) result; got false; want true", s)
}
@ -760,16 +752,7 @@ func TestRegexpMatchStringSuccess(t *testing.T) {
func TestRegexpMatchStringFailure(t *testing.T) {
f := func(pattern, s string) {
t.Helper()
rc := &RelabelConfig{
Action: "labeldrop",
Regex: &MultiLineRegex{
S: pattern,
},
}
prc, err := parseRelabelConfig(rc)
if err != nil {
t.Fatalf("unexpected error in parseRelabelConfig: %s", err)
}
prc := newTestRegexRelabelConfig(pattern)
if prc.matchString(s) {
t.Fatalf("unexpected matchString(%q) result; got true; want false", s)
}
@ -784,3 +767,17 @@ func TestRegexpMatchStringFailure(t *testing.T) {
f("foo.+", "foo")
f("^foo$", "foobar")
}
// newTestRegexRelabelConfig builds a "labeldrop" relabel config whose regex is
// the given pattern and returns its parsed form.
//
// It panics if parseRelabelConfig returns an error, so callers in tests and
// benchmarks do not need to handle it.
func newTestRegexRelabelConfig(pattern string) *parsedRelabelConfig {
	cfg := &RelabelConfig{
		Action: "labeldrop",
		Regex:  &MultiLineRegex{S: pattern},
	}
	parsed, err := parseRelabelConfig(cfg)
	if err == nil {
		return parsed
	}
	panic(fmt.Errorf("unexpected error in parseRelabelConfig: %s", err))
}

View file

@ -8,19 +8,10 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)
func BenchmarkMatchRegexOrValuesMatchOptimized(b *testing.B) {
const pattern = "foo|bar|baz|abc"
const s = "foo"
rc := &RelabelConfig{
Action: "labeldrop",
Regex: &MultiLineRegex{
S: pattern,
},
}
prc, err := parseRelabelConfig(rc)
if err != nil {
panic(fmt.Errorf("unexpected error in parseRelabelConfig: %s", err))
}
func BenchmarkMatchRegexPrefixDotPlusMatchOptimized(b *testing.B) {
const pattern = "^foo.+$"
const s = "foobar"
prc := newTestRegexRelabelConfig(pattern)
b.ReportAllocs()
b.SetBytes(1)
b.RunParallel(func(pb *testing.PB) {
@ -32,19 +23,25 @@ func BenchmarkMatchRegexOrValuesMatchOptimized(b *testing.B) {
})
}
func BenchmarkMatchRegexOrValuesMismatchOptimized(b *testing.B) {
const pattern = "foo|bar|baz|abc"
const s = "qwert"
rc := &RelabelConfig{
Action: "labeldrop",
Regex: &MultiLineRegex{
S: pattern,
},
}
prc, err := parseRelabelConfig(rc)
if err != nil {
panic(fmt.Errorf("unexpected error in parseRelabelConfig: %s", err))
}
// BenchmarkMatchRegexPrefixDotPlusMatchUnoptimized measures a plain
// regexp.Regexp.MatchString call for the "^foo.+$" pattern on a matching input.
//
// It panics if the input unexpectedly fails to match.
func BenchmarkMatchRegexPrefixDotPlusMatchUnoptimized(b *testing.B) {
	const (
		pattern = "^foo.+$"
		input   = "foobar"
	)
	compiled := regexp.MustCompile(pattern)
	b.ReportAllocs()
	b.SetBytes(1)
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			if compiled.MatchString(input) {
				continue
			}
			panic(fmt.Errorf("unexpected string mismatch for pattern=%q, s=%q", pattern, input))
		}
	})
}
func BenchmarkMatchRegexPrefixDotPlusMismatchOptimized(b *testing.B) {
const pattern = "^foo.+$"
const s = "xfoobar"
prc := newTestRegexRelabelConfig(pattern)
b.ReportAllocs()
b.SetBytes(1)
b.RunParallel(func(pb *testing.PB) {
@ -56,8 +53,98 @@ func BenchmarkMatchRegexOrValuesMismatchOptimized(b *testing.B) {
})
}
func BenchmarkMatchRegexOrValuesMatchUnoptimized(b *testing.B) {
const pattern = "foo|bar|baz|abc"
// BenchmarkMatchRegexPrefixDotPlusMismatchUnoptimized measures a plain
// regexp.Regexp.MatchString call for the "^foo.+$" pattern on a non-matching input.
//
// It panics if the input unexpectedly matches.
func BenchmarkMatchRegexPrefixDotPlusMismatchUnoptimized(b *testing.B) {
	const (
		pattern = "^foo.+$"
		input   = "xfoobar"
	)
	compiled := regexp.MustCompile(pattern)
	b.ReportAllocs()
	b.SetBytes(1)
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			if !compiled.MatchString(input) {
				continue
			}
			panic(fmt.Errorf("unexpected string match for pattern=%q, s=%q", pattern, input))
		}
	})
}
// BenchmarkMatchRegexPrefixDotStarMatchOptimized measures matchString of a parsed
// relabel config built from the "^foo.*$" pattern on a matching input.
//
// It panics if the input unexpectedly fails to match.
func BenchmarkMatchRegexPrefixDotStarMatchOptimized(b *testing.B) {
	const (
		pattern = "^foo.*$"
		input   = "foobar"
	)
	cfg := newTestRegexRelabelConfig(pattern)
	b.ReportAllocs()
	b.SetBytes(1)
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			if cfg.matchString(input) {
				continue
			}
			panic(fmt.Errorf("unexpected string mismatch for pattern=%q, s=%q", pattern, input))
		}
	})
}
// BenchmarkMatchRegexPrefixDotStarMatchUnoptimized measures a plain
// regexp.Regexp.MatchString call for the "^foo.*$" pattern on a matching input.
//
// It panics if the input unexpectedly fails to match.
func BenchmarkMatchRegexPrefixDotStarMatchUnoptimized(b *testing.B) {
	const (
		pattern = "^foo.*$"
		input   = "foobar"
	)
	compiled := regexp.MustCompile(pattern)
	b.ReportAllocs()
	b.SetBytes(1)
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			if compiled.MatchString(input) {
				continue
			}
			panic(fmt.Errorf("unexpected string mismatch for pattern=%q, s=%q", pattern, input))
		}
	})
}
// BenchmarkMatchRegexPrefixDotStarMismatchOptimized measures matchString of a parsed
// relabel config built from the "^foo.*$" pattern on a non-matching input.
//
// It panics if the input unexpectedly matches.
func BenchmarkMatchRegexPrefixDotStarMismatchOptimized(b *testing.B) {
	const (
		pattern = "^foo.*$"
		input   = "xfoobar"
	)
	cfg := newTestRegexRelabelConfig(pattern)
	b.ReportAllocs()
	b.SetBytes(1)
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			if !cfg.matchString(input) {
				continue
			}
			panic(fmt.Errorf("unexpected string match for pattern=%q, s=%q", pattern, input))
		}
	})
}
// BenchmarkMatchRegexPrefixDotStarMismatchUnoptimized measures a plain
// regexp.Regexp.MatchString call for the "^foo.*$" pattern on a non-matching input.
//
// It panics if the input unexpectedly matches.
func BenchmarkMatchRegexPrefixDotStarMismatchUnoptimized(b *testing.B) {
	const (
		pattern = "^foo.*$"
		input   = "xfoobar"
	)
	compiled := regexp.MustCompile(pattern)
	b.ReportAllocs()
	b.SetBytes(1)
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			if !compiled.MatchString(input) {
				continue
			}
			panic(fmt.Errorf("unexpected string match for pattern=%q, s=%q", pattern, input))
		}
	})
}
// BenchmarkMatchRegexSingleValueMatchOptimized measures matchString of a parsed
// relabel config built from the "^foo$" pattern on a matching input.
//
// It panics if the input unexpectedly fails to match.
func BenchmarkMatchRegexSingleValueMatchOptimized(b *testing.B) {
	const (
		pattern = "^foo$"
		input   = "foo"
	)
	cfg := newTestRegexRelabelConfig(pattern)
	b.ReportAllocs()
	b.SetBytes(1)
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			if cfg.matchString(input) {
				continue
			}
			panic(fmt.Errorf("unexpected string mismatch for pattern=%q, s=%q", pattern, input))
		}
	})
}
func BenchmarkMatchRegexSingleValueMatchUnoptimized(b *testing.B) {
const pattern = "^foo$"
const s = "foo"
re := regexp.MustCompile(pattern)
b.ReportAllocs()
@ -71,8 +158,83 @@ func BenchmarkMatchRegexOrValuesMatchUnoptimized(b *testing.B) {
})
}
// BenchmarkMatchRegexSingleValueMismatchOptimized measures matchString of a parsed
// relabel config built from the "^foo$" pattern on a non-matching input.
//
// It panics if the input unexpectedly matches.
func BenchmarkMatchRegexSingleValueMismatchOptimized(b *testing.B) {
	const (
		pattern = "^foo$"
		input   = "bar"
	)
	cfg := newTestRegexRelabelConfig(pattern)
	b.ReportAllocs()
	b.SetBytes(1)
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			if !cfg.matchString(input) {
				continue
			}
			panic(fmt.Errorf("unexpected string match for pattern=%q, s=%q", pattern, input))
		}
	})
}
// BenchmarkMatchRegexSingleValueMismatchUnoptimized measures a plain
// regexp.Regexp.MatchString call for the "^foo$" pattern on a non-matching input.
//
// It panics if the input unexpectedly matches.
func BenchmarkMatchRegexSingleValueMismatchUnoptimized(b *testing.B) {
	const (
		pattern = "^foo$"
		input   = "bar"
	)
	compiled := regexp.MustCompile(pattern)
	b.ReportAllocs()
	b.SetBytes(1)
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			if !compiled.MatchString(input) {
				continue
			}
			panic(fmt.Errorf("unexpected string match for pattern=%q, s=%q", pattern, input))
		}
	})
}
// BenchmarkMatchRegexOrValuesMatchOptimized measures matchString of a parsed
// relabel config built from the "^(foo|bar|baz|abc)$" pattern on a matching input.
//
// It panics if the input unexpectedly fails to match.
func BenchmarkMatchRegexOrValuesMatchOptimized(b *testing.B) {
	const (
		pattern = "^(foo|bar|baz|abc)$"
		input   = "foo"
	)
	cfg := newTestRegexRelabelConfig(pattern)
	b.ReportAllocs()
	b.SetBytes(1)
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			if cfg.matchString(input) {
				continue
			}
			panic(fmt.Errorf("unexpected string mismatch for pattern=%q, s=%q", pattern, input))
		}
	})
}
// BenchmarkMatchRegexOrValuesMatchUnoptimized measures a plain
// regexp.Regexp.MatchString call for the "^(foo|bar|baz|abc)$" pattern on a
// matching input.
//
// It panics if the input unexpectedly fails to match.
func BenchmarkMatchRegexOrValuesMatchUnoptimized(b *testing.B) {
	const (
		pattern = "^(foo|bar|baz|abc)$"
		input   = "foo"
	)
	compiled := regexp.MustCompile(pattern)
	b.ReportAllocs()
	b.SetBytes(1)
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			if compiled.MatchString(input) {
				continue
			}
			panic(fmt.Errorf("unexpected string mismatch for pattern=%q, s=%q", pattern, input))
		}
	})
}
// BenchmarkMatchRegexOrValuesMismatchOptimized measures matchString of a parsed
// relabel config built from the "^(foo|bar|baz|abc)$" pattern on a non-matching input.
//
// The pattern is fully anchored, exactly as in
// BenchmarkMatchRegexOrValuesMismatchUnoptimized, so that both benchmarks
// evaluate the same expression and their results are directly comparable.
//
// It panics if the input unexpectedly matches.
func BenchmarkMatchRegexOrValuesMismatchOptimized(b *testing.B) {
	const pattern = "^(foo|bar|baz|abc)$"
	const s = "qwert"
	prc := newTestRegexRelabelConfig(pattern)
	b.ReportAllocs()
	b.SetBytes(1)
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			if prc.matchString(s) {
				panic(fmt.Errorf("unexpected string match for pattern=%q, s=%q", pattern, s))
			}
		}
	})
}
func BenchmarkMatchRegexOrValuesMismatchUnoptimized(b *testing.B) {
const pattern = "foo|bar|baz|abc"
const pattern = "^(foo|bar|baz|abc)$"
const s = "qwert"
re := regexp.MustCompile(pattern)
b.ReportAllocs()

View file

@ -3,16 +3,31 @@ package regexutil
import (
"regexp/syntax"
"sort"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
// RemoveStartEndAnchors removes '^' at the start of expr and '$' at the end of the expr.
//
// Repeated anchors are removed as well, e.g. "^^foo$$" becomes "foo".
//
// A trailing '$' is left intact when it is escaped, i.e. when it is preceded
// by an odd number of backslashes: `foo\$` matches a literal dollar sign, so
// stripping the '$' would change the meaning of the expression and leave
// a dangling backslash behind. A '$' preceded by an even number of backslashes
// (e.g. `foo\\$`) is a real anchor after an escaped backslash and is removed.
func RemoveStartEndAnchors(expr string) string {
	expr = strings.TrimLeft(expr, "^")
	for strings.HasSuffix(expr, "$") {
		last := len(expr) - 1
		// Count the backslashes directly preceding the trailing '$'.
		backslashes := 0
		for i := last - 1; i >= 0 && expr[i] == '\\'; i-- {
			backslashes++
		}
		if backslashes%2 == 1 {
			// The '$' is escaped - it is a literal character, not an anchor.
			break
		}
		expr = expr[:last]
	}
	return expr
}
// GetOrValues returns "or" values from the given regexp expr.
//
// E.g. it returns ["foo", "bar"] for "foo|bar" regexp.
// It returns an empty list if it is impossible to extract "or" values from the regexp.
// It ignores start and end anchors ('^') and ('$') at the start and the end of expr.
// It returns ["foo", "bar"] for "foo|bar" regexp.
// It returns ["foo"] for "foo" regexp.
// It returns [""] for "" regexp.
// It returns an empty list if it is impossible to extract "or" values from the regexp.
func GetOrValues(expr string) []string {
expr = RemoveStartEndAnchors(expr)
sre, err := syntax.Parse(expr, syntax.Perl)
if err != nil {
logger.Panicf("BUG: unexpected error when parsing verified expr=%q: %s", expr, err)