lib/promrelabel: use regexutil.PromRegex for regex matching in actions labeldrop, labelkeep, drop and keep

This makes it possible to optimize additional cases inside regexutil.PromRegex.
This commit is contained in:
Aliaksandr Valialkin 2022-08-26 15:23:41 +03:00
parent 7afe8450fc
commit 4c6916f32a
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
7 changed files with 85 additions and 93 deletions

View file

@ -8,6 +8,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envtemplate"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
"gopkg.in/yaml.v2"
)
@ -189,6 +190,13 @@ func ParseRelabelConfigs(rcs []RelabelConfig, relabelDebug bool) (*ParsedConfigs
// Default regexes used by parseRelabelConfig when the relabel config either
// doesn't set `regex` or sets it to the default value.
var (
	// defaultOriginalRegexForRelabelConfig is the unanchored form of the default regex.
	defaultOriginalRegexForRelabelConfig = regexp.MustCompile(".*")

	// defaultRegexForRelabelConfig is the anchored form of the default regex
	// with a single capture group.
	//
	// Code applying relabel configs compares the stored regex pointer against
	// this variable in order to detect the default regex, so it must stay
	// a single shared instance.
	defaultRegexForRelabelConfig = regexp.MustCompile("^(.*)$")

	// defaultPromRegex is the regexutil.PromRegex for the default `.*` regex.
	//
	// It is built once during package initialization. An error from
	// regexutil.NewPromRegex is treated as a bug here and results in a panic.
	defaultPromRegex = func() *regexutil.PromRegex {
		pr, err := regexutil.NewPromRegex(".*")
		if err != nil {
			panic(fmt.Errorf("BUG: unexpected error: %s", err))
		}
		return pr
	}()
)
func parseRelabelConfig(rc *RelabelConfig) (*parsedRelabelConfig, error) {
@ -202,9 +210,9 @@ func parseRelabelConfig(rc *RelabelConfig) (*parsedRelabelConfig, error) {
action = "replace"
}
targetLabel := rc.TargetLabel
regexCompiled := defaultRegexForRelabelConfig
regexAnchored := defaultRegexForRelabelConfig
regexOriginalCompiled := defaultOriginalRegexForRelabelConfig
var regexOrValues []string
promRegex := defaultPromRegex
if rc.Regex != nil && !isDefaultRegex(rc.Regex.S) {
regex := rc.Regex.S
regexOrig := regex
@ -216,13 +224,16 @@ func parseRelabelConfig(rc *RelabelConfig) (*parsedRelabelConfig, error) {
if err != nil {
return nil, fmt.Errorf("cannot parse `regex` %q: %w", regex, err)
}
regexCompiled = re
regexAnchored = re
reOriginal, err := regexp.Compile(regexOrig)
if err != nil {
return nil, fmt.Errorf("cannot parse `regex` %q: %w", regexOrig, err)
}
regexOriginalCompiled = reOriginal
regexOrValues = regexutil.GetOrValues(regexOrig)
promRegex, err = regexutil.NewPromRegex(regexOrig)
if err != nil {
logger.Panicf("BUG: cannot parse already parsed regex %q: %s", regexOrig, err)
}
}
modulus := rc.Modulus
replacement := "$1"
@ -335,20 +346,20 @@ func parseRelabelConfig(rc *RelabelConfig) (*parsedRelabelConfig, error) {
}
}
return &parsedRelabelConfig{
SourceLabels: sourceLabels,
Separator: separator,
TargetLabel: targetLabel,
Regex: regexCompiled,
Modulus: modulus,
Replacement: replacement,
Action: action,
If: rc.If,
SourceLabels: sourceLabels,
Separator: separator,
TargetLabel: targetLabel,
RegexAnchored: regexAnchored,
Modulus: modulus,
Replacement: replacement,
Action: action,
If: rc.If,
graphiteMatchTemplate: graphiteMatchTemplate,
graphiteLabelRules: graphiteLabelRules,
regex: promRegex,
regexOriginal: regexOriginalCompiled,
regexOrValues: regexOrValues,
hasCaptureGroupInTargetLabel: strings.Contains(targetLabel, "$"),
hasCaptureGroupInReplacement: strings.Contains(replacement, "$"),

View file

@ -126,7 +126,7 @@ func TestParsedConfigsString(t *testing.T) {
TargetLabel: "foo",
SourceLabels: []string{"aaa"},
},
}, "[SourceLabels=[aaa], Separator=;, TargetLabel=foo, Regex=^(.*)$, Modulus=0, Replacement=$1, Action=replace, If=, "+
}, "[SourceLabels=[aaa], Separator=;, TargetLabel=foo, Regex=.*, Modulus=0, Replacement=$1, Action=replace, If=, "+
"graphiteMatchTemplate=<nil>, graphiteLabelRules=[]], relabelDebug=false")
var ie IfExpression
if err := ie.Parse("{foo=~'bar'}"); err != nil {
@ -141,7 +141,7 @@ func TestParsedConfigsString(t *testing.T) {
},
If: &ie,
},
}, "[SourceLabels=[], Separator=;, TargetLabel=, Regex=^(.*)$, Modulus=0, Replacement=$1, Action=graphite, If={foo=~'bar'}, "+
}, "[SourceLabels=[], Separator=;, TargetLabel=, Regex=.*, Modulus=0, Replacement=$1, Action=graphite, If={foo=~'bar'}, "+
"graphiteMatchTemplate=foo.*.bar, graphiteLabelRules=[replaceTemplate=$1-zz, targetLabel=job]], relabelDebug=false")
f([]RelabelConfig{
{
@ -150,7 +150,7 @@ func TestParsedConfigsString(t *testing.T) {
TargetLabel: "x",
If: &ie,
},
}, "[SourceLabels=[foo bar], Separator=;, TargetLabel=x, Regex=^(.*)$, Modulus=0, Replacement=$1, Action=replace, If={foo=~'bar'}, "+
}, "[SourceLabels=[foo bar], Separator=;, TargetLabel=x, Regex=.*, Modulus=0, Replacement=$1, Action=replace, If={foo=~'bar'}, "+
"graphiteMatchTemplate=<nil>, graphiteLabelRules=[]], relabelDebug=false")
}
@ -174,13 +174,14 @@ func TestParseRelabelConfigsSuccess(t *testing.T) {
}, &ParsedConfigs{
prcs: []*parsedRelabelConfig{
{
SourceLabels: []string{"foo", "bar"},
Separator: ";",
TargetLabel: "xxx",
Regex: defaultRegexForRelabelConfig,
Replacement: "$1",
Action: "replace",
SourceLabels: []string{"foo", "bar"},
Separator: ";",
TargetLabel: "xxx",
RegexAnchored: defaultRegexForRelabelConfig,
Replacement: "$1",
Action: "replace",
regex: defaultPromRegex,
regexOriginal: defaultOriginalRegexForRelabelConfig,
hasCaptureGroupInReplacement: true,
},

View file

@ -9,6 +9,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
"github.com/cespare/xxhash/v2"
)
@ -16,20 +17,20 @@ import (
//
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config
type parsedRelabelConfig struct {
SourceLabels []string
Separator string
TargetLabel string
Regex *regexp.Regexp
Modulus uint64
Replacement string
Action string
If *IfExpression
SourceLabels []string
Separator string
TargetLabel string
RegexAnchored *regexp.Regexp
Modulus uint64
Replacement string
Action string
If *IfExpression
graphiteMatchTemplate *graphiteMatchTemplate
graphiteLabelRules []graphiteLabelRule
regex *regexutil.PromRegex
regexOriginal *regexp.Regexp
regexOrValues []string
hasCaptureGroupInTargetLabel bool
hasCaptureGroupInReplacement bool
@ -39,7 +40,8 @@ type parsedRelabelConfig struct {
// String returns human-readable representation for prc.
func (prc *parsedRelabelConfig) String() string {
return fmt.Sprintf("SourceLabels=%s, Separator=%s, TargetLabel=%s, Regex=%s, Modulus=%d, Replacement=%s, Action=%s, If=%s, graphiteMatchTemplate=%s, graphiteLabelRules=%s",
prc.SourceLabels, prc.Separator, prc.TargetLabel, prc.Regex, prc.Modulus, prc.Replacement, prc.Action, prc.If, prc.graphiteMatchTemplate, prc.graphiteLabelRules)
prc.SourceLabels, prc.Separator, prc.TargetLabel, prc.regexOriginal, prc.Modulus, prc.Replacement,
prc.Action, prc.If, prc.graphiteMatchTemplate, prc.graphiteLabelRules)
}
// Apply applies pcs to labels starting from the labelsOffset.
@ -183,7 +185,7 @@ func (prc *parsedRelabelConfig) apply(labels []prompbmarshal.Label, labelsOffset
replacement = string(bb.B)
}
bb.B = concatLabelValues(bb.B[:0], src, prc.SourceLabels, prc.Separator)
if prc.Regex == defaultRegexForRelabelConfig && !prc.hasCaptureGroupInTargetLabel {
if prc.RegexAnchored == defaultRegexForRelabelConfig && !prc.hasCaptureGroupInTargetLabel {
if replacement == "$1" {
// Fast path for the rule that copies source label values to destination:
// - source_labels: [...]
@ -201,7 +203,7 @@ func (prc *parsedRelabelConfig) apply(labels []prompbmarshal.Label, labelsOffset
return labels
}
}
match := prc.Regex.FindSubmatchIndex(bb.B)
match := prc.RegexAnchored.FindSubmatchIndex(bb.B)
if match == nil {
// Fast path - nothing to replace.
relabelBufPool.Put(bb)
@ -253,7 +255,7 @@ func (prc *parsedRelabelConfig) apply(labels []prompbmarshal.Label, labelsOffset
return labels
case "keep":
// Keep the target if `source_labels` joined with `separator` match the `regex`.
if prc.Regex == defaultRegexForRelabelConfig {
if prc.RegexAnchored == defaultRegexForRelabelConfig {
// Fast path for the case with `if` and without explicitly set `regex`:
//
// - action: keep
@ -263,7 +265,7 @@ func (prc *parsedRelabelConfig) apply(labels []prompbmarshal.Label, labelsOffset
}
bb := relabelBufPool.Get()
bb.B = concatLabelValues(bb.B[:0], src, prc.SourceLabels, prc.Separator)
keep := prc.matchString(bytesutil.ToUnsafeString(bb.B))
keep := prc.regex.MatchString(bytesutil.ToUnsafeString(bb.B))
relabelBufPool.Put(bb)
if !keep {
return labels[:labelsOffset]
@ -271,7 +273,7 @@ func (prc *parsedRelabelConfig) apply(labels []prompbmarshal.Label, labelsOffset
return labels
case "drop":
// Drop the target if `source_labels` joined with `separator` match the `regex`.
if prc.Regex == defaultRegexForRelabelConfig {
if prc.RegexAnchored == defaultRegexForRelabelConfig {
// Fast path for the case with `if` and without explicitly set `regex`:
//
// - action: drop
@ -281,7 +283,7 @@ func (prc *parsedRelabelConfig) apply(labels []prompbmarshal.Label, labelsOffset
}
bb := relabelBufPool.Get()
bb.B = concatLabelValues(bb.B[:0], src, prc.SourceLabels, prc.Separator)
drop := prc.matchString(bytesutil.ToUnsafeString(bb.B))
drop := prc.regex.MatchString(bytesutil.ToUnsafeString(bb.B))
relabelBufPool.Put(bb)
if drop {
return labels[:labelsOffset]
@ -317,7 +319,7 @@ func (prc *parsedRelabelConfig) apply(labels []prompbmarshal.Label, labelsOffset
dst := labels[:labelsOffset]
for i := range src {
label := &src[i]
if !prc.matchString(label.Name) {
if !prc.regex.MatchString(label.Name) {
dst = append(dst, *label)
}
}
@ -327,7 +329,7 @@ func (prc *parsedRelabelConfig) apply(labels []prompbmarshal.Label, labelsOffset
dst := labels[:labelsOffset]
for i := range src {
label := &src[i]
if prc.matchString(label.Name) {
if prc.regex.MatchString(label.Name) {
dst = append(dst, *label)
}
}
@ -387,12 +389,12 @@ func (prc *parsedRelabelConfig) replaceFullString(s, replacement string, hasCapt
}
}
// Slow path - regexp processing
match := prc.Regex.FindStringSubmatchIndex(s)
match := prc.RegexAnchored.FindStringSubmatchIndex(s)
if match == nil {
return s, false
}
bb := relabelBufPool.Get()
bb.B = prc.Regex.ExpandString(bb.B[:0], replacement, s, match)
bb.B = prc.RegexAnchored.ExpandString(bb.B[:0], replacement, s, match)
result := string(bb.B)
relabelBufPool.Put(bb)
return result, true
@ -413,39 +415,9 @@ func (prc *parsedRelabelConfig) replaceStringSubmatches(s, replacement string, h
return re.ReplaceAllString(s, replacement), true
}
// matchString reports whether s matches the regex stored in prc.
//
// Plain string comparisons are tried first; prc.Regex is consulted
// only when none of the cheaper checks can decide the result.
func (prc *parsedRelabelConfig) matchString(s string) bool {
	if candidates := prc.regexOrValues; len(candidates) > 0 {
		// A non-empty regexOrValues list fully decides the result:
		// s matches only if it equals one of the listed values.
		for i := range candidates {
			if candidates[i] == s {
				return true
			}
		}
		return false
	}

	literal, isWholeRegex := prc.regexOriginal.LiteralPrefix()
	switch {
	case isWholeRegex:
		// The whole regex is a literal string, so an equality check is enough.
		return s == literal
	case !strings.HasPrefix(s, literal):
		// s doesn't start with the literal prefix of the regex.
		return false
	}

	if expr := prc.regexOriginal.String(); strings.HasPrefix(expr, literal) {
		// Fast path for `foo.*` and `bar.+` regexps
		switch expr[len(literal):] {
		case ".*", "(.*)":
			return true
		case ".+", "(.+)":
			return len(s) > len(literal)
		}
	}

	// Slow path - fall back to regexp matching.
	return prc.Regex.MatchString(s)
}
func (prc *parsedRelabelConfig) expandCaptureGroups(template, source string, match []int) string {
bb := relabelBufPool.Get()
bb.B = prc.Regex.ExpandString(bb.B[:0], template, source, match)
bb.B = prc.RegexAnchored.ExpandString(bb.B[:0], template, source, match)
s := string(bb.B)
relabelBufPool.Put(bb)
return s

View file

@ -728,12 +728,12 @@ func TestFillLabelReferences(t *testing.T) {
f(`{{bar}}-aa{{__name__}}.{{bar}}{{non-existing-label}}`, `foo{bar="baz"}`, `baz-aafoo.baz`)
}
func TestRegexpMatchStringSuccess(t *testing.T) {
func TestRegexMatchStringSuccess(t *testing.T) {
f := func(pattern, s string) {
t.Helper()
prc := newTestRegexRelabelConfig(pattern)
if !prc.matchString(s) {
t.Fatalf("unexpected matchString(%q) result; got false; want true", s)
if !prc.regex.MatchString(s) {
t.Fatalf("unexpected MatchString(%q) result; got false; want true", s)
}
}
f("", "")
@ -753,8 +753,8 @@ func TestRegexpMatchStringFailure(t *testing.T) {
f := func(pattern, s string) {
t.Helper()
prc := newTestRegexRelabelConfig(pattern)
if prc.matchString(s) {
t.Fatalf("unexpected matchString(%q) result; got true; want false", s)
if prc.regex.MatchString(s) {
t.Fatalf("unexpected MatchString(%q) result; got true; want false", s)
}
}
f("", "foo")

View file

@ -16,7 +16,7 @@ func BenchmarkMatchRegexPrefixDotPlusMatchOptimized(b *testing.B) {
b.SetBytes(1)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
if !prc.matchString(s) {
if !prc.regex.MatchString(s) {
panic(fmt.Errorf("unexpected string mismatch for pattern=%q, s=%q", pattern, s))
}
}
@ -46,7 +46,7 @@ func BenchmarkMatchRegexPrefixDotPlusMismatchOptimized(b *testing.B) {
b.SetBytes(1)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
if prc.matchString(s) {
if prc.regex.MatchString(s) {
panic(fmt.Errorf("unexpected string match for pattern=%q, s=%q", pattern, s))
}
}
@ -76,7 +76,7 @@ func BenchmarkMatchRegexPrefixDotStarMatchOptimized(b *testing.B) {
b.SetBytes(1)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
if !prc.matchString(s) {
if !prc.regex.MatchString(s) {
panic(fmt.Errorf("unexpected string mismatch for pattern=%q, s=%q", pattern, s))
}
}
@ -106,7 +106,7 @@ func BenchmarkMatchRegexPrefixDotStarMismatchOptimized(b *testing.B) {
b.SetBytes(1)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
if prc.matchString(s) {
if prc.regex.MatchString(s) {
panic(fmt.Errorf("unexpected string match for pattern=%q, s=%q", pattern, s))
}
}
@ -136,7 +136,7 @@ func BenchmarkMatchRegexSingleValueMatchOptimized(b *testing.B) {
b.SetBytes(1)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
if !prc.matchString(s) {
if !prc.regex.MatchString(s) {
panic(fmt.Errorf("unexpected string mismatch for pattern=%q, s=%q", pattern, s))
}
}
@ -166,7 +166,7 @@ func BenchmarkMatchRegexSingleValueMismatchOptimized(b *testing.B) {
b.SetBytes(1)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
if prc.matchString(s) {
if prc.regex.MatchString(s) {
panic(fmt.Errorf("unexpected string match for pattern=%q, s=%q", pattern, s))
}
}
@ -196,7 +196,7 @@ func BenchmarkMatchRegexOrValuesMatchOptimized(b *testing.B) {
b.SetBytes(1)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
if !prc.matchString(s) {
if !prc.regex.MatchString(s) {
panic(fmt.Errorf("unexpected string mismatch for pattern=%q, s=%q", pattern, s))
}
}
@ -226,7 +226,7 @@ func BenchmarkMatchRegexOrValuesMismatchOptimized(b *testing.B) {
b.SetBytes(1)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
if prc.matchString(s) {
if prc.regex.MatchString(s) {
panic(fmt.Errorf("unexpected string match for pattern=%q, s=%q", pattern, s))
}
}

View file

@ -16,7 +16,7 @@ import (
type PromRegex struct {
// prefix contains literal prefix for regex.
// For example, prefix="foo" for regex="foo(a|b)"
prefix string
prefix string
// suffix contains the regex suffix left after removing the prefix.
// For example, suffix="a|b" for regex="foo(a|b)"
@ -51,12 +51,12 @@ func NewPromRegex(expr string) (*PromRegex, error) {
suffixExpr := "^(?:" + suffix + ")$"
reSuffix := regexp.MustCompile(suffixExpr)
pr := &PromRegex{
prefix: prefix,
suffix: suffix,
prefix: prefix,
suffix: suffix,
substrDotStar: substrDotStar,
substrDotPlus: substrDotPlus,
orValues: orValues,
reSuffix: reSuffix,
orValues: orValues,
reSuffix: reSuffix,
}
return pr, nil
}
@ -87,7 +87,7 @@ func (pr *PromRegex) MatchString(s string) bool {
if pr.substrDotPlus != "" {
// Fast path - pr contains ".+someText.+"
n := strings.Index(s, pr.substrDotPlus)
return n > 0 && n + len(pr.substrDotPlus) < len(s)
return n > 0 && n+len(pr.substrDotPlus) < len(s)
}
switch pr.suffix {
case ".*":
@ -116,4 +116,3 @@ func getSubstringLiteral(expr, prefixSuffix string) string {
}
return prefix
}

View file

@ -19,6 +19,15 @@ func BenchmarkPromRegexMatchString(b *testing.B) {
b.Run("unpotimized-prefix-mismatch", func(b *testing.B) {
benchmarkPromRegexMatchString(b, "foo(bar.*|baz)", "zfoobarz", false)
})
b.Run("dot-star-match", func(b *testing.B) {
benchmarkPromRegexMatchString(b, ".*", "foo", true)
})
b.Run("dot-plus-match", func(b *testing.B) {
benchmarkPromRegexMatchString(b, ".+", "foo", true)
})
b.Run("dot-plus-mismatch", func(b *testing.B) {
benchmarkPromRegexMatchString(b, ".+", "", false)
})
b.Run("literal-match", func(b *testing.B) {
benchmarkPromRegexMatchString(b, "foo", "foo", true)
})