2022-08-26 11:53:02 +00:00
|
|
|
package regexutil
|
|
|
|
|
|
|
|
import (
|
|
|
|
"regexp"
|
2023-11-13 17:23:36 +00:00
|
|
|
"regexp/syntax"
|
2022-08-26 11:53:02 +00:00
|
|
|
"strings"
|
2022-09-30 07:38:44 +00:00
|
|
|
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
2022-08-26 11:53:02 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// PromRegex implements an optimized string matching for Prometheus-like regex.
|
|
|
|
//
|
|
|
|
// The following regexs are optimized:
|
|
|
|
//
|
|
|
|
// - plain string such as "foobar"
|
|
|
|
// - alternate strings such as "foo|bar|baz"
|
|
|
|
// - prefix match such as "foo.*" or "foo.+"
|
|
|
|
// - substring match such as ".*foo.*" or ".+bar.+"
|
2022-09-30 07:38:44 +00:00
|
|
|
//
|
|
|
|
// The rest of regexps are also optimized by returning cached match results for the same input strings.
|
2022-08-26 11:53:02 +00:00
|
|
|
type PromRegex struct {
|
2024-05-24 01:06:55 +00:00
|
|
|
// exprStr is the original expression.
|
|
|
|
exprStr string
|
|
|
|
|
2022-08-26 11:53:02 +00:00
|
|
|
// prefix contains literal prefix for regex.
|
|
|
|
// For example, prefix="foo" for regex="foo(a|b)"
|
2022-08-26 12:23:41 +00:00
|
|
|
prefix string
|
2022-08-26 11:53:02 +00:00
|
|
|
|
2024-05-24 01:06:55 +00:00
|
|
|
// isOnlyPrefix is set to true if the regex contains only the prefix.
|
|
|
|
isOnlyPrefix bool
|
|
|
|
|
|
|
|
// isSuffixDotStar is set to true if suffix is ".*"
|
|
|
|
isSuffixDotStar bool
|
|
|
|
|
|
|
|
// isSuffixDotPlus is set to true if suffix is ".+"
|
|
|
|
isSuffixDotPlus bool
|
2022-08-26 11:53:02 +00:00
|
|
|
|
|
|
|
// substrDotStar contains literal string for regex suffix=".*string.*"
|
|
|
|
substrDotStar string
|
|
|
|
|
|
|
|
// substrDotPlus contains literal string for regex suffix=".+string.+"
|
|
|
|
substrDotPlus string
|
|
|
|
|
|
|
|
// orValues contains or values for the suffix regex.
|
|
|
|
// For example, orValues contain ["foo","bar","baz"] for regex suffix="foo|bar|baz"
|
|
|
|
orValues []string
|
|
|
|
|
2022-09-30 07:38:44 +00:00
|
|
|
// reSuffixMatcher contains fast matcher for "^suffix$"
|
|
|
|
reSuffixMatcher *bytesutil.FastStringMatcher
|
2022-08-26 11:53:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// NewPromRegex returns PromRegex for the given expr.
|
|
|
|
func NewPromRegex(expr string) (*PromRegex, error) {
|
|
|
|
if _, err := regexp.Compile(expr); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2024-05-24 01:06:55 +00:00
|
|
|
prefix, suffix := SimplifyPromRegex(expr)
|
|
|
|
sre := mustParseRegexp(suffix)
|
|
|
|
orValues := getOrValues(sre)
|
|
|
|
isOnlyPrefix := len(orValues) == 1 && orValues[0] == ""
|
|
|
|
isSuffixDotStar := isDotOp(sre, syntax.OpStar)
|
|
|
|
isSuffixDotPlus := isDotOp(sre, syntax.OpPlus)
|
|
|
|
substrDotStar := getSubstringLiteral(sre, syntax.OpStar)
|
|
|
|
substrDotPlus := getSubstringLiteral(sre, syntax.OpPlus)
|
2022-08-26 11:53:02 +00:00
|
|
|
// It is expected that Optimize returns valid regexp in suffix, so use MustCompile here.
|
|
|
|
// Anchor suffix to the beginning and the end of the matching string.
|
|
|
|
suffixExpr := "^(?:" + suffix + ")$"
|
|
|
|
reSuffix := regexp.MustCompile(suffixExpr)
|
2022-09-30 07:38:44 +00:00
|
|
|
reSuffixMatcher := bytesutil.NewFastStringMatcher(reSuffix.MatchString)
|
2022-08-26 11:53:02 +00:00
|
|
|
pr := &PromRegex{
|
2024-05-24 01:06:55 +00:00
|
|
|
exprStr: expr,
|
2022-09-30 07:38:44 +00:00
|
|
|
prefix: prefix,
|
2024-05-24 01:06:55 +00:00
|
|
|
isOnlyPrefix: isOnlyPrefix,
|
|
|
|
isSuffixDotStar: isSuffixDotStar,
|
|
|
|
isSuffixDotPlus: isSuffixDotPlus,
|
2022-09-30 07:38:44 +00:00
|
|
|
substrDotStar: substrDotStar,
|
|
|
|
substrDotPlus: substrDotPlus,
|
|
|
|
orValues: orValues,
|
|
|
|
reSuffixMatcher: reSuffixMatcher,
|
2022-08-26 11:53:02 +00:00
|
|
|
}
|
|
|
|
return pr, nil
|
|
|
|
}
|
|
|
|
|
2023-02-13 12:27:13 +00:00
|
|
|
// MatchString returns true if s matches pr.
|
2022-08-26 11:53:02 +00:00
|
|
|
//
|
|
|
|
// The pr is automatically anchored to the beginning and to the end
|
|
|
|
// of the matching string with '^' and '$'.
|
|
|
|
func (pr *PromRegex) MatchString(s string) bool {
|
2024-05-24 01:06:55 +00:00
|
|
|
if pr.isOnlyPrefix {
|
|
|
|
return s == pr.prefix
|
2022-08-26 11:53:02 +00:00
|
|
|
}
|
2024-05-24 01:06:55 +00:00
|
|
|
|
|
|
|
if len(pr.prefix) > 0 {
|
|
|
|
if !strings.HasPrefix(s, pr.prefix) {
|
|
|
|
// Fast path - s has another prefix than pr.
|
|
|
|
return false
|
2022-08-26 11:53:02 +00:00
|
|
|
}
|
2024-05-24 01:06:55 +00:00
|
|
|
s = s[len(pr.prefix):]
|
|
|
|
}
|
|
|
|
|
|
|
|
if pr.isSuffixDotStar {
|
|
|
|
// Fast path - the pr contains "prefix.*"
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
if pr.isSuffixDotPlus {
|
|
|
|
// Fast path - the pr contains "prefix.+"
|
|
|
|
return len(s) > 0
|
2022-08-26 11:53:02 +00:00
|
|
|
}
|
|
|
|
if pr.substrDotStar != "" {
|
|
|
|
// Fast path - pr contains ".*someText.*"
|
|
|
|
return strings.Contains(s, pr.substrDotStar)
|
|
|
|
}
|
|
|
|
if pr.substrDotPlus != "" {
|
|
|
|
// Fast path - pr contains ".+someText.+"
|
|
|
|
n := strings.Index(s, pr.substrDotPlus)
|
2022-08-26 12:23:41 +00:00
|
|
|
return n > 0 && n+len(pr.substrDotPlus) < len(s)
|
2022-08-26 11:53:02 +00:00
|
|
|
}
|
2024-05-24 01:06:55 +00:00
|
|
|
|
|
|
|
if len(pr.orValues) > 0 {
|
|
|
|
// Fast path - pr contains only alternate strings such as 'foo|bar|baz'
|
|
|
|
for _, v := range pr.orValues {
|
|
|
|
if s == v {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false
|
2022-08-26 11:53:02 +00:00
|
|
|
}
|
2024-05-24 01:06:55 +00:00
|
|
|
|
2022-08-26 11:53:02 +00:00
|
|
|
// Fall back to slow path by matching the original regexp.
|
2022-09-30 07:38:44 +00:00
|
|
|
return pr.reSuffixMatcher.Match(s)
|
2022-08-26 11:53:02 +00:00
|
|
|
}
|
|
|
|
|
2024-05-24 01:06:55 +00:00
|
|
|
// String returns string representation of pr.
|
|
|
|
func (pr *PromRegex) String() string {
|
|
|
|
return pr.exprStr
|
2022-08-26 11:53:02 +00:00
|
|
|
}
|