package regexutil

import (
	"regexp"
	"regexp/syntax"
	"strings"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
)

// PromRegex implements an optimized string matching for Prometheus-like regex.
//
// The following regexs are optimized:
//
// - plain string such as "foobar"
// - alternate strings such as "foo|bar|baz"
// - prefix match such as "foo.*" or "foo.+"
// - substring match such as ".*foo.*" or ".+bar.+"
//
// The rest of regexps are also optimized by returning cached match results for the same input strings.
type PromRegex struct {
	// prefix contains literal prefix for regex.
	// For example, prefix="foo" for regex="foo(a|b)"
	prefix string

	// suffix contains regex suffix left after removing the prefix.
	// For example, suffix="a|b" for regex="foo(a|b)"
	suffix string

	// substrDotStar contains literal string for regex suffix=".*string.*"
	// It is empty if the suffix doesn't have this form.
	substrDotStar string

	// substrDotPlus contains literal string for regex suffix=".+string.+"
	// It is empty if the suffix doesn't have this form.
	substrDotPlus string

	// orValues contains or values for the suffix regex.
	// For example, orValues contain ["foo","bar","baz"] for regex suffix="foo|bar|baz"
	orValues []string

	// reSuffixMatcher contains fast matcher for "^suffix$"
	// It is used as a fallback when none of the fast paths apply.
	reSuffixMatcher *bytesutil.FastStringMatcher
}

// NewPromRegex returns PromRegex for the given expr.
//
// An error is returned if expr isn't a valid regular expression.
func NewPromRegex(expr string) (*PromRegex, error) {
	// Validate the original expression up front, so the caller gets the error for its own input.
	if _, err := regexp.Compile(expr); err != nil {
		return nil, err
	}

	// Split expr into literal prefix and the remaining regex, then detect
	// the suffix shapes that MatchString can handle without running the regex.
	prefix, suffix := Simplify(expr)
	orValues := GetOrValues(suffix)
	substrDotStar := getSubstringLiteral(suffix, ".*")
	substrDotPlus := getSubstringLiteral(suffix, ".+")

	// It is expected that Simplify returns valid regexp in suffix, so use MustCompile here.
	// Anchor suffix to the beginning and the end of the matching string.
	suffixExpr := "^(?:" + suffix + ")$"
	reSuffix := regexp.MustCompile(suffixExpr)
	reSuffixMatcher := bytesutil.NewFastStringMatcher(reSuffix.MatchString)

	pr := &PromRegex{
		prefix:          prefix,
		suffix:          suffix,
		substrDotStar:   substrDotStar,
		substrDotPlus:   substrDotPlus,
		orValues:        orValues,
		reSuffixMatcher: reSuffixMatcher,
	}
	return pr, nil
}

// MatchString returns true if s matches pr.
//
// The pr is automatically anchored to the beginning and to the end
// of the matching string with '^' and '$'.
func (pr *PromRegex) MatchString(s string) bool {
	if !strings.HasPrefix(s, pr.prefix) {
		// Fast path - s has another prefix than pr.
		return false
	}
	// The prefix is matched; the remaining checks are applied to the rest of s only.
	s = s[len(pr.prefix):]

	if len(pr.orValues) > 0 {
		// Fast path - pr contains only alternate strings such as 'foo|bar|baz'
		for _, v := range pr.orValues {
			if s == v {
				return true
			}
		}
		return false
	}
	if pr.substrDotStar != "" {
		// Fast path - pr contains ".*someText.*"
		return strings.Contains(s, pr.substrDotStar)
	}
	if pr.substrDotPlus != "" {
		// Fast path - pr contains ".+someText.+"
		//
		// someText must be preceded and followed by at least one char,
		// so it must occur in s with the first and the last bytes cut off.
		// It isn't enough to check only the first occurrence of someText in s:
		// "foofoox" matches ".+foo.+" via the second "foo", while the first one starts at offset 0.
		if len(s) < 2 {
			return false
		}
		return strings.Contains(s[1:len(s)-1], pr.substrDotPlus)
	}

	switch pr.suffix {
	case ".*":
		// Fast path - the pr contains "prefix.*"
		return true
	case ".+":
		// Fast path - the pr contains "prefix.+"
		return len(s) > 0
	}

	// Fall back to slow path by matching the original regexp.
	return pr.reSuffixMatcher.Match(s)
}

// getSubstringLiteral returns regex part from expr surrounded by prefixSuffix.
//
// For example, if expr=".+foo.+" and prefixSuffix=".+", then the function returns "foo".
//
// An empty string is returned in the following cases:
//
// - expr cannot be parsed
// - expr is an alternation at the top level
// - expr doesn't start and end with prefixSuffix
// - the part between the leading and the trailing prefixSuffix isn't a plain literal,
//   e.g. Simplify leaves a non-empty regex suffix for it
func getSubstringLiteral(expr, prefixSuffix string) string {
	// Verify that the expr doesn't contain alternate regexps. In this case it is unsafe removing prefix and suffix.
	sre, err := syntax.Parse(expr, syntax.Perl)
	if err != nil {
		return ""
	}
	if sre.Op == syntax.OpAlternate {
		return ""
	}

	// Strip the leading prefixSuffix.
	if !strings.HasPrefix(expr, prefixSuffix) {
		return ""
	}
	expr = expr[len(prefixSuffix):]

	// Strip the trailing prefixSuffix.
	if !strings.HasSuffix(expr, prefixSuffix) {
		return ""
	}
	expr = expr[:len(expr)-len(prefixSuffix)]

	// The remaining part is usable only if it is fully consumed as a literal prefix.
	prefix, suffix := Simplify(expr)
	if suffix != "" {
		return ""
	}
	return prefix
}