mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-31 15:06:26 +00:00
166 lines
4.1 KiB
Go
166 lines
4.1 KiB
Go
|
package regexutil
|
||
|
|
||
|
import (
|
||
|
"regexp"
|
||
|
"regexp/syntax"
|
||
|
"strings"
|
||
|
)
|
||
|
|
||
|
// Regex implements an optimized string matching for Go regex.
//
// Optimized handling exists for the following regex shapes:
//
//   - plain string such as "foobar"
//   - alternate strings such as "foo|bar|baz"
//   - prefix match such as "foo.*" or "foo.+"
//   - substring match such as ".*foo.*" or ".+bar.+"
type Regex struct {
	// prefix is the literal prefix of the regex.
	// For example, prefix="foo" for regex="foo(a|b)"
	prefix string

	// isSuffixDotStar is true if the suffix is ".*".
	// isSuffixDotPlus is true if the suffix is ".+".
	isSuffixDotStar, isSuffixDotPlus bool

	// substrDotStar is the literal string for regex suffix=".*string.*".
	// substrDotPlus is the literal string for regex suffix=".+string.+".
	substrDotStar, substrDotPlus string

	// orValues holds the alternatives of the suffix regex.
	// For example, orValues contain ["foo","bar","baz"] for regex suffix="foo|bar|baz"
	orValues []string

	// re is the compiled regexp, which is used when no optimized path applies.
	re *regexp.Regexp
}
|
||
|
|
||
|
// NewRegex returns Regex for the given expr.
|
||
|
func NewRegex(expr string) (*Regex, error) {
|
||
|
if _, err := regexp.Compile(expr); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
prefix, suffix := SimplifyRegex(expr)
|
||
|
orValues := GetOrValuesRegex(suffix)
|
||
|
isSuffixDotStar := isDotOpRegexp(suffix, syntax.OpStar)
|
||
|
isSuffixDotPlus := isDotOpRegexp(suffix, syntax.OpPlus)
|
||
|
substrDotStar := getSubstringLiteral(suffix, syntax.OpStar)
|
||
|
substrDotPlus := getSubstringLiteral(suffix, syntax.OpPlus)
|
||
|
|
||
|
var re *regexp.Regexp
|
||
|
if len(orValues) == 0 && substrDotStar == "" && substrDotPlus == "" && suffix != ".*" && suffix != ".+" {
|
||
|
suffixAnchored := suffix
|
||
|
if len(prefix) > 0 {
|
||
|
suffixAnchored = "^(?:" + suffix + ")"
|
||
|
}
|
||
|
// The suffixAnchored must be properly compiled, since it has been already checked above.
|
||
|
// Otherwise it is a bug, which must be fixed.
|
||
|
re = regexp.MustCompile(suffixAnchored)
|
||
|
}
|
||
|
r := &Regex{
|
||
|
prefix: prefix,
|
||
|
isSuffixDotStar: isSuffixDotStar,
|
||
|
isSuffixDotPlus: isSuffixDotPlus,
|
||
|
substrDotStar: substrDotStar,
|
||
|
substrDotPlus: substrDotPlus,
|
||
|
orValues: orValues,
|
||
|
re: re,
|
||
|
}
|
||
|
return r, nil
|
||
|
}
|
||
|
|
||
|
// MatchString returns true if s matches pr.
|
||
|
func (r *Regex) MatchString(s string) bool {
|
||
|
if len(r.prefix) == 0 {
|
||
|
return r.matchStringNoPrefix(s)
|
||
|
}
|
||
|
return r.matchStringWithPrefix(s)
|
||
|
}
|
||
|
|
||
|
func (r *Regex) matchStringNoPrefix(s string) bool {
|
||
|
if r.isSuffixDotStar {
|
||
|
return true
|
||
|
}
|
||
|
if r.isSuffixDotPlus {
|
||
|
return len(s) > 0
|
||
|
}
|
||
|
if r.substrDotStar != "" {
|
||
|
// Fast path - r contains ".*someText.*"
|
||
|
return strings.Contains(s, r.substrDotStar)
|
||
|
}
|
||
|
if r.substrDotPlus != "" {
|
||
|
// Fast path - r contains ".+someText.+"
|
||
|
n := strings.Index(s, r.substrDotPlus)
|
||
|
return n > 0 && n+len(r.substrDotPlus) < len(s)
|
||
|
}
|
||
|
|
||
|
if len(r.orValues) == 0 {
|
||
|
// Fall back to slow path by matching the original regexp.
|
||
|
return r.re.MatchString(s)
|
||
|
}
|
||
|
|
||
|
// Fast path - compare s to pr.orValues
|
||
|
for _, v := range r.orValues {
|
||
|
if strings.Contains(s, v) {
|
||
|
return true
|
||
|
}
|
||
|
}
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
func (r *Regex) matchStringWithPrefix(s string) bool {
|
||
|
n := strings.Index(s, r.prefix)
|
||
|
if n < 0 {
|
||
|
// Fast path - s doesn't contain the needed prefix
|
||
|
return false
|
||
|
}
|
||
|
sNext := s[n+1:]
|
||
|
s = s[n+len(r.prefix):]
|
||
|
|
||
|
if r.isSuffixDotStar {
|
||
|
return true
|
||
|
}
|
||
|
if r.isSuffixDotPlus {
|
||
|
return len(s) > 0
|
||
|
}
|
||
|
if r.substrDotStar != "" {
|
||
|
// Fast path - r contains ".*someText.*"
|
||
|
return strings.Contains(s, r.substrDotStar)
|
||
|
}
|
||
|
if r.substrDotPlus != "" {
|
||
|
// Fast path - r contains ".+someText.+"
|
||
|
n := strings.Index(s, r.substrDotPlus)
|
||
|
return n > 0 && n+len(r.substrDotPlus) < len(s)
|
||
|
}
|
||
|
|
||
|
for {
|
||
|
if len(r.orValues) == 0 {
|
||
|
// Fall back to slow path by matching the original regexp.
|
||
|
if r.re.MatchString(s) {
|
||
|
return true
|
||
|
}
|
||
|
} else {
|
||
|
// Fast path - compare s to pr.orValues
|
||
|
for _, v := range r.orValues {
|
||
|
if strings.HasPrefix(s, v) {
|
||
|
return true
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Mismatch. Try again starting from the next char.
|
||
|
s = sNext
|
||
|
n := strings.Index(s, r.prefix)
|
||
|
if n < 0 {
|
||
|
// Fast path - s doesn't contain the needed prefix
|
||
|
return false
|
||
|
}
|
||
|
sNext = s[n+1:]
|
||
|
s = s[n+len(r.prefix):]
|
||
|
}
|
||
|
}
|