mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
lib/storage: optimize matching speed for non-trivial regexp filters
Wrap re.Match into bytesutil.FastStringMatcher. This increases performance for `{foo=~"complex_regex_here"}` filters by up to 4x.
This commit is contained in:
parent
72d8653233
commit
b96fe2e265
2 changed files with 13 additions and 4 deletions
|
@ -25,7 +25,8 @@ at [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMe
|
|||
See [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy-via-labels) for details.
|
||||
|
||||
* FEATURE: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): support specifying tenant ids via `vm_account_id` and `vm_project_id` labels. See [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy-via-labels) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2970).
|
||||
* FEATURE: improve [relabeling](https://docs.victoriametrics.com/vmagent.html#relabeling) performance by up to 3x for non-trivial `regex` values.
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): improve [relabeling](https://docs.victoriametrics.com/vmagent.html#relabeling) performance by up to 3x for non-trivial `regex` values such as `([^:]+):.+`, which can be used for extracting a `host` part from `host:port` label value.
|
||||
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): improve performance by up to 4x for queries containing non-trivial `regex` filters such as `{path=~"/foo/.+|/bar"}`.
|
||||
* FEATURE: sanitize metric names for data ingested via [DataDog protocol](https://docs.victoriametrics.com/#how-to-send-data-from-datadog-agent) according to [DataDog metric naming](https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics). The behaviour can be disabled by passing `-datadog.sanitizeMetricName=false` command-line flag. Thanks to @PerGon for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3105).
|
||||
* FEATURE: add `-usePromCompatibleNaming` command-line flag to [vmagent](https://docs.victoriametrics.com/vmagent.html), to single-node VictoriaMetrics and to `vminsert` component of VictoriaMetrics cluster. This flag can be used for normalizing the ingested metric names and label names to [Prometheus-compatible form](https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels). If this flag is set, then all the chars unsupported by Prometheus are replaced with `_` chars in metric names and labels of the ingested samples. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3113).
|
||||
* FEATURE: accept whitespace in metric names and tags ingested via [Graphite plaintext protocol](https://docs.victoriametrics.com/#how-to-send-data-from-graphite-compatible-agents-such-as-statsd) according to [the specs](https://graphite.readthedocs.io/en/latest/tags.html). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3102).
|
||||
|
|
|
@ -602,13 +602,21 @@ func getOptimizedReMatchFunc(reMatch func(b []byte) bool, expr string) (func(b [
|
|||
if err != nil {
|
||||
logger.Panicf("BUG: unexpected error when parsing verified expr=%q: %s", expr, err)
|
||||
}
|
||||
if matchFunc, literalSuffix, reCost := getOptimizedReMatchFuncExt(reMatch, sre); matchFunc != nil {
|
||||
// Prepare fast string matcher for reMatch.
|
||||
fsm := bytesutil.NewFastStringMatcher(func(s string) bool {
|
||||
return reMatch(bytesutil.ToUnsafeBytes(s))
|
||||
})
|
||||
reMatchFast := func(b []byte) bool {
|
||||
return fsm.Match(bytesutil.ToUnsafeString(b))
|
||||
}
|
||||
|
||||
if matchFunc, literalSuffix, reCost := getOptimizedReMatchFuncExt(reMatchFast, sre); matchFunc != nil {
|
||||
// Found optimized function for matching the expr.
|
||||
suffixUnescaped := tagCharsReverseRegexpEscaper.Replace(literalSuffix)
|
||||
return matchFunc, suffixUnescaped, reCost
|
||||
}
|
||||
// Fall back to un-optimized reMatch.
|
||||
return reMatch, "", reMatchCost
|
||||
// Fall back to reMatchFast.
|
||||
return reMatchFast, "", reMatchCost
|
||||
}
|
||||
|
||||
// These cost values are used for sorting tag filters in ascending order or the required CPU time for execution.
|
||||
|
|
Loading…
Reference in a new issue