diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 939aab45cd..6ab93bd4ee 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -25,7 +25,8 @@ at [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMe See [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy-via-labels) for details. * FEATURE: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): support specifying tenant ids via `vm_account_id` and `vm_project_id` labels. See [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy-via-labels) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2970). -* FEATURE: improve [relabeling](https://docs.victoriametrics.com/vmagent.html#relabeling) performance by up to 3x for non-trivial `regex` values. +* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): improve [relabeling](https://docs.victoriametrics.com/vmagent.html#relabeling) performance by up to 3x for non-trivial `regex` values such as `([^:]+):.+`, which can be used for extracting a `host` part from `host:port` label value. +* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): improve performance by up to 4x for queries containing non-trivial `regex` filters such as `{path=~"/foo/.+|/bar"}`. * FEATURE: sanitize metric names for data ingested via [DataDog protocol](https://docs.victoriametrics.com/#how-to-send-data-from-datadog-agent) according to [DataDog metric naming](https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics). The behaviour can be disabled by passing `-datadog.sanitizeMetricName=false` command-line flag. Thanks to @PerGon for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3105). 
* FEATURE: add `-usePromCompatibleNaming` command-line flag to [vmagent](https://docs.victoriametrics.com/vmagent.html), to single-node VictoriaMetrics and to `vminsert` component of VictoriaMetrics cluster. This flag can be used for normalizing the ingested metric names and label names to [Prometheus-compatible form](https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels). If this flag is set, then all the chars unsupported by Prometheus are replaced with `_` chars in metric names and labels of the ingested samples. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3113). * FEATURE: accept whitespace in metric names and tags ingested via [Graphite plaintext protocol](https://docs.victoriametrics.com/#how-to-send-data-from-graphite-compatible-agents-such-as-statsd) according to [the specs](https://graphite.readthedocs.io/en/latest/tags.html). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3102). diff --git a/lib/storage/tag_filters.go b/lib/storage/tag_filters.go index 0577d958c0..c2e4934771 100644 --- a/lib/storage/tag_filters.go +++ b/lib/storage/tag_filters.go @@ -602,13 +602,21 @@ func getOptimizedReMatchFunc(reMatch func(b []byte) bool, expr string) (func(b [ if err != nil { logger.Panicf("BUG: unexpected error when parsing verified expr=%q: %s", expr, err) } - if matchFunc, literalSuffix, reCost := getOptimizedReMatchFuncExt(reMatch, sre); matchFunc != nil { + // Prepare fast string matcher for reMatch. + fsm := bytesutil.NewFastStringMatcher(func(s string) bool { + return reMatch(bytesutil.ToUnsafeBytes(s)) + }) + reMatchFast := func(b []byte) bool { + return fsm.Match(bytesutil.ToUnsafeString(b)) + } + + if matchFunc, literalSuffix, reCost := getOptimizedReMatchFuncExt(reMatchFast, sre); matchFunc != nil { // Found optimized function for matching the expr. 
suffixUnescaped := tagCharsReverseRegexpEscaper.Replace(literalSuffix) return matchFunc, suffixUnescaped, reCost } - // Fall back to un-optimized reMatch. - return reMatch, "", reMatchCost + // Fall back to reMatchFast. + return reMatchFast, "", reMatchCost } // These cost values are used for sorting tag filters in ascending order of the required CPU time for execution.