From 7603446850778d770e6fe9f94c2306d7988ff6e5 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Wed, 30 Oct 2024 17:59:02 +0100 Subject: [PATCH] app/vlselect: add support for extra_filters and extra_stream_filters query args across all the HTTP querying APIs These query args are going to be used for quick filtering on field values at https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7365 --- app/vlselect/logsql/logsql.go | 27 ++++++ docs/VictoriaLogs/CHANGELOG.md | 2 + docs/VictoriaLogs/querying/README.md | 34 +++++++ lib/logstorage/filter_stream.go | 4 + lib/logstorage/parser.go | 102 ++++++++++++++++++++ lib/logstorage/parser_test.go | 139 +++++++++++++++++++++++++++ lib/logstorage/stream_filter.go | 1 + 7 files changed, 309 insertions(+) diff --git a/app/vlselect/logsql/logsql.go b/app/vlselect/logsql/logsql.go index 170017c7b..d267db2f1 100644 --- a/app/vlselect/logsql/logsql.go +++ b/app/vlselect/logsql/logsql.go @@ -1017,6 +1017,20 @@ func parseCommonArgs(r *http.Request) (*logstorage.Query, []logstorage.TenantID, q.AddTimeFilter(start, end) } + // Parse optional extra_filters + extraFilters, err := getExtraFilters(r, "extra_filters") + if err != nil { + return nil, nil, err + } + q.AddExtraFilters(extraFilters) + + // Parse optional extra_stream_filters + extraStreamFilters, err := getExtraFilters(r, "extra_stream_filters") + if err != nil { + return nil, nil, err + } + q.AddExtraStreamFilters(extraStreamFilters) + return q, tenantIDs, nil } @@ -1032,3 +1046,16 @@ func getTimeNsec(r *http.Request, argName string) (int64, bool, error) { } return nsecs, true, nil } + +func getExtraFilters(r *http.Request, argName string) ([]logstorage.Field, error) { + s := r.FormValue(argName) + if s == "" { + return nil, nil + } + + var p logstorage.JSONParser + if err := p.ParseLogMessage([]byte(s)); err != nil { + return nil, fmt.Errorf("cannot parse %s: %w", argName, err) + } + return p.Fields, nil +} diff --git a/docs/VictoriaLogs/CHANGELOG.md b/docs/VictoriaLogs/CHANGELOG.md index 5f6da4947..106cea8c0 100644 --- a/docs/VictoriaLogs/CHANGELOG.md +++ b/docs/VictoriaLogs/CHANGELOG.md @@ -15,6 +15,8 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta ## tip +* FEATURE: add support for extra filters across all the [HTTP querying APIs](https://docs.victoriametrics.com/victorialogs/querying/#http-api). See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#extra-filters) for details. This is needed for implementing quick filtering on field values at [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7365). + ## [v0.39.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.39.0-victorialogs) Released at 2024-10-30 diff --git a/docs/VictoriaLogs/querying/README.md b/docs/VictoriaLogs/querying/README.md index 20ef6c700..b177275d8 100644 --- a/docs/VictoriaLogs/querying/README.md +++ b/docs/VictoriaLogs/querying/README.md @@ -22,6 +22,7 @@ VictoriaLogs provides the following HTTP endpoints: - [`/select/logsql/field_names`](#querying-field-names) for querying [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) names. - [`/select/logsql/field_values`](#querying-field-values) for querying [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) values. + ### Querying logs Logs stored in VictoriaLogs can be queried at the `/select/logsql/query` HTTP endpoint. @@ -105,6 +106,7 @@ with `vl_http_requests_total{path="/select/logsql/query"}` metric. See also: +- [Extra filters](#extra-filters) - [Live tailing](#live-tailing) - [Querying hits stats](#querying-hits-stats) - [Querying log stats](#querying-log-stats) @@ -159,6 +161,7 @@ with `vl_live_tailing_requests` metric. See also: +- [Extra filters](#extra-filters) - [Querying logs](#querying-logs) - [Querying streams](#querying-streams) @@ -276,6 +279,7 @@ curl http://localhost:9428/select/logsql/hits -H 'AccountID: 12' -H 'ProjectID: See also: +- [Extra filters](#extra-filters) - [Querying logs](#querying-logs) - [Querying log stats](#querying-log-stats) - [Querying log range stats](#querying-log-range-stats) @@ -348,6 +352,7 @@ The `/select/logsql/stats_query` API is useful for generating Prometheus-compati See also: +- [Extra filters](#extra-filters) - [Querying log range stats](#querying-log-range-stats) - [Querying logs](#querying-logs) - [Querying hits stats](#querying-hits-stats) @@ -441,6 +446,7 @@ The `/select/logsql/stats_query_range` API is useful for generating Prometheus-c See also: +- [Extra filters](#extra-filters) - [Querying log stats](#querying-log-stats) - [Querying logs](#querying-logs) - [Querying hits stats](#querying-hits-stats) @@ -499,6 +505,7 @@ curl http://localhost:9428/select/logsql/stream_ids -H 'AccountID: 12' -H 'Proje See also: +- [Extra filters](#extra-filters) - [Querying streams](#querying-streams) - [Querying logs](#querying-logs) - [Querying hits stats](#querying-hits-stats) @@ -556,6 +563,7 @@ curl http://localhost:9428/select/logsql/streams -H 'AccountID: 12' -H 'ProjectI See also: +- [Extra filters](#extra-filters) - [Querying stream_ids](#querying-stream_ids) - [Querying logs](#querying-logs) - [Querying hits stats](#querying-hits-stats) @@ -610,6 +618,7 @@ curl http://localhost:9428/select/logsql/stream_field_names -H 'AccountID: 12' - See also: +- [Extra filters](#extra-filters) - [Querying stream field names](#querying-stream-field-names) - [Querying field values](#querying-field-values) - [Querying streams](#querying-streams) @@ -664,6 +673,7 @@ curl http://localhost:9428/select/logsql/stream_field_values -H 'AccountID: 12' See also: +- [Extra filters](#extra-filters) - [Querying stream field values](#querying-stream-field-values) - [Querying field names](#querying-field-names) - [Querying streams](#querying-streams) @@ -717,6 +727,7 @@ curl http://localhost:9428/select/logsql/field_names -H 'AccountID: 12' -H 'Proj See also: +- [Extra filters](#extra-filters) - [Querying stream field names](#querying-stream-field-names) - [Querying field values](#querying-field-values) - [Querying streams](#querying-streams) @@ -775,12 +786,35 @@ curl http://localhost:9428/select/logsql/field_values -H 'AccountID: 12' -H 'Pro See also: +- [Extra filters](#extra-filters) - [Querying stream field values](#querying-stream-field-values) - [Querying field names](#querying-field-names) - [Querying streams](#querying-streams) - [HTTP API](#http-api) +## Extra filters + +Alls the [HTTP querying APIs](#http-api) provided by VictoriaLogs support the following optional query args: + +- `extra_filters` - this arg may contain extra [`field:=value`](https://docs.victoriametrics.com/victorialogs/logsql/#exact-filter) filters, which must be applied + to the `query` before returning the results. +- `extra_stream_filters` - this arg may contain extra [`{field="value"}`](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter) filters, + which must be applied to the `query` before returning results. + +The filters must be passed as JSON object with `"field":"value"` entries. For example, the following JSON object applies `namespace:=my-app and env:prod` filter to the `query` +passed to [HTTP querying APIs](#http-api): + +```json +{ + "namespace":"my-app", + "env":"prod" +} +``` + +The JSON object must be properly encoded with [percent encoding](https://en.wikipedia.org/wiki/Percent-encoding) before being passed to VictoriaLogs. + + ## Web UI VictoriaLogs provides Web UI for logs [querying](https://docs.victoriametrics.com/victorialogs/logsql/) and exploration diff --git a/lib/logstorage/filter_stream.go b/lib/logstorage/filter_stream.go index e127a1e0f..4abd894e0 100644 --- a/lib/logstorage/filter_stream.go +++ b/lib/logstorage/filter_stream.go @@ -12,9 +12,13 @@ type filterStream struct { f *StreamFilter // tenantIDs is the list of tenantIDs to search for streamIDs. + // + // This field is initialized just before the search. tenantIDs []TenantID // idb is the indexdb to search for streamIDs. + // + // This field is initialized just before the search. idb *indexdb streamIDsOnce sync.Once diff --git a/lib/logstorage/parser.go b/lib/logstorage/parser.go index 2d98b90cf..113682a46 100644 --- a/lib/logstorage/parser.go +++ b/lib/logstorage/parser.go @@ -411,6 +411,108 @@ func (q *Query) AddTimeFilter(start, end int64) { } } +// AddExtraStreamFilters adds stream filters to q in the form `{f1.Name=f1.Value, ..., fN.Name=fN.Value}` +func (q *Query) AddExtraStreamFilters(filters []Field) { + if len(filters) == 0 { + return + } + + fa, ok := q.f.(*filterAnd) + if !ok { + if fs, ok := q.f.(*filterStream); ok { + addExtraStreamFilters(fs, filters) + return + } + + fa = &filterAnd{ + filters: []filter{ + newEmptyFilterStream(), + q.f, + }, + } + q.f = fa + } + + hasStreamFilters := false + for _, f := range fa.filters { + if _, ok := f.(*filterStream); ok { + hasStreamFilters = true + break + } + } + if !hasStreamFilters { + var dst []filter + dst = append(dst, newEmptyFilterStream()) + fa.filters = append(dst, fa.filters...) + } + + for _, f := range fa.filters { + if fs, ok := f.(*filterStream); ok { + addExtraStreamFilters(fs, filters) + } + } +} + +func newEmptyFilterStream() *filterStream { + return &filterStream{ + f: &StreamFilter{}, + } +} + +func addExtraStreamFilters(fs *filterStream, filters []Field) { + f := fs.f + if len(f.orFilters) == 0 { + f.orFilters = []*andStreamFilter{ + { + tagFilters: appendExtraStreamFilters(nil, filters), + }, + } + return + } + for _, af := range f.orFilters { + af.tagFilters = appendExtraStreamFilters(af.tagFilters, filters) + } +} + +func appendExtraStreamFilters(orig []*streamTagFilter, filters []Field) []*streamTagFilter { + var dst []*streamTagFilter + for _, f := range filters { + dst = append(dst, &streamTagFilter{ + tagName: f.Name, + op: "=", + value: f.Value, + }) + } + return append(dst, orig...) +} + +// AddExtraFilters adds filters to q in the form of `f1.Name:=f1.Value AND ... fN.Name:=fN.Value` +func (q *Query) AddExtraFilters(filters []Field) { + if len(filters) == 0 { + return + } + + fa, ok := q.f.(*filterAnd) + if !ok { + fa = &filterAnd{ + filters: []filter{q.f}, + } + q.f = fa + } + fa.filters = addExtraFilters(fa.filters, filters) +} + +func addExtraFilters(orig []filter, filters []Field) []filter { + var dst []filter + for _, f := range filters { + dst = append(dst, &filterExact{ + fieldName: f.Name, + value: f.Value, + }) + } + return append(dst, orig...) +} + // AddPipeLimit adds `| limit n` pipe to q. // // See https://docs.victoriametrics.com/victorialogs/logsql/#limit-pipe diff --git a/lib/logstorage/parser_test.go b/lib/logstorage/parser_test.go index ee3bc7652..9c9e18e2b 100644 --- a/lib/logstorage/parser_test.go +++ b/lib/logstorage/parser_test.go @@ -2409,3 +2409,142 @@ func TestHasTimeFilter(t *testing.T) { f(`error AND (_time: 5m AND warn) | count()`, true) f(`* | error AND _time:5m | count()`, true) } + +func TestAddExtraFilters(t *testing.T) { + f := func(qStr string, extraFilters []Field, resultExpected string) { + t.Helper() + + q, err := ParseQuery(qStr) + if err != nil { + t.Fatalf("unexpected error in ParseQuery: %s", err) + } + q.AddExtraFilters(extraFilters) + + result := q.String() + if result != resultExpected { + t.Fatalf("unexpected result;\ngot\n%s\nwant\n%s", result, resultExpected) + } + } + + f(`*`, nil, `*`) + f(`_time:5m`, nil, `_time:5m`) + f(`foo _time:5m`, nil, `foo _time:5m`) + + f(`*`, []Field{ + { + Name: "foo", + Value: "bar", + }, + }, "foo:=bar *") + + f("_time:5m", []Field{ + { + Name: "fo o", + Value: "=ba:r !", + }, + }, `"fo o":="=ba:r !" _time:5m`) + + f("_time:5m {a=b}", []Field{ + { + Name: "fo o", + Value: "=ba:r !", + }, + { + Name: "x", + Value: "y", + }, + }, `"fo o":="=ba:r !" x:=y _time:5m {a="b"}`) + + f(`a or (b c)`, []Field{ + { + Name: "foo", + Value: "bar", + }, + }, `foo:=bar (a or b c)`) +} + +func TestAddExtraStreamFilters(t *testing.T) { + f := func(qStr string, extraFilters []Field, resultExpected string) { + t.Helper() + + q, err := ParseQuery(qStr) + if err != nil { + t.Fatalf("unexpected error in ParseQuery: %s", err) + } + q.AddExtraStreamFilters(extraFilters) + + result := q.String() + if result != resultExpected { + t.Fatalf("unexpected result;\ngot\n%s\nwant\n%s", result, resultExpected) + } + } + + f(`*`, nil, `*`) + f(`_time:5m`, nil, `_time:5m`) + f(`foo _time:5m`, nil, `foo _time:5m`) + + f(`*`, []Field{ + { + Name: "foo", + Value: "bar", + }, + }, `{foo="bar"} *`) + + f(`_time:5m`, []Field{ + { + Name: "fo o=", + Value: `"bar}`, + }, + }, `{"fo o="="\"bar}"} _time:5m`) + + f(`a b`, []Field{ + { + Name: "foo", + Value: "bar", + }, + }, `{foo="bar"} a b`) + + f(`a or b {c="d"}`, []Field{ + { + Name: "foo", + Value: "bar", + }, + { + Name: "x", + Value: "y", + }, + }, `{foo="bar",x="y"} (a or b {c="d"})`) + + f(`{c=~"d|e"}`, []Field{ + { + Name: "foo", + Value: "bar", + }, + { + Name: "x", + Value: "y", + }, + }, `{foo="bar",x="y",c=~"d|e"}`) + + f(`a:b {c=~"d|e"}`, []Field{ + { + Name: "foo", + Value: "bar", + }, + { + Name: "x", + Value: "y", + }, + }, `a:b {foo="bar",x="y",c=~"d|e"}`) + + f(`a:b {c=~"d|e"} {q!="w"} asdf`, []Field{ + { + Name: "foo", + Value: "bar", + }, + { + Name: "x", + Value: "y", + }, + }, `a:b {foo="bar",x="y",c=~"d|e"} {foo="bar",x="y",q!="w"} asdf`) +} diff --git a/lib/logstorage/stream_filter.go b/lib/logstorage/stream_filter.go index e2efe6d4c..602d8509c 100644 --- a/lib/logstorage/stream_filter.go +++ b/lib/logstorage/stream_filter.go @@ -93,6 +93,7 @@ type streamTagFilter struct { // value is the value value string + // regexp is initialized for `=~` and `!~` op. regexp *regexutil.PromRegex }