lib/logstorage: work-in-progress

Aliaksandr Valialkin 2024-05-25 21:36:16 +02:00
parent e2590f0485
commit dc55146752
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
46 changed files with 2615 additions and 808 deletions


@@ -6,8 +6,8 @@
{% stripspace %}
-// LabelsForHits formats labels for /select/logsql/hits response
-{% func LabelsForHits(columns []logstorage.BlockColumn, rowIdx int) %}
+// FieldsForHits formats labels for /select/logsql/hits response
+{% func FieldsForHits(columns []logstorage.BlockColumn, rowIdx int) %}
{
{% if len(columns) > 0 %}
{%q= columns[0].Name %}:{%q= columns[0].Values[rowIdx] %}


@@ -11,7 +11,7 @@ import (
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
)
-// LabelsForHits formats labels for /select/logsql/hits response
+// FieldsForHits formats labels for /select/logsql/hits response
//line app/vlselect/logsql/hits_response.qtpl:10
import (
@@ -27,7 +27,7 @@
)
//line app/vlselect/logsql/hits_response.qtpl:10
-func StreamLabelsForHits(qw422016 *qt422016.Writer, columns []logstorage.BlockColumn, rowIdx int) {
+func StreamFieldsForHits(qw422016 *qt422016.Writer, columns []logstorage.BlockColumn, rowIdx int) {
//line app/vlselect/logsql/hits_response.qtpl:10
	qw422016.N().S(`{`)
//line app/vlselect/logsql/hits_response.qtpl:12
@@ -58,22 +58,22 @@ func StreamLabelsForHits(qw422016 *qt422016.Writer, columns []logstorage.BlockCo
}
//line app/vlselect/logsql/hits_response.qtpl:19
-func WriteLabelsForHits(qq422016 qtio422016.Writer, columns []logstorage.BlockColumn, rowIdx int) {
+func WriteFieldsForHits(qq422016 qtio422016.Writer, columns []logstorage.BlockColumn, rowIdx int) {
//line app/vlselect/logsql/hits_response.qtpl:19
	qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vlselect/logsql/hits_response.qtpl:19
-	StreamLabelsForHits(qw422016, columns, rowIdx)
+	StreamFieldsForHits(qw422016, columns, rowIdx)
//line app/vlselect/logsql/hits_response.qtpl:19
	qt422016.ReleaseWriter(qw422016)
//line app/vlselect/logsql/hits_response.qtpl:19
}
//line app/vlselect/logsql/hits_response.qtpl:19
-func LabelsForHits(columns []logstorage.BlockColumn, rowIdx int) string {
+func FieldsForHits(columns []logstorage.BlockColumn, rowIdx int) string {
//line app/vlselect/logsql/hits_response.qtpl:19
	qb422016 := qt422016.AcquireByteBuffer()
//line app/vlselect/logsql/hits_response.qtpl:19
-	WriteLabelsForHits(qb422016, columns, rowIdx)
+	WriteFieldsForHits(qb422016, columns, rowIdx)
//line app/vlselect/logsql/hits_response.qtpl:19
	qs422016 := string(qb422016.B)
//line app/vlselect/logsql/hits_response.qtpl:19


@@ -77,7 +77,7 @@ func ProcessHitsRequest(ctx context.Context, w http.ResponseWriter, r *http.Requ
	hitsStr := strings.Clone(hitsValues[i])
	bb.Reset()
-	WriteLabelsForHits(bb, columns, i)
+	WriteFieldsForHits(bb, columns, i)
	mLock.Lock()
	hs, ok := m[string(bb.B)]
@@ -189,21 +189,21 @@ func ProcessFieldValuesRequest(ctx context.Context, w http.ResponseWriter, r *ht
	WriteValuesWithHitsJSON(w, values)
}
-// ProcessStreamLabelNamesRequest processes /select/logsql/stream_label_names request.
+// ProcessStreamFieldNamesRequest processes /select/logsql/stream_field_names request.
//
-// See https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-label-names
-func ProcessStreamLabelNamesRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) {
+// See https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-field-names
+func ProcessStreamFieldNamesRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) {
	q, tenantIDs, err := parseCommonArgs(r)
	if err != nil {
		httpserver.Errorf(w, r, "%s", err)
		return
	}
-	// Obtain stream label names for the given query
+	// Obtain stream field names for the given query
	q.Optimize()
-	names, err := vlstorage.GetStreamLabelNames(ctx, tenantIDs, q)
+	names, err := vlstorage.GetStreamFieldNames(ctx, tenantIDs, q)
	if err != nil {
-		httpserver.Errorf(w, r, "cannot obtain stream label names: %s", err)
+		httpserver.Errorf(w, r, "cannot obtain stream field names: %s", err)
	}
	// Write results
@@ -211,20 +211,20 @@ func ProcessStreamLabelNamesRequest(ctx context.Context, w http.ResponseWriter,
	WriteValuesWithHitsJSON(w, names)
}
-// ProcessStreamLabelValuesRequest processes /select/logsql/stream_label_values request.
+// ProcessStreamFieldValuesRequest processes /select/logsql/stream_field_values request.
//
-// See https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-label-values
-func ProcessStreamLabelValuesRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) {
+// See https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-field-values
+func ProcessStreamFieldValuesRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) {
	q, tenantIDs, err := parseCommonArgs(r)
	if err != nil {
		httpserver.Errorf(w, r, "%s", err)
		return
	}
-	// Parse labelName query arg
-	labelName := r.FormValue("label")
-	if labelName == "" {
-		httpserver.Errorf(w, r, "missing 'label' query arg")
+	// Parse fieldName query arg
+	fieldName := r.FormValue("field")
+	if fieldName == "" {
+		httpserver.Errorf(w, r, "missing 'field' query arg")
		return
	}
@@ -238,11 +238,11 @@ func ProcessStreamLabelValuesRequest(ctx context.Context, w http.ResponseWriter,
		limit = 0
	}
-	// Obtain stream label names for the given query
+	// Obtain stream field values for the given query and the given fieldName
	q.Optimize()
-	values, err := vlstorage.GetStreamLabelValues(ctx, tenantIDs, q, labelName, uint64(limit))
+	values, err := vlstorage.GetStreamFieldValues(ctx, tenantIDs, q, fieldName, uint64(limit))
	if err != nil {
-		httpserver.Errorf(w, r, "cannot obtain stream label values: %s", err)
+		httpserver.Errorf(w, r, "cannot obtain stream field values: %s", err)
	}
	// Write results


@@ -157,13 +157,13 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
		logsqlQueryRequests.Inc()
		logsql.ProcessQueryRequest(ctx, w, r)
		return true
-	case "/select/logsql/stream_label_names":
-		logsqlStreamLabelNamesRequests.Inc()
-		logsql.ProcessStreamLabelNamesRequest(ctx, w, r)
+	case "/select/logsql/stream_field_names":
+		logsqlStreamFieldNamesRequests.Inc()
+		logsql.ProcessStreamFieldNamesRequest(ctx, w, r)
		return true
-	case "/select/logsql/stream_label_values":
-		logsqlStreamLabelValuesRequests.Inc()
-		logsql.ProcessStreamLabelValuesRequest(ctx, w, r)
+	case "/select/logsql/stream_field_values":
+		logsqlStreamFieldValuesRequests.Inc()
+		logsql.ProcessStreamFieldValuesRequest(ctx, w, r)
		return true
	case "/select/logsql/streams":
		logsqlStreamsRequests.Inc()
@@ -192,7 +192,7 @@ var (
	logsqlFieldValuesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/field_values"}`)
	logsqlHitsRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/hits"}`)
	logsqlQueryRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/query"}`)
-	logsqlStreamLabelNamesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/stream_label_names"}`)
-	logsqlStreamLabelValuesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/stream_label_values"}`)
+	logsqlStreamFieldNamesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/stream_field_names"}`)
+	logsqlStreamFieldValuesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/stream_field_values"}`)
	logsqlStreamsRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/streams"}`)
)


@@ -123,16 +123,16 @@ func GetFieldValues(ctx context.Context, tenantIDs []logstorage.TenantID, q *log
	return strg.GetFieldValues(ctx, tenantIDs, q, fieldName, limit)
}
-// GetStreamLabelNames executes q and returns stream label names seen in results.
-func GetStreamLabelNames(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query) ([]logstorage.ValueWithHits, error) {
-	return strg.GetStreamLabelNames(ctx, tenantIDs, q)
+// GetStreamFieldNames executes q and returns stream field names seen in results.
+func GetStreamFieldNames(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query) ([]logstorage.ValueWithHits, error) {
+	return strg.GetStreamFieldNames(ctx, tenantIDs, q)
}
-// GetStreamLabelValues executes q and returns stream label values for the given labelName seen in results.
+// GetStreamFieldValues executes q and returns stream field values for the given fieldName seen in results.
//
-// If limit > 0, then up to limit unique stream label values are returned.
-func GetStreamLabelValues(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query, labelName string, limit uint64) ([]logstorage.ValueWithHits, error) {
-	return strg.GetStreamLabelValues(ctx, tenantIDs, q, labelName, limit)
+// If limit > 0, then up to limit unique stream field values are returned.
+func GetStreamFieldValues(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query, fieldName string, limit uint64) ([]logstorage.ValueWithHits, error) {
+	return strg.GetStreamFieldValues(ctx, tenantIDs, q, fieldName, limit)
}
// GetStreams executes q and returns streams seen in query results.


@@ -19,6 +19,16 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
## tip
* FEATURE: add [`pack_json` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#pack_json-pipe), which packs all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) into a JSON object and stores it into the given field.
* FEATURE: add [`unroll` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#unroll-pipe), which can be used for unrolling JSON arrays stored in [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
* FEATURE: add [`replace_regexp` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#replace_regexp-pipe), which allows updating [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) with regular expressions.
* FEATURE: improve performance for [`format`](https://docs.victoriametrics.com/victorialogs/logsql/#format-pipe) and [`extract`](https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe) pipes.
* FEATURE: improve performance for [`/select/logsql/field_names` HTTP API](https://docs.victoriametrics.com/victorialogs/querying/#querying-field-names).
* BUGFIX: prevent from panic in [`sort` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#sort-pipe) when VictoriaLogs runs on a system with one CPU core.
* BUGFIX: do not return referenced fields if they weren't present in the original logs. For example, `_time:5m | format if (non_existing_field:"") "abc"` could return an empty `non_existing_field`, while it shouldn't be returned because it is missing in the original logs.
* BUGFIX: properly initialize values for [`in(...)` filter](https://docs.victoriametrics.com/victorialogs/logsql/#exact-filter) inside [`filter` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#filter-pipe) if the `in(...)` contains other [filters](https://docs.victoriametrics.com/victorialogs/logsql/#filters). For example, `_time:5m | filter ip:in(user_type:admin | fields ip)` now works correctly.
## [v0.11.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.11.0-victorialogs)
Released at 2024-05-25
@@ -63,8 +73,8 @@ Released at 2024-05-22
* FEATURE: add ability to unpack [logfmt](https://brandur.org/logfmt) fields with [`unpack_logfmt` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_logfmt-pipe) only if the given condition is met. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#conditional-unpack_logfmt).
* FEATURE: add [`fields_min`](https://docs.victoriametrics.com/victorialogs/logsql/#fields_min-stats) and [`fields_max`](https://docs.victoriametrics.com/victorialogs/logsql/#fields_max-stats) functions for [`stats` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe), which allow returning all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) for the log entry with the minimum / maximum value at the given field.
* FEATURE: add `/select/logsql/streams` HTTP endpoint for returning [streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-streams) for details.
-* FEATURE: add `/select/logsql/stream_label_names` HTTP endpoint for returning [stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label names from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-label-names) for details.
-* FEATURE: add `/select/logsql/stream_label_values` HTTP endpoint for returning [stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label values for the given label from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-label-values) for details.
+* FEATURE: add `/select/logsql/stream_field_names` HTTP endpoint for returning [stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) field names from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-field-names) for details.
+* FEATURE: add `/select/logsql/stream_field_values` HTTP endpoint for returning [stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) field values for the given label from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-field-values) for details.
* FEATURE: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): change time range limitation from `_time` in the expression to `start` and `end` query args.
* BUGFIX: fix `invalid memory address or nil pointer dereference` panic when using [`extract`](https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe), [`unpack_json`](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_json-pipe) or [`unpack_logfmt`](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_logfmt-pipe) pipes. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6306).


@@ -37,6 +37,8 @@ For example, the following query finds all the logs with `error` word:
error
```
See [how to send queries to VictoriaLogs](https://docs.victoriametrics.com/victorialogs/querying/).
If the queried [word](#word) clashes with LogsQL keywords, then just wrap it into quotes.
For example, the following query finds all the log messages with `and` [word](#word):
@@ -80,11 +82,32 @@ Typical LogsQL query constists of multiple [filters](#filters) joined with `AND`
So LogsQL allows omitting `AND` words. For example, the following query is equivalent to the query above:
```logsql
-error _time:5m
+_time:5m error
```
-The query returns all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) by default.
-See [how to query specific fields](#querying-specific-fields).
+The query returns logs in arbitrary order because sorting of big amounts of logs may require non-trivial amounts of CPU and RAM.
+The number of logs with the `error` word over the last 5 minutes usually isn't too big (e.g. less than a few million), so it is OK to sort them with [`sort` pipe](#sort-pipe).
The following query sorts the selected logs by [`_time`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field) field:
```logsql
_time:5m error | sort by (_time)
```
It is unlikely you are going to investigate more than a few hundred logs returned by the query above. So you can limit the number of returned logs
with [`limit` pipe](#limit-pipe). The following query returns the last 10 logs with the `error` word over the last 5 minutes:
```logsql
_time:5m error | sort by (_time) desc | limit 10
```
By default VictoriaLogs returns all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
If you need only the given set of fields, then add [`fields` pipe](#fields-pipe) to the end of the query. For example, the following query returns only
[`_time`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field), [`_stream`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields)
and [`_msg`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) fields:
```logsql
error _time:5m | fields _time, _stream, _msg
```
Suppose the query above selects too many rows because some buggy app pushes invalid error logs to VictoriaLogs. Suppose the app adds `buggy_app` [word](#word) to every log line.
Then the following query removes all the logs from the buggy app, allowing us to pay attention to the real errors:
@@ -93,8 +116,10 @@ Then the following query removes all the logs from the buggy app, allowing us pa
_time:5m error NOT buggy_app
```
-This query uses `NOT` [operator](#logical-filter) for removing log lines from the buggy app. The `NOT` operator is used frequently, so it can be substituted with `!` char.
-So the following query is equivalent to the previous one:
+This query uses `NOT` [operator](#logical-filter) for removing log lines from the buggy app. The `NOT` operator is used frequently, so it can be substituted with `!` char
+(the `!` char is used instead of `-` char as a shorthand for `NOT` operator because it nicely combines with [`=`](https://docs.victoriametrics.com/victorialogs/logsql/#exact-filter)
+and [`~`](https://docs.victoriametrics.com/victorialogs/logsql/#regexp-filter) filters like `!=` and `!~`).
+The following query is equivalent to the previous one:
```logsql
_time:5m error !buggy_app
@@ -113,17 +138,15 @@ This query can be rewritten to more clear query with the `OR` [operator](#logica
_time:5m error !(buggy_app OR foobar)
```
-Note that the parentheses are required here, since otherwise the query won't return the expected results.
-The query `error !buggy_app OR foobar` is interpreted as `(error AND NOT buggy_app) OR foobar`. This query may return error logs
-from the buggy app if they contain `foobar` [word](#word). This query also continues returning all the error logs from the second buggy app.
-This is because of different priorities for `NOT`, `AND` and `OR` operators.
-Read [these docs](#logical-filter) for more details.
-There is no need in remembering all these priority rules -
-just wrap the needed query parts into explicit parentheses if you aren't sure in priority rules.
+The parentheses are **required** here, since otherwise the query won't return the expected results.
+The query `error !buggy_app OR foobar` is interpreted as `(error AND NOT buggy_app) OR foobar` according to [priorities for AND, OR and NOT operator](#logical-filters).
+This query returns logs with `foobar` [word](#word), even if they do not contain `error` word or contain `buggy_app` word.
+So it is recommended wrapping the needed query parts into explicit parentheses if you are unsure about the priority rules.
As an additional bonus, explicit parentheses make queries easier to read and maintain.
Queries above assume that the `error` [word](#word) is stored in the [log message](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field).
-This word can be stored in other [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) such as `log.level`.
-How to select error logs in this case? Just add the `log.level:` prefix in front of the `error` word:
+If this word is stored in other [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) such as `log.level`, then add `log.level:` prefix
+in front of the `error` word:
```logsql
_time:5m log.level:error !(buggy_app OR foobar)
@@ -158,8 +181,16 @@ If the `app` field is associated with the log stream, then the query above can b
_time:5m log.level:error _stream:{app!~"buggy_app|foobar"}
```
-This query completely skips scanning for logs from `buggy_app` and `foobar` apps, thus significantly reducing disk read IO and CPU time
-needed for performing the query.
+This query skips scanning for [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) from `buggy_app` and `foobar` apps.
+It inspects only `log.level` and [`_stream`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) labels.
+This significantly reduces disk read IO and CPU time needed for performing the query.
LogsQL also provides [functions for statistics calculation](#stats-pipe) over the selected logs. For example, the following query returns the number of logs
with the `error` word for the last 5 minutes:
```logsql
_time:5m error | stats count() logs_with_error
```
Finally, it is recommended reading [performance tips](#performance-tips).
@@ -177,13 +208,16 @@ These words are taken into account by full-text search filters such as
#### Query syntax
-LogsQL query must contain [filters](#filters) for selecting the matching logs. At least a single filter is required.
+LogsQL query must contain at least a single [filter](#filters) for selecting the matching logs.
For example, the following query selects all the logs for the last 5 minutes by using [`_time` filter](#time-filter):
```logsql
_time:5m
```
Tip: try [`*` filter](https://docs.victoriametrics.com/victorialogs/logsql/#any-value-filter), which selects all the logs stored in VictoriaLogs.
Do not worry - this doesn't crash VictoriaLogs, even if it contains trillions of logs. In the worst case it will return
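
A minimal sketch (not taken from the linked docs) of combining the `*` filter with the [`limit` pipe](#limit-pipe) in order to peek at a handful of arbitrary logs:

```logsql
* | limit 10
```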
Additionally to filters, a LogsQL query may contain an arbitrary mix of optional actions for processing the selected logs. These actions are delimited by `|` and are known as [`pipes`](#pipes).
For example, the following query uses [`stats` pipe](#stats-pipe) for returning the number of [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
with the `error` [word](#word) for the last 5 minutes:
@@ -1080,13 +1114,16 @@ LogsQL supports the following pipes:
- [`format`](#format-pipe) formats output field from input [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`limit`](#limit-pipe) limits the number of selected logs.
- [`offset`](#offset-pipe) skips the given number of selected logs.
- [`pack_json`](#pack_json-pipe) packs [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) into JSON object.
- [`rename`](#rename-pipe) renames [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`replace`](#replace-pipe) replaces substrings in the specified [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`replace_regexp`](#replace_regexp-pipe) updates [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) with regular expressions.
- [`sort`](#sort-pipe) sorts logs by the given [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`stats`](#stats-pipe) calculates various stats over the selected logs.
- [`uniq`](#uniq-pipe) returns unique log entries.
- [`unpack_json`](#unpack_json-pipe) unpacks JSON fields from [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`unpack_logfmt`](#unpack_logfmt-pipe) unpacks [logfmt](https://brandur.org/logfmt) fields from [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`unroll`](#unroll-pipe) unrolls JSON arrays from [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).

### copy pipe
@@ -1178,6 +1215,9 @@ For example, the following query preserves the original `ip` field value if `foo
_time:5m | extract 'ip=<ip> ' from foo skip_empty_results
```
Performance tip: it is recommended using more specific [log filters](#filters) in order to reduce the number of log entries, which are passed to `extract`.
See [general performance tips](#performance-tips) for details.
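
As a hedged illustration of this tip (not from the linked docs), the `extract` example above can be narrowed with an extra word filter so that fewer logs reach the pipe:

```logsql
_time:5m error | extract 'ip=<ip> ' from foo
```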
See also:
- [Format for extract pipe pattern](#format-for-extract-pipe-pattern)
@@ -1363,10 +1403,14 @@ when at least `field1` or `field2` aren't empty, while preserving the original `
_time:5m | format "<field1><field2>" as foo skip_empty_results
```
Performance tip: it is recommended using more specific [log filters](#filters) in order to reduce the number of log entries, which are passed to `format`.
See [general performance tips](#performance-tips) for details.
See also:
- [Conditional format](#conditional-format)
- [`replace` pipe](#replace-pipe)
- [`replace_regexp` pipe](#replace_regexp-pipe)
- [`extract` pipe](#extract-pipe)
@@ -1419,6 +1463,37 @@ See also:
- [`limit` pipe](#limit-pipe)
- [`sort` pipe](#sort-pipe)
### pack_json pipe
`| pack_json as field_name` [pipe](#pipes) packs all [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) into a JSON object
and stores it as a string in the given `field_name`.
For example, the following query packs all the fields into JSON object and stores it into [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
for logs over the last 5 minutes:
```logsql
_time:5m | pack_json as _msg
```
The `as _msg` part can be omitted if packed JSON object is stored into [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field).
The following query is equivalent to the previous one:
```logsql
_time:5m | pack_json
```
The `pack_json` doesn't touch other fields. If you do not need them, then add [`| fields ...`](#fields-pipe) after the `pack_json` pipe. For example, the following query
leaves only the `foo` field with the original log fields packed into JSON:
```logsql
_time:5m | pack_json as foo | fields foo
```
See also:
- [`unpack_json` pipe](#unpack_json-pipe)
### rename pipe

If some [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) must be renamed, then `| rename src1 as dst1, ..., srcN as dstN` [pipe](#pipes) can be used.
@@ -1470,9 +1545,13 @@ at the [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#da
_time:5m | replace ('foo', 'bar') at baz limit 1
```
Performance tip: it is recommended using more specific [log filters](#filters) in order to reduce the number of log entries, which are passed to `replace`.
See [general performance tips](#performance-tips) for details.
See also:
- [Conditional replace](#conditional-replace)
- [`replace_regexp` pipe](#replace_regexp-pipe)
- [`format` pipe](#format-pipe)
- [`extract` pipe](#extract-pipe)
@@ -1487,6 +1566,58 @@ only if `user_type` field equals to `admin`:
_time:5m | replace if (user_type:=admin) ("secret", "***") at password
```
### replace_regexp pipe
`| replace_regexp ("regexp", "replacement") at field` [pipe](#pipes) replaces all the substrings matching the given `regexp` with the given `replacement`
in the given [`field`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
The `regexp` must contain regular expression with [RE2 syntax](https://github.com/google/re2/wiki/Syntax).
The `replacement` may contain `$N` or `${N}` placeholders, which are substituted with the `N-th` capturing group in the `regexp`.
For example, the following query replaces all the substrings starting with `host-` and ending with `-foo` with the contents between `host-` and `-foo` in the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) for logs over the last 5 minutes:
```logsql
_time:5m | replace_regexp ("host-(.+?)-foo", "$1") at _msg
```
The `at _msg` part can be omitted if the replacement occurs in the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field).
The following query is equivalent to the previous one:
```logsql
_time:5m | replace_regexp ("host-(.+?)-foo", "$1")
```
The number of replacements can be limited with `limit N` at the end of `replace_regexp`. For example, the following query replaces only the first `password: ...` substring
ending with whitespace with empty substring at the [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) `baz`:
```logsql
_time:5m | replace_regexp ('password: [^ ]+', '') at baz limit 1
```
Performance tips:
- It is recommended using [`replace` pipe](#replace-pipe) instead of `replace_regexp` if possible, since it works faster.
- It is recommended using more specific [log filters](#filters) in order to reduce the number of log entries, which are passed to `replace_regexp`.
See [general performance tips](#performance-tips) for details.
See also:
- [Conditional replace_regexp](#conditional-replace_regexp)
- [`replace` pipe](#replace-pipe)
- [`format` pipe](#format-pipe)
- [`extract` pipe](#extract-pipe)
#### Conditional replace_regexp
If the [`replace_regexp` pipe](#replace_regexp-pipe) mustn't be applied to every [log entry](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model),
then add `if (<filters>)` after `replace_regexp`.
The `<filters>` can contain arbitrary [filters](#filters). For example, the following query replaces `password: ...` substrings ending with whitespace
with `***` in the `foo` field only if `user_type` field equals to `admin`:
```logsql
_time:5m | replace_regexp if (user_type:=admin) ("password: [^ ]+", "***") at foo
```
### sort pipe

By default logs are selected in arbitrary order because of performance reasons. If logs must be sorted, then `| sort by (field1, ..., fieldN)` [pipe](#pipes) can be used.
@@ -1720,10 +1851,10 @@ _time:5m | uniq by (host, path)
The unique entries are returned in arbitrary order. Use [`sort` pipe](#sort-pipe) in order to sort them if needed.
-Add `hits` after `uniq by (...)` in order to return the number of matching logs per each field value:
+Add `with hits` after `uniq by (...)` in order to return the number of matching logs per each field value:
```logsql
-_time:5m | uniq by (host) hits
+_time:5m | uniq by (host) with hits
```
Unique entries are stored in memory during query execution. Big number of unique selected entries may require a lot of memory.
@@ -1802,15 +1933,22 @@ form `foo`:
_time:5m | unpack_json from foo result_prefix "foo_"
```
-Performance tip: it is better from performance and resource usage PoV ingesting parsed JSON logs into VictoriaLogs
+Performance tips:
- It is better from performance and resource usage PoV ingesting parsed JSON logs into VictoriaLogs
according to the [supported data model](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
instead of ingesting unparsed JSON lines into VictoriaLogs and then parsing them at query time with [`unpack_json` pipe](#unpack_json-pipe).
- It is recommended using more specific [log filters](#filters) in order to reduce the number of log entries, which are passed to `unpack_json`.
See [general performance tips](#performance-tips) for details.
See also:
- [Conditional `unpack_json`](#conditional-unpack_json)
- [`unpack_logfmt` pipe](#unpack_logfmt-pipe)
- [`extract` pipe](#extract-pipe)
- [`unroll` pipe](#unroll-pipe)
- [`pack_json` pipe](#pack_json-pipe)
#### Conditional unpack_json

@@ -1879,10 +2017,15 @@ from `foo` field:
_time:5m | unpack_logfmt from foo result_prefix "foo_"
```
-Performance tip: it is better from performance and resource usage PoV ingesting parsed [logfmt](https://brandur.org/logfmt) logs into VictoriaLogs
+Performance tips:
- It is better from performance and resource usage PoV ingesting parsed [logfmt](https://brandur.org/logfmt) logs into VictoriaLogs
according to the [supported data model](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
instead of ingesting unparsed logfmt lines into VictoriaLogs and then parsing them at query time with [`unpack_logfmt` pipe](#unpack_logfmt-pipe).
- It is recommended using more specific [log filters](#filters) in order to reduce the number of log entries, which are passed to `unpack_logfmt`.
See [general performance tips](#performance-tips) for details.
See also:
- [Conditional unpack_logfmt](#conditional-unpack_logfmt)
@@ -1900,6 +2043,34 @@ only if `ip` field in the current log entry isn't set or empty:
_time:5m | unpack_logfmt if (ip:"") from foo
```
### unroll pipe
`| unroll by (field1, ..., fieldN)` [pipe](#pipes) can be used for unrolling JSON arrays from `field1`, ..., `fieldN`
[log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) into separate rows.
For example, the following query unrolls `timestamp` and `value` [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) from logs for the last 5 minutes:
```logsql
_time:5m | unroll (timestamp, value)
```
See also:
- [`unpack_json` pipe](#unpack_json-pipe)
- [`extract` pipe](#extract-pipe)
- [`uniq_values` stats function](#uniq_values-stats)
- [`values` stats function](#values-stats)
#### Conditional unroll
If the [`unroll` pipe](#unroll-pipe) mustn't be applied to every [log entry](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model),
then add `if (<filters>)` after `unroll`.
The `<filters>` can contain arbitrary [filters](#filters). For example, the following query unrolls `value` field only if `value_type` field equals to `json_array`:
```logsql
_time:5m | unroll if (value_type:="json_array") (value)
```
## stats pipe functions

LogsQL supports the following functions for [`stats` pipe](#stats-pipe):
@@ -2204,6 +2375,8 @@ over logs for the last 5 minutes:
_time:5m | stats uniq_values(ip) unique_ips
```
The returned unique ip addresses can be unrolled into distinct log entries with [`unroll` pipe](#unroll-pipe).
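
For example, a hedged sketch (not part of the original docs) of feeding the collected unique values into the [`unroll` pipe](#unroll-pipe):

```logsql
_time:5m | stats uniq_values(ip) unique_ips | unroll (unique_ips)
```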
Every unique value is stored in memory during query execution. Big number of unique values may require a lot of memory. Sometimes it is enough to return
only a subset of unique values. In this case add `limit N` after `uniq_values(...)` in order to limit the number of returned unique values to `N`,
while limiting the maximum memory usage.
@@ -2236,6 +2409,8 @@ over logs for the last 5 minutes:
_time:5m | stats values(ip) ips
```
The returned ip addresses can be unrolled into distinct log entries with [`unroll` pipe](#unroll-pipe).
See also:
- [`uniq_values`](#uniq_values-stats)
@@ -2257,8 +2432,9 @@ LogsQL supports the following transformations on the log entries selected with [
  See [these docs](#extract-pipe) for details.
- Unpacking JSON fields from [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). See [these docs](#unpack_json-pipe).
- Unpacking [logfmt](https://brandur.org/logfmt) fields from [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). See [these docs](#unpack_logfmt-pipe).
-- Creating a new field from existing [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) according to the provided format. See [these docs](#format-pipe).
+- Creating a new field from existing [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) according to the provided format. See [`format` pipe](#format-pipe).
-- Replacing substrings in the given [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). See [these docs](#replace-pipe).
+- Replacing substrings in the given [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
See [`replace` pipe](#replace-pipe) and [`replace_regexp` pipe](#replace_regexp-pipe) docs.
LogsQL will support the following transformations in the future:
@@ -2350,3 +2526,5 @@ Internally duration values are converted into nanoseconds.
This rule doesn't apply to [time filter](#time-filter) and [stream filter](#stream-filter), which can be put at any place of the query.
- Move more specific filters, which match lower number of log entries, to the beginning of the query.
This rule doesn't apply to [time filter](#time-filter) and [stream filter](#stream-filter), which can be put at any place of the query.
- If the selected logs are passed to [pipes](#pipes) for further transformations and statistics calculation, then it is recommended
reducing the number of selected logs by using more specific [filters](#filters), which return a lower number of logs to process by [pipes](#pipes) (see the example below).
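
For example, a hedged sketch of this tip: a narrowed filter in front of an expensive pipe means the pipe has to process fewer logs:

```logsql
_time:5m log.level:error | stats count() errors
```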


@@ -74,14 +74,14 @@ during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-inges
}
```
-Both label name and label value may contain arbitrary chars. Such chars must be encoded
+Both field name and field value may contain arbitrary chars. Such chars must be encoded
during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/)
according to [JSON string encoding](https://www.rfc-editor.org/rfc/rfc7159.html#section-7).
Unicode chars must be encoded with [UTF-8](https://en.wikipedia.org/wiki/UTF-8) encoding:
```json
{
-  "label with whitepsace": "value\nwith\nnewlines",
+  "field with whitespace": "value\nwith\nnewlines",
  "Поле": "价值",
}
```
@@ -89,13 +89,11 @@ Unicode chars must be encoded with [UTF-8](https://en.wikipedia.org/wiki/UTF-8)
VictoriaLogs automatically indexes all the fields in all the [ingested](https://docs.victoriametrics.com/victorialogs/data-ingestion/) logs.
This enables [full-text search](https://docs.victoriametrics.com/victorialogs/logsql/) across all the fields.
-VictoriaLogs supports the following field types:
+VictoriaLogs supports the following special fields additionally to arbitrary [other fields](#other-fields):
* [`_msg` field](#message-field)
* [`_time` field](#time-field)
* [`_stream` fields](#stream-fields)
-* [other fields](#other-fields)
### Message field

@@ -116,7 +114,9 @@ during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-inges
### Time field
The ingested [log entries](#data-model) may contain `_time` field with the timestamp of the ingested log entry.
-For example:
+The timestamp must be in [RFC3339](https://www.rfc-editor.org/rfc/rfc3339) format. The most commonly used subset of [ISO8601](https://en.wikipedia.org/wiki/ISO_8601)
+is also supported. It is allowed to specify the seconds part of the timestamp with any precision up to nanoseconds.
+For example, the following [log entry](#data-model) contains a valid timestamp with millisecond precision in the `_time` field:
```json
{
@@ -132,29 +132,39 @@ during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-inges
If `_time` field is missing, then the data ingestion time is used as log entry timestamp.
-The log entry timestamp allows quickly narrowing down the search to a particular time range.
-See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter) for details.
+The `_time` field is used in [time filter](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter) for quickly narrowing down
+the search to a particular time range.
### Stream fields

Some [structured logging](#data-model) fields may uniquely identify the application instance, which generates log entries.
-This may be either a single field such as `instance=host123:456` or a set of fields such as
-`(datacenter=..., env=..., job=..., instance=...)` or
-`(kubernetes.namespace=..., kubernetes.node.name=..., kubernetes.pod.name=..., kubernetes.container.name=...)`.
+This may be either a single field such as `instance="host123:456"` or a set of fields such as
+`{datacenter="...", env="...", job="...", instance="..."}` or
+`{kubernetes.namespace="...", kubernetes.node.name="...", kubernetes.pod.name="...", kubernetes.container.name="..."}`.
-Log entries received from a single application instance form a log stream in VictoriaLogs.
+Log entries received from a single application instance form a **log stream** in VictoriaLogs.
-VictoriaLogs optimizes storing and querying of individual log streams. This provides the following benefits:
+VictoriaLogs optimizes storing and [querying](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter) of individual log streams.
+This provides the following benefits:
- Reduced disk space usage, since a log stream from a single application instance is usually compressed better
than a mixed log stream from multiple distinct applications.
- Increased query performance, since VictoriaLogs needs to scan lower amounts of data
-when [searching by stream labels](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter).
+when [searching by stream fields](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter).
-VictoriaLogs cannot determine automatically, which fields uniquely identify every log stream,
-so it stores all the received log entries in a single default stream - `{}`.
-This may lead to not-so-optimal resource usage and query performance.
+Every ingested log entry is associated with a log stream. The name of this stream is stored in `_stream` field.
+This field has the format similar to [labels in Prometheus metrics](https://docs.victoriametrics.com/keyconcepts/#labels):
```
{field1="value1", ..., fieldN="valueN"}
```
For example, if `host` and `app` fields are associated with the stream, then the `_stream` field will have `{host="host-123",app="my-app"}` value
for the log entry with `host="host-123"` and `app="my-app"` fields. The `_stream` field can be searched
with [stream filters](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter).
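
For example, a hedged sketch (reusing the field values mentioned above) of selecting logs from that stream over the last 5 minutes with a stream filter:

```logsql
_time:5m _stream:{host="host-123",app="my-app"}
```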
By default the value of `_stream` field is `{}`, since VictoriaLogs cannot determine automatically,
which fields uniquely identify every log stream. This may lead to not-so-optimal resource usage and query performance.
Therefore it is recommended specifying stream-level fields via `_stream_fields` query arg
during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/).
For example, if logs from Kubernetes containers have the following fields:
@@ -175,20 +185,17 @@ per-container logs into distinct streams.
#### How to determine which fields must be associated with log streams?
-[Log streams](#stream-fields) can be associated with fields, which simultaneously meet the following conditions:
-- Fields, which remain constant across log entries received from a single application instance.
-- Fields, which uniquely identify the application instance. For example, `instance`, `host`, `container`, etc.
-Sometimes a single application instance may generate multiple log streams and store them into distinct log files.
-In this case it is OK to associate the log stream with filepath fields such as `log.file.path` additionally to instance-specific fields.
-Structured logs may contain big number of fields, which do not change across log entries received from a single application instance.
-There is no need in associating all these fields with log stream - it is enough to associate only those fields, which uniquely identify
-the application instance across all the ingested logs. Additionally, some fields such as `datacenter`, `environment`, `namespace`, `job` or `app`,
-can be associated with log stream in order to optimize searching by these fields with [stream filtering](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter).
-Never associate log streams with fields, which may change across log entries of the same application instance.
+[Log streams](#stream-fields) must contain [fields](#data-model), which uniquely identify the application instance, which generates logs.
+For example, `container`, `instance` and `host` are good candidates for stream fields.
+Additional fields may be added to log streams if they **remain constant during application instance lifetime**.
+For example, `namespace`, `node`, `pod` and `job` are good candidates for additional stream fields. Adding such fields to log streams
+makes sense if you are going to use these fields during search and want to speed it up with [stream filters](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter).
+There is **no need to add all the constant fields to log streams**, since this may increase resource usage during data ingestion and querying.
+**Never add non-constant fields to streams if these fields may change with every log entry of the same stream**.
+For example, `ip`, `user_id` and `trace_id` **must never be associated with log streams**, since this may lead to [high cardinality issues](#high-cardinality).
+See [these docs](#high-cardinality) for details.
#### High cardinality
@@ -196,8 +203,7 @@ Some fields in the [ingested logs](#data-model) may contain big number of unique
For example, fields with names such as `ip`, `user_id` or `trace_id` tend to contain big number of unique values.
VictoriaLogs works perfectly with such fields unless they are associated with [log streams](#stream-fields).
Never associate high-cardinality fields with [log streams](#stream-fields), since this may result **Never** associate high-cardinality fields with [log streams](#stream-fields), since this may lead to the following issues:
to the following issues:
- Performance degradation during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/) - Performance degradation during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/)
and [querying](https://docs.victoriametrics.com/victorialogs/querying/) and [querying](https://docs.victoriametrics.com/victorialogs/querying/)
@ -214,9 +220,9 @@ This can help narrowing down and eliminating high-cardinality fields from [log s
### Other fields ### Other fields
The rest of [structured logging](#data-model) fields are optional. They can be used for simplifying and optimizing search queries. Every ingested log entry may contain arbitrary number of [fields](#data-model) additionally to [`_msg`](#message-field) and [`_time`](#time-field).
For example, it is usually faster to search over a dedicated `trace_id` field instead of searching for the `trace_id` inside long log message. For example, `level`, `ip`, `user_id`, `trace_id`, etc. Such fields can be used for simplifying and optimizing [search queries](https://docs.victoriametrics.com/victorialogs/logsql/).
E.g. the `trace_id:XXXX-YYYY-ZZZZ` query usually works faster than the `_msg:"trace_id=XXXX-YYYY-ZZZZ"` query. It is usually faster to search over a dedicated `trace_id` field instead of searching for the `trace_id` inside long [log message](#message-field).
E.g. the `trace_id:="XXXX-YYYY-ZZZZ"` query usually works faster than the `_msg:"trace_id=XXXX-YYYY-ZZZZ"` query.
See [LogsQL docs](https://docs.victoriametrics.com/victorialogs/logsql/) for more details. See [LogsQL docs](https://docs.victoriametrics.com/victorialogs/logsql/) for more details.
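To make the comparison above concrete, here is a sketch of both query variants as `curl` commands (the endpoint and filter syntax are taken from the querying examples in these docs):

```sh
# search over the dedicated trace_id field - usually faster
curl http://localhost:9428/select/logsql/query -d 'query=trace_id:="XXXX-YYYY-ZZZZ"'
# substring search inside the log message - usually slower
curl http://localhost:9428/select/logsql/query -d 'query=_msg:"trace_id=XXXX-YYYY-ZZZZ"'
```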
@ -28,8 +28,8 @@ VictoriaLogs provides the following HTTP endpoints:
- [`/select/logsql/query`](#querying-logs) for querying logs - [`/select/logsql/query`](#querying-logs) for querying logs
- [`/select/logsql/hits`](#querying-hits-stats) for querying log hits stats over the given time range - [`/select/logsql/hits`](#querying-hits-stats) for querying log hits stats over the given time range
- [`/select/logsql/streams`](#querying-streams) for querying [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) - [`/select/logsql/streams`](#querying-streams) for querying [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields)
- [`/select/logsql/stream_label_names`](#querying-stream-label-names) for querying [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label names - [`/select/logsql/stream_field_names`](#querying-stream-field-names) for querying [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) field names
- [`/select/logsql/stream_label_values`](#querying-stream-label-values) for querying [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label values - [`/select/logsql/stream_field_values`](#querying-stream-field-values) for querying [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) field values
- [`/select/logsql/field_names`](#querying-field-names) for querying [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) names. - [`/select/logsql/field_names`](#querying-field-names) for querying [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) names.
- [`/select/logsql/field_values`](#querying-field-values) for querying [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) values. - [`/select/logsql/field_values`](#querying-field-values) for querying [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) values.
@ -43,8 +43,8 @@ For example, the following query returns all the log entries with the `error` wo
curl http://localhost:9428/select/logsql/query -d 'query=error' curl http://localhost:9428/select/logsql/query -d 'query=error'
``` ```
The response by default contains all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). The response by default contains all the [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) for the selected logs.
See [how to query specific fields](https://docs.victoriametrics.com/victorialogs/logsql/#querying-specific-fields). Use [`fields` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#fields-pipe) for selecting only the needed fields.
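For example, the following sketch returns only the `_time` and `_msg` fields for logs with the `error` word:

```sh
curl http://localhost:9428/select/logsql/query -d 'query=error | fields _time, _msg'
```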
The `query` argument can be passed either in the request url itself (aka HTTP GET request) or via request body The `query` argument can be passed either in the request url itself (aka HTTP GET request) or via request body
with the `x-www-form-urlencoded` encoding (aka HTTP POST request). The HTTP POST is useful for sending long queries with the `x-www-form-urlencoded` encoding (aka HTTP POST request). The HTTP POST is useful for sending long queries
@ -56,7 +56,8 @@ or similar tools.
By default the `/select/logsql/query` returns all the log entries matching the given `query`. The response size can be limited in the following ways: By default the `/select/logsql/query` returns all the log entries matching the given `query`. The response size can be limited in the following ways:
- By closing the response stream at any time. In this case VictoriaLogs stops query execution and frees all the resources occupied by the request. - By closing the response stream at any time. VictoriaLogs stops query execution and frees all the resources occupied by the request as soon as it detects that the client connection is closed.
So it is safe running [`*` query](https://docs.victoriametrics.com/victorialogs/logsql/#any-value-filter), which selects all the logs, even if trillions of logs are stored in VictoriaLogs.
- By specifying the maximum number of log entries, which can be returned in the response via `limit` query arg. For example, the following request returns - By specifying the maximum number of log entries, which can be returned in the response via `limit` query arg. For example, the following request returns
up to 10 matching log entries: up to 10 matching log entries:
```sh ```sh
@ -68,7 +69,7 @@ By default the `/select/logsql/query` returns all the log entries matching the g
``` ```
- By adding [`_time` filter](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter). The time range for the query can be specified via optional - By adding [`_time` filter](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter). The time range for the query can be specified via optional
`start` and `end` query args formatted according to [these docs](https://docs.victoriametrics.com/single-server-victoriametrics/#timestamp-formats). `start` and `end` query args formatted according to [these docs](https://docs.victoriametrics.com/single-server-victoriametrics/#timestamp-formats); see the example after this list.
- By adding other [filters](https://docs.victoriametrics.com/victorialogs/logsql/#filters) to the query. - By adding more specific [filters](https://docs.victoriametrics.com/victorialogs/logsql/#filters) to the query, which select fewer logs.
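For example, here is a sketch of a time-limited query via the `start` and `end` args (the timestamp values are illustrative):

```sh
# return logs with the `error` word for January 1, 2024 (UTC)
curl http://localhost:9428/select/logsql/query -d 'query=error' -d 'start=2024-01-01T00:00:00Z' -d 'end=2024-01-02T00:00:00Z'
```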
The `/select/logsql/query` endpoint returns [a stream of JSON lines](https://jsonlines.org/), The `/select/logsql/query` endpoint returns [a stream of JSON lines](https://jsonlines.org/),
where each line contains JSON-encoded log entry in the form `{field1="value1",...,fieldN="valueN"}`. where each line contains JSON-encoded log entry in the form `{field1="value1",...,fieldN="valueN"}`.
@ -79,18 +80,18 @@ Example response:
{"_msg":"some other error","_stream":"{}","_time":"2023-01-01T13:32:15Z"} {"_msg":"some other error","_stream":"{}","_time":"2023-01-01T13:32:15Z"}
``` ```
The matching lines are sent to the response stream as soon as they are found in VictoriaLogs storage. Logs lines are sent to the response stream as soon as they are found in VictoriaLogs storage.
This means that the returned response may contain billions of lines for queries matching too many log entries. This means that the returned response may contain billions of lines for queries matching too many log entries.
The response can be interrupted at any time by closing the connection to VictoriaLogs server. The response can be interrupted at any time by closing the connection to VictoriaLogs server.
This allows post-processing the returned lines at the client side with the usual Unix commands such as `grep`, `jq`, `less`, `head`, etc. This allows post-processing the returned lines at the client side with the usual Unix commands such as `grep`, `jq`, `less`, `head`, etc.,
See [these docs](#command-line) for more details. without worrying about resource usage at VictoriaLogs side. See [these docs](#command-line) for more details.
The returned lines aren't sorted, since sorting disables the ability to send matching log entries to response stream as soon as they are found. The returned lines aren't sorted by default, since sorting disables the ability to send matching log entries to response stream as soon as they are found.
Query results can be sorted either at VictoriaLogs side according [to these docs](https://docs.victoriametrics.com/victorialogs/logsql/#sort-pipe) Query results can be sorted either at VictoriaLogs side via [`sort` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#sort-pipe)
or at client side with the usual `sort` command according to [these docs](#command-line). or at client side with the usual `sort` command according to [these docs](#command-line).
By default the `(AccountID=0, ProjectID=0)` [tenant](https://docs.victoriametrics.com/victorialogs/#multitenancy) is queried. By default the `(AccountID=0, ProjectID=0)` [tenant](https://docs.victoriametrics.com/victorialogs/#multitenancy) is queried.
If you need querying other tenant, then specify the needed tenant via http request headers. For example, the following query searches If you need to query another tenant, then specify it via `AccountID` and `ProjectID` http request headers. For example, the following query searches
for log messages at `(AccountID=12, ProjectID=34)` tenant: for log messages at `(AccountID=12, ProjectID=34)` tenant:
```sh ```sh
@ -100,14 +101,20 @@ curl http://localhost:9428/select/logsql/query -H 'AccountID: 12' -H 'ProjectID:
The number of requests to `/select/logsql/query` can be [monitored](https://docs.victoriametrics.com/victorialogs/#monitoring) The number of requests to `/select/logsql/query` can be [monitored](https://docs.victoriametrics.com/victorialogs/#monitoring)
with `vl_http_requests_total{path="/select/logsql/query"}` metric. with `vl_http_requests_total{path="/select/logsql/query"}` metric.
See also:
- [Querying hits stats](#querying-hits-stats) - [Querying hits stats](#querying-hits-stats)
- [Querying streams](#querying-streams) - [Querying streams](#querying-streams)
- [HTTP API](#http-api) - [Querying stream field names](#querying-stream-field-names)
- [Querying stream field values](#querying-stream-field-values)
- [Querying field names](#querying-field-names)
- [Querying field values](#querying-field-values)
### Querying hits stats ### Querying hits stats
VictoriaLogs provides `/select/logsql/hits?query=<query>&start=<start>&end=<end>&step=<step>` HTTP endpoint, which returns the number VictoriaLogs provides `/select/logsql/hits?query=<query>&start=<start>&end=<end>&step=<step>` HTTP endpoint, which returns the number
of matching log entries for the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` of matching log entries for the given [`<query>`](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]`
time range grouped by `<step>` buckets. The returned results are sorted by time. time range grouped by `<step>` buckets. The returned results are sorted by time.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats). The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
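For example, the following sketch returns the number of logs with the `error` word over the last 6 hours, grouped into 1-hour buckets (the arg values are illustrative):

```sh
curl http://localhost:9428/select/logsql/hits -d 'query=error' -d 'start=6h' -d 'step=1h'
```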
@ -210,7 +217,7 @@ See also:
### Querying streams ### Querying streams
VictoriaLogs provides `/select/logsql/streams?query=<query>&start=<start>&end=<end>` HTTP endpoint, which returns [streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) VictoriaLogs provides `/select/logsql/streams?query=<query>&start=<start>&end=<end>` HTTP endpoint, which returns [streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields)
from results of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range. from results of the given [`<query>`](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The response also contains the number of log results per every `stream`. The response also contains the number of log results per every `stream`.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats). The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
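For example, an illustrative command for querying streams of logs with the `error` word over the last hour could look as follows:

```sh
curl http://localhost:9428/select/logsql/streams -d 'query=error' -d 'start=1h'
```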
@ -254,22 +261,22 @@ See also:
- [Querying hits stats](#querying-hits-stats) - [Querying hits stats](#querying-hits-stats)
- [HTTP API](#http-api) - [HTTP API](#http-api)
### Querying stream label names ### Querying stream field names
VictoriaLogs provides `/select/logsql/stream_label_names?query=<query>&start=<start>&end=<end>` HTTP endpoint, which returns VictoriaLogs provides `/select/logsql/stream_field_names?query=<query>&start=<start>&end=<end>` HTTP endpoint, which returns
[log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label names from results [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) field names from results
of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range. of the given [`<query>`](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The response also contains the number of log results per every label name. The response also contains the number of log results per every field name.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats). The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
If `<start>` is missing, then it equals the minimum timestamp across logs stored in VictoriaLogs. If `<start>` is missing, then it equals the minimum timestamp across logs stored in VictoriaLogs.
If `<end>` is missing, then it equals the maximum timestamp across logs stored in VictoriaLogs. If `<end>` is missing, then it equals the maximum timestamp across logs stored in VictoriaLogs.
For example, the following command returns stream label names across logs with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word) For example, the following command returns stream field names across logs with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word)
for the last 5 minutes: for the last 5 minutes:
```sh ```sh
curl http://localhost:9428/select/logsql/stream_label_names -d 'query=error' -d 'start=5m' curl http://localhost:9428/select/logsql/stream_field_names -d 'query=error' -d 'start=5m'
``` ```
Below is an example JSON output returned from this endpoint: Below is an example JSON output returned from this endpoint:
@ -295,27 +302,27 @@ Below is an example JSON output returned from this endpoint:
See also: See also:
- [Querying stream label names](#querying-stream-label-names) - [Querying stream field names](#querying-stream-field-names)
- [Querying field values](#querying-field-values) - [Querying field values](#querying-field-values)
- [Querying streams](#querying-streams) - [Querying streams](#querying-streams)
- [HTTP API](#http-api) - [HTTP API](#http-api)
### Querying stream label values ### Querying stream field values
VictoriaLogs provides `/select/logsql/stream_label_values?query=<query>&start=<start>&<end>&label=<labelName>` HTTP endpoint, VictoriaLogs provides `/select/logsql/stream_field_values?query=<query>&start=<start>&<end>&field=<fieldName>` HTTP endpoint,
which returns [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label values for the label with the given `<labelName>` name which returns [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) field values for the field with the given `<fieldName>` name
from results of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range. from results of the given [`<query>`](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The response also contains the number of log results per every label value. The response also contains the number of log results per every field value.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats). The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
If `<start>` is missing, then it equals the minimum timestamp across logs stored in VictoriaLogs. If `<start>` is missing, then it equals the minimum timestamp across logs stored in VictoriaLogs.
If `<end>` is missing, then it equals the maximum timestamp across logs stored in VictoriaLogs. If `<end>` is missing, then it equals the maximum timestamp across logs stored in VictoriaLogs.
For example, the following command returns values for the stream label `host` across logs with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word) For example, the following command returns values for the stream field `host` across logs with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word)
for the last 5 minutes: for the last 5 minutes:
```sh ```sh
curl http://localhost:9428/select/logsql/stream_label_values -d 'query=error' -d 'start=5m' -d 'label=host' curl http://localhost:9428/select/logsql/stream_field_values -d 'query=error' -d 'start=5m' -d 'field=host'
``` ```
Below is an example JSON output returned from this endpoint: Below is an example JSON output returned from this endpoint:
@ -335,12 +342,12 @@ Below is an example JSON output returned from this endpoint:
} }
``` ```
The `/select/logsql/stream_label_names` endpoint supports optional `limit=N` query arg, which allows limiting the number of returned values to `N`. The `/select/logsql/stream_field_values` endpoint supports optional `limit=N` query arg, which allows limiting the number of returned values to `N`.
The endpoint returns arbitrary subset of values if their number exceeds `N`, so `limit=N` cannot be used for pagination over big number of field values. The endpoint returns arbitrary subset of values if their number exceeds `N`, so `limit=N` cannot be used for pagination over big number of field values.
See also: See also:
- [Querying stream label values](#querying-stream-label-values) - [Querying stream field values](#querying-stream-field-values)
- [Querying field names](#querying-field-names) - [Querying field names](#querying-field-names)
- [Querying streams](#querying-streams) - [Querying streams](#querying-streams)
- [HTTP API](#http-api) - [HTTP API](#http-api)
@ -348,7 +355,7 @@ See also:
### Querying field names ### Querying field names
VictoriaLogs provides `/select/logsql/field_names?query=<query>&start=<start>&end=<end>` HTTP endpoint, which returns field names VictoriaLogs provides `/select/logsql/field_names?query=<query>&start=<start>&end=<end>` HTTP endpoint, which returns field names
from results of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range. from results of the given [`<query>`](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The response also contains the number of log results per every field name. The response also contains the number of log results per every field name.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats). The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
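For example, by analogy with the stream field names example above, a sketch of querying field names across logs with the `error` word for the last 5 minutes:

```sh
curl http://localhost:9428/select/logsql/field_names -d 'query=error' -d 'start=5m'
```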
@ -385,7 +392,7 @@ Below is an example JSON output returned from this endpoint:
See also: See also:
- [Querying stream label names](#querying-stream-label-names) - [Querying stream field names](#querying-stream-field-names)
- [Querying field values](#querying-field-values) - [Querying field values](#querying-field-values)
- [Querying streams](#querying-streams) - [Querying streams](#querying-streams)
- [HTTP API](#http-api) - [HTTP API](#http-api)
@ -394,7 +401,7 @@ See also:
VictoriaLogs provides `/select/logsql/field_values?query=<query>&field=<fieldName>&start=<start>&end=<end>` HTTP endpoint, which returns VictoriaLogs provides `/select/logsql/field_values?query=<query>&field=<fieldName>&start=<start>&end=<end>` HTTP endpoint, which returns
unique values for the given `<fieldName>` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) unique values for the given `<fieldName>` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
from results of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range. from results of the given [`<query>`](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The response also contains the number of log results per every field value. The response also contains the number of log results per every field value.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats). The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
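For example, a sketch of querying values of the `host` field across logs with the `error` word for the last 5 minutes (the field name is illustrative):

```sh
curl http://localhost:9428/select/logsql/field_values -d 'query=error' -d 'field=host' -d 'start=5m'
```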
@ -435,7 +442,7 @@ When the `limit` is reached, `hits` are zeroed, since they cannot be calculated
See also: See also:
- [Querying stream label values](#querying-stream-label-values) - [Querying stream field values](#querying-stream-field-values)
- [Querying field names](#querying-field-names) - [Querying field names](#querying-field-names)
- [Querying streams](#querying-streams) - [Querying streams](#querying-streams)
- [HTTP API](#http-api) - [HTTP API](#http-api)
@ -454,32 +461,25 @@ There are three modes of displaying query results:
- `Table` - displays query results as a table. - `Table` - displays query results as a table.
- `JSON` - displays raw JSON response from [HTTP API](#http-api). - `JSON` - displays raw JSON response from [HTTP API](#http-api).
This is the first version that has minimal functionality. It comes with the following limitations: This is the first version that has minimal functionality and may contain bugs.
It is recommended trying the [command line interface](#command-line), which has no known bugs :)
- The number of query results is always limited to 1000 lines. Iteratively add
more specific [filters](https://docs.victoriametrics.com/victorialogs/logsql/#filters) to the query
in order to get full response with less than 1000 lines.
- Queries are always executed against [tenant](https://docs.victoriametrics.com/victorialogs/#multitenancy) `0`.
These limitations will be removed in future versions.
To get around the current limitations, you can use an alternative - the [command line interface](#command-line).
## Command-line ## Command-line
VictoriaLogs integrates well with `curl` and other command-line tools during querying because of the following features: VictoriaLogs integrates well with `curl` and other command-line tools during querying because of the following features:
- VictoriaLogs sends the matching log entries to the response stream as soon as they are found. - Matching log entries are sent to the response stream as soon as they are found.
This allows forwarding the response stream to arbitrary [Unix pipes](https://en.wikipedia.org/wiki/Pipeline_(Unix)). This allows forwarding the response stream to arbitrary [Unix pipes](https://en.wikipedia.org/wiki/Pipeline_(Unix))
- VictoriaLogs automatically adjusts query execution speed to the speed of the client, which reads the response stream. without waiting until the response finishes.
- Query execution speed is automatically adjusted to the speed of the client, which reads the response stream.
For example, if the response stream is piped to `less` command, then the query is suspended For example, if the response stream is piped to `less` command, then the query is suspended
until the `less` command reads the next block from the response stream. until the `less` command reads the next block from the response stream.
- VictoriaLogs automatically cancels query execution when the client closes the response stream. - Query is automatically canceled when the client closes the response stream.
For example, if the query response is piped to `head` command, then VictoriaLogs stops executing the query For example, if the query response is piped to `head` command, then VictoriaLogs stops executing the query
when the `head` command closes the response stream. when the `head` command closes the response stream.
These features allow executing queries at command-line interface, which potentially select billions of rows, These features allow executing queries at command-line interface, which potentially select billions of rows,
without the risk of high resource usage (CPU, RAM, disk IO) at VictoriaLogs server. without the risk of high resource usage (CPU, RAM, disk IO) at VictoriaLogs.
For example, the following query can return very big number of matching log entries (e.g. billions) if VictoriaLogs contains For example, the following query can return very big number of matching log entries (e.g. billions) if VictoriaLogs contains
many log messages with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word): many log messages with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word):
@ -488,8 +488,8 @@ many log messages with the `error` [word](https://docs.victoriametrics.com/victo
curl http://localhost:9428/select/logsql/query -d 'query=error' curl http://localhost:9428/select/logsql/query -d 'query=error'
``` ```
If the command returns "never-ending" response, then just press `ctrl+C` at any time in order to cancel the query. If the command above returns "never-ending" response, then just press `ctrl+C` at any time in order to cancel the query.
VictoriaLogs notices that the response stream is closed, so it cancels the query and instantly stops consuming CPU, RAM and disk IO for this query. VictoriaLogs notices that the response stream is closed, so it cancels the query and stops consuming CPU, RAM and disk IO for this query.
Then just use `head` command for investigating the returned log messages and narrowing down the query: Then just use `head` command for investigating the returned log messages and narrowing down the query:
@ -500,6 +500,12 @@ curl http://localhost:9428/select/logsql/query -d 'query=error' | head -10
The `head -10` command reads only the first 10 log messages from the response and then closes the response stream. The `head -10` command reads only the first 10 log messages from the response and then closes the response stream.
This automatically cancels the query at VictoriaLogs side, so it stops consuming CPU, RAM and disk IO resources. This automatically cancels the query at VictoriaLogs side, so it stops consuming CPU, RAM and disk IO resources.
Alternatively, you can limit the number of returned logs at VictoriaLogs side via [`limit` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#limit-pipe):
```sh
curl http://localhost:9428/select/logsql/query -d 'query=error | limit 10'
```
Sometimes it may be more convenient to use `less` command instead of `head` during the investigation of the returned response: Sometimes it may be more convenient to use `less` command instead of `head` during the investigation of the returned response:
```sh ```sh
@ -509,7 +515,7 @@ curl http://localhost:9428/select/logsql/query -d 'query=error' | less
The `less` command reads the response stream on demand, when the user scrolls down the output. The `less` command reads the response stream on demand, when the user scrolls down the output.
VictoriaLogs suspends query execution when `less` stops reading the response stream. VictoriaLogs suspends query execution when `less` stops reading the response stream.
It doesn't consume CPU and disk IO resources during this time. It resumes query execution It doesn't consume CPU and disk IO resources during this time. It resumes query execution
when the `less` continues reading the response stream. after the `less` continues reading the response stream.
Suppose that the initial investigation of the returned query results helped determining that the needed log messages contain Suppose that the initial investigation of the returned query results helped determining that the needed log messages contain
`cannot open file` [phrase](https://docs.victoriametrics.com/victorialogs/logsql/#phrase-filter). `cannot open file` [phrase](https://docs.victoriametrics.com/victorialogs/logsql/#phrase-filter).
@ -543,7 +549,13 @@ See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#stream-fi
[these docs](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter) about `_time` filter [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter) about `_time` filter
and [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#logical-filter) about `AND` operator. and [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#logical-filter) about `AND` operator.
The following example shows how to sort query results by the [`_time` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field): Alternatively, you can count the number of matching logs at VictoriaLogs side with [`stats` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe):
```sh
curl http://localhost:9428/select/logsql/query -d 'query=_stream:{app="nginx"} AND _time:5m AND error | stats count() logs_with_error'
```
The following example shows how to sort query results by the [`_time` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field) with traditional Unix tools:
```sh ```sh
curl http://localhost:9428/select/logsql/query -d 'query=error' | jq -r '._time + " " + ._msg' | sort | less curl http://localhost:9428/select/logsql/query -d 'query=error' | jq -r '._time + " " + ._msg' | sort | less
@ -558,8 +570,14 @@ can take non-trivial amounts of time if the `query` returns too many results. Th
before sorting the results. See [these tips](https://docs.victoriametrics.com/victorialogs/logsql/#performance-tips) before sorting the results. See [these tips](https://docs.victoriametrics.com/victorialogs/logsql/#performance-tips)
on how to narrow down query results. on how to narrow down query results.
Alternatively, sorting of matching logs can be performed at VictoriaLogs side via [`sort` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#sort-pipe):
```sh
curl http://localhost:9428/select/logsql/query -d 'query=error | sort by (_time)' | less
```
The following example calculates stats on the number of log messages received during the last 5 minutes The following example calculates stats on the number of log messages received during the last 5 minutes
grouped by `log.level` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model): grouped by `log.level` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) with traditional Unix tools:
```sh ```sh
curl http://localhost:9428/select/logsql/query -d 'query=_time:5m log.level:*' | jq -r '."log.level"' | sort | uniq -c curl http://localhost:9428/select/logsql/query -d 'query=_time:5m log.level:*' | jq -r '."log.level"' | sort | uniq -c
@ -569,6 +587,12 @@ The query selects all the log messages with non-empty `log.level` field via ["an
then pipes them to `jq` command, which extracts the `log.level` field value from the returned JSON stream, then the extracted `log.level` values then pipes them to `jq` command, which extracts the `log.level` field value from the returned JSON stream, then the extracted `log.level` values
are sorted with `sort` command and, finally, they are passed to `uniq -c` command for calculating the needed stats. are sorted with `sort` command and, finally, they are passed to `uniq -c` command for calculating the needed stats.
Alternatively, all the stats calculations above can be performed at VictoriaLogs side via [`stats by(...)`](https://docs.victoriametrics.com/victorialogs/logsql/#stats-by-fields):
```sh
curl http://localhost:9428/select/logsql/query -d 'query=_time:5m log.level:* | stats by (log.level) count() matching_logs'
```
See also: See also:
- [Key concepts](https://docs.victoriametrics.com/victorialogs/keyconcepts/). - [Key concepts](https://docs.victoriametrics.com/victorialogs/keyconcepts/).
@ -31,11 +31,17 @@ type blockResult struct {
// csBuf contains requested columns. // csBuf contains requested columns.
csBuf []blockResultColumn csBuf []blockResultColumn
// csEmpty contains non-existing columns, which were referenced via getColumnByName()
csEmpty []blockResultColumn
// cs contains cached pointers to requested columns returned from getColumns() if csInitialized=true. // cs contains cached pointers to requested columns returned from getColumns() if csInitialized=true.
cs []*blockResultColumn cs []*blockResultColumn
// csInitialized is set to true if cs is properly initialized and can be returned from getColumns(). // csInitialized is set to true if cs is properly initialized and can be returned from getColumns().
csInitialized bool csInitialized bool
fvecs []filteredValuesEncodedCreator
svecs []searchValuesEncodedCreator
} }
func (br *blockResult) reset() { func (br *blockResult) reset() {
@ -49,10 +55,19 @@ func (br *blockResult) reset() {
clear(br.csBuf) clear(br.csBuf)
br.csBuf = br.csBuf[:0] br.csBuf = br.csBuf[:0]
clear(br.csEmpty)
br.csEmpty = br.csEmpty[:0]
clear(br.cs) clear(br.cs)
br.cs = br.cs[:0] br.cs = br.cs[:0]
br.csInitialized = false br.csInitialized = false
clear(br.fvecs)
br.fvecs = br.fvecs[:0]
clear(br.svecs)
br.svecs = br.svecs[:0]
} }
// clone returns a clone of br, which owns its own data. // clone returns a clone of br, which owns its own data.
@ -88,6 +103,10 @@ func (br *blockResult) clone() *blockResult {
} }
brNew.csBuf = csNew brNew.csBuf = csNew
// do not clone br.csEmpty - it will be populated by the caller via getColumnByName().
// do not clone br.fvecs and br.svecs, since they may point to external data.
return brNew return brNew
} }
@ -128,6 +147,9 @@ func (br *blockResult) initFromFilterNeededColumns(brSrc *blockResult, bm *bitma
} }
} }
// appendFilteredColumn adds cSrc with the given bm filter to br.
//
// the br is valid until brSrc, cSrc or bm is updated.
func (br *blockResult) appendFilteredColumn(brSrc *blockResult, cSrc *blockResultColumn, bm *bitmap) { func (br *blockResult) appendFilteredColumn(brSrc *blockResult, cSrc *blockResultColumn, bm *bitmap) {
if len(br.timestamps) == 0 { if len(br.timestamps) == 0 {
return return
@ -146,23 +168,36 @@ func (br *blockResult) appendFilteredColumn(brSrc *blockResult, cSrc *blockResul
cDst.minValue = cSrc.minValue cDst.minValue = cSrc.minValue
cDst.maxValue = cSrc.maxValue cDst.maxValue = cSrc.maxValue
cDst.dictValues = cSrc.dictValues cDst.dictValues = cSrc.dictValues
cDst.newValuesEncodedFunc = func(br *blockResult) []string { br.fvecs = append(br.fvecs, filteredValuesEncodedCreator{
valuesEncodedSrc := cSrc.getValuesEncoded(brSrc) br: brSrc,
c: cSrc,
bm: bm,
})
cDst.valuesEncodedCreator = &br.fvecs[len(br.fvecs)-1]
}
br.csBuf = append(br.csBuf, cDst)
br.csInitialized = false
}
type filteredValuesEncodedCreator struct {
br *blockResult
c *blockResultColumn
bm *bitmap
}
func (fvec *filteredValuesEncodedCreator) newValuesEncoded(br *blockResult) []string {
valuesEncodedSrc := fvec.c.getValuesEncoded(fvec.br)
valuesBuf := br.valuesBuf valuesBuf := br.valuesBuf
valuesBufLen := len(valuesBuf) valuesBufLen := len(valuesBuf)
bm.forEachSetBitReadonly(func(idx int) { fvec.bm.forEachSetBitReadonly(func(idx int) {
valuesBuf = append(valuesBuf, valuesEncodedSrc[idx]) valuesBuf = append(valuesBuf, valuesEncodedSrc[idx])
}) })
br.valuesBuf = valuesBuf br.valuesBuf = valuesBuf
return valuesBuf[valuesBufLen:] return valuesBuf[valuesBufLen:]
} }
}
br.csBuf = append(br.csBuf, cDst)
br.csInitialized = false
}
// cloneValues clones the given values into br and returns the cloned values. // cloneValues clones the given values into br and returns the cloned values.
func (br *blockResult) cloneValues(values []string) []string { func (br *blockResult) cloneValues(values []string) []string {
@ -287,6 +322,8 @@ func (br *blockResult) initAllColumns(bs *blockSearch, bm *bitmap) {
br.addColumn(bs, bm, ch) br.addColumn(bs, bm, ch)
} }
} }
br.csInitFast()
} }
// initRequestedColumns initializes only requested columns in br according to bs and bm. // initRequestedColumns initializes only requested columns in br according to bs and bm.
@ -314,6 +351,8 @@ func (br *blockResult) initRequestedColumns(bs *blockSearch, bm *bitmap) {
} }
} }
} }
br.csInitFast()
} }
func (br *blockResult) mustInit(bs *blockSearch, bm *bitmap) { func (br *blockResult) mustInit(bs *blockSearch, bm *bitmap) {
@ -433,13 +472,28 @@ func (br *blockResult) addColumn(bs *blockSearch, bm *bitmap, ch *columnHeader)
minValue: ch.minValue, minValue: ch.minValue,
maxValue: ch.maxValue, maxValue: ch.maxValue,
dictValues: ch.valuesDict.values, dictValues: ch.valuesDict.values,
newValuesEncodedFunc: func(br *blockResult) []string {
return br.newValuesEncodedFromColumnHeader(bs, bm, ch)
},
}) })
c := &br.csBuf[len(br.csBuf)-1]
br.svecs = append(br.svecs, searchValuesEncodedCreator{
bs: bs,
bm: bm,
ch: ch,
})
c.valuesEncodedCreator = &br.svecs[len(br.svecs)-1]
br.csInitialized = false br.csInitialized = false
} }
type searchValuesEncodedCreator struct {
bs *blockSearch
bm *bitmap
ch *columnHeader
}
func (svec *searchValuesEncodedCreator) newValuesEncoded(br *blockResult) []string {
return br.newValuesEncodedFromColumnHeader(svec.bs, svec.bm, svec.ch)
}
func (br *blockResult) addTimeColumn() { func (br *blockResult) addTimeColumn() {
br.csBuf = append(br.csBuf, blockResultColumn{ br.csBuf = append(br.csBuf, blockResultColumn{
name: "_time", name: "_time",
@ -1325,15 +1379,31 @@ func (br *blockResult) getColumnByName(columnName string) *blockResultColumn {
return cs[idx] return cs[idx]
} }
br.addConstColumn(columnName, "") // Search for empty column with the given name
return &br.csBuf[len(br.csBuf)-1] csEmpty := br.csEmpty
for i := range csEmpty {
if csEmpty[i].name == columnName {
return &csEmpty[i]
}
}
// Create missing empty column
br.csEmpty = append(br.csEmpty, blockResultColumn{
name: br.a.copyString(columnName),
isConst: true,
valuesEncoded: getEmptyStrings(1),
})
return &br.csEmpty[len(br.csEmpty)-1]
} }
func (br *blockResult) getColumns() []*blockResultColumn { func (br *blockResult) getColumns() []*blockResultColumn {
if br.csInitialized { if !br.csInitialized {
br.csInit()
}
return br.cs return br.cs
} }
func (br *blockResult) csInit() {
csBuf := br.csBuf csBuf := br.csBuf
clear(br.cs) clear(br.cs)
cs := br.cs[:0] cs := br.cs[:0]
@ -1348,8 +1418,17 @@ func (br *blockResult) getColumns() []*blockResultColumn {
} }
br.cs = cs br.cs = cs
br.csInitialized = true br.csInitialized = true
}
return br.cs func (br *blockResult) csInitFast() {
csBuf := br.csBuf
clear(br.cs)
cs := slicesutil.SetLength(br.cs, len(csBuf))
for i := range csBuf {
cs[i] = &csBuf[i]
}
br.cs = cs
br.csInitialized = true
} }
func getBlockResultColumnIdxByName(cs []*blockResultColumn, name string) int { func getBlockResultColumnIdxByName(cs []*blockResultColumn, name string) int {
@ -1444,10 +1523,10 @@ type blockResultColumn struct {
// valuesBucketed contains values after getValuesBucketed() call // valuesBucketed contains values after getValuesBucketed() call
valuesBucketed []string valuesBucketed []string
// newValuesEncodedFunc must return valuesEncoded. // valuesEncodedCreator must return valuesEncoded.
// //
// This func must be set for non-const and non-time columns if valuesEncoded field isn't set. // This interface must be set for non-const and non-time columns if valuesEncoded field isn't set.
newValuesEncodedFunc func(br *blockResult) []string valuesEncodedCreator columnValuesEncodedCreator
// bucketSizeStr contains bucketSizeStr for valuesBucketed // bucketSizeStr contains bucketSizeStr for valuesBucketed
bucketSizeStr string bucketSizeStr string
@ -1456,6 +1535,11 @@ type blockResultColumn struct {
bucketOffsetStr string bucketOffsetStr string
} }
// columnValuesEncodedCreator must return encoded values for the current column.
type columnValuesEncodedCreator interface {
newValuesEncoded(br *blockResult) []string
}
// clone returns a clone of c backed by data from br. // clone returns a clone of c backed by data from br.
// //
// It is expected that c.valuesEncoded is already initialized for non-time column. // It is expected that c.valuesEncoded is already initialized for non-time column.
@ -1484,8 +1568,8 @@ func (c *blockResultColumn) clone(br *blockResult) blockResultColumn {
} }
cNew.valuesBucketed = br.cloneValues(c.valuesBucketed) cNew.valuesBucketed = br.cloneValues(c.valuesBucketed)
// Do not copy c.newValuesEncodedFunc, since it may refer to data, which may change over time. // Do not copy c.valuesEncodedCreator, since it may refer to data, which may change over time.
// We already copied c.valuesEncoded, so cNew.newValuesEncodedFunc must be nil. // We already copied c.valuesEncoded, so cNew.valuesEncodedCreator must be nil.
cNew.bucketSizeStr = c.bucketSizeStr cNew.bucketSizeStr = c.bucketSizeStr
cNew.bucketOffsetStr = c.bucketOffsetStr cNew.bucketOffsetStr = c.bucketOffsetStr
@ -1579,7 +1663,7 @@ func (c *blockResultColumn) getValuesEncoded(br *blockResult) []string {
} }
if c.valuesEncoded == nil { if c.valuesEncoded == nil {
c.valuesEncoded = c.newValuesEncodedFunc(br) c.valuesEncoded = c.valuesEncodedCreator.newValuesEncoded(br)
} }
return c.valuesEncoded return c.valuesEncoded
} }

View file

@ -321,23 +321,10 @@ func (q *Query) Optimize() {
// Call Optimize for queries from 'in(query)' filters. // Call Optimize for queries from 'in(query)' filters.
optimizeFilterIn(q.f) optimizeFilterIn(q.f)
// Optimize individual pipes.
for _, p := range q.pipes { for _, p := range q.pipes {
switch t := p.(type) { p.optimize()
case *pipeStats:
for _, f := range t.funcs {
f.iff.optimizeFilterIn()
}
case *pipeReplace:
t.iff.optimizeFilterIn()
case *pipeFormat:
t.iff.optimizeFilterIn()
case *pipeExtract:
t.iff.optimizeFilterIn()
case *pipeUnpackJSON:
t.iff.optimizeFilterIn()
case *pipeUnpackLogfmt:
t.iff.optimizeFilterIn()
}
} }
} }
@ -11,15 +11,26 @@ type pipe interface {
// updateNeededFields must update neededFields and unneededFields with fields it needs and not needs at the input. // updateNeededFields must update neededFields and unneededFields with fields it needs and not needs at the input.
updateNeededFields(neededFields, unneededFields fieldsSet) updateNeededFields(neededFields, unneededFields fieldsSet)
// newPipeProcessor must return new pipeProcessor for the given ppBase. // newPipeProcessor must return new pipeProcessor, which writes data to the given ppNext.
// //
// workersCount is the number of goroutine workers, which will call writeBlock() method. // workersCount is the number of goroutine workers, which will call writeBlock() method.
// //
// If stopCh is closed, the returned pipeProcessor must stop performing CPU-intensive tasks which take more than a few milliseconds. // If stopCh is closed, the returned pipeProcessor must stop performing CPU-intensive tasks which take more than a few milliseconds.
// It is OK to continue processing pipeProcessor calls if they take less than a few milliseconds. // It is OK to continue processing pipeProcessor calls if they take less than a few milliseconds.
// //
// The returned pipeProcessor may call cancel() at any time in order to notify worker goroutines to stop sending new data to pipeProcessor. // The returned pipeProcessor may call cancel() at any time in order to notify the caller to stop sending new data to it.
newPipeProcessor(workersCount int, stopCh <-chan struct{}, cancel func(), ppBase pipeProcessor) pipeProcessor newPipeProcessor(workersCount int, stopCh <-chan struct{}, cancel func(), ppNext pipeProcessor) pipeProcessor
// optimize must optimize the pipe
optimize()
// hasFilterInWithQuery must return true if the pipe contains 'in(subquery)' filter (recursively).
hasFilterInWithQuery() bool
// initFilterInValues must return new pipe with the initialized values for 'in(subquery)' filters (recursively).
//
// It is OK to return the pipe itself if it doesn't contain 'in(subquery)' filters.
initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error)
} }
// pipeProcessor must process a single pipe. // pipeProcessor must process a single pipe.
@ -39,7 +50,7 @@ type pipeProcessor interface {
// cancel() may be called also when the pipeProcessor decides to stop accepting new data, even if there is no error. // cancel() may be called also when the pipeProcessor decides to stop accepting new data, even if there is no error.
writeBlock(workerID uint, br *blockResult) writeBlock(workerID uint, br *blockResult)
// flush must flush all the data accumulated in the pipeProcessor to the base pipeProcessor. // flush must flush all the data accumulated in the pipeProcessor to the next pipeProcessor.
// //
// flush is called after all the worker goroutines are stopped. // flush is called after all the worker goroutines are stopped.
// //
@ -135,6 +146,12 @@ func parsePipe(lex *lexer) (pipe, error) {
return nil, fmt.Errorf("cannot parse 'offset' pipe: %w", err) return nil, fmt.Errorf("cannot parse 'offset' pipe: %w", err)
} }
return ps, nil return ps, nil
case lex.isKeyword("pack_json"):
pp, err := parsePackJSON(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'pack_json' pipe: %w", err)
}
return pp, nil
case lex.isKeyword("rename", "mv"): case lex.isKeyword("rename", "mv"):
pr, err := parsePipeRename(lex) pr, err := parsePipeRename(lex)
if err != nil { if err != nil {
@ -147,6 +164,12 @@ func parsePipe(lex *lexer) (pipe, error) {
return nil, fmt.Errorf("cannot parse 'replace' pipe: %w", err) return nil, fmt.Errorf("cannot parse 'replace' pipe: %w", err)
} }
return pr, nil return pr, nil
case lex.isKeyword("replace_regexp"):
pr, err := parsePipeReplaceRegexp(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'replace_regexp' pipe: %w", err)
}
return pr, nil
case lex.isKeyword("sort"): case lex.isKeyword("sort"):
ps, err := parsePipeSort(lex) ps, err := parsePipeSort(lex)
if err != nil { if err != nil {
@ -177,6 +200,12 @@ func parsePipe(lex *lexer) (pipe, error) {
return nil, fmt.Errorf("cannot parse 'unpack_logfmt' pipe: %w", err) return nil, fmt.Errorf("cannot parse 'unpack_logfmt' pipe: %w", err)
} }
return pu, nil return pu, nil
case lex.isKeyword("unroll"):
pu, err := parsePipeUnroll(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'unroll' pipe: %w", err)
}
return pu, nil
default: default:
return nil, fmt.Errorf("unexpected pipe %q", lex.token) return nil, fmt.Errorf("unexpected pipe %q", lex.token)
} }
@ -50,16 +50,28 @@ func (pc *pipeCopy) updateNeededFields(neededFields, unneededFields fieldsSet) {
} }
} }
func (pc *pipeCopy) newPipeProcessor(_ int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor { func (pc *pipeCopy) optimize() {
// Nothing to do
}
func (pc *pipeCopy) hasFilterInWithQuery() bool {
return false
}
func (pc *pipeCopy) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
return pc, nil
}
func (pc *pipeCopy) newPipeProcessor(_ int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
return &pipeCopyProcessor{ return &pipeCopyProcessor{
pc: pc, pc: pc,
ppBase: ppBase, ppNext: ppNext,
} }
} }
type pipeCopyProcessor struct { type pipeCopyProcessor struct {
pc *pipeCopy pc *pipeCopy
ppBase pipeProcessor ppNext pipeProcessor
} }
func (pcp *pipeCopyProcessor) writeBlock(workerID uint, br *blockResult) { func (pcp *pipeCopyProcessor) writeBlock(workerID uint, br *blockResult) {
@ -68,7 +80,7 @@ func (pcp *pipeCopyProcessor) writeBlock(workerID uint, br *blockResult) {
} }
br.copyColumns(pcp.pc.srcFields, pcp.pc.dstFields) br.copyColumns(pcp.pc.srcFields, pcp.pc.dstFields)
pcp.ppBase.writeBlock(workerID, br) pcp.ppNext.writeBlock(workerID, br)
} }
func (pcp *pipeCopyProcessor) flush() error { func (pcp *pipeCopyProcessor) flush() error {
@ -32,16 +32,28 @@ func (pd *pipeDelete) updateNeededFields(neededFields, unneededFields fieldsSet)
} }
} }
func (pd *pipeDelete) newPipeProcessor(_ int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor { func (pd *pipeDelete) optimize() {
// nothing to do
}
func (pd *pipeDelete) hasFilterInWithQuery() bool {
return false
}
func (pd *pipeDelete) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
return pd, nil
}
func (pd *pipeDelete) newPipeProcessor(_ int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
return &pipeDeleteProcessor{ return &pipeDeleteProcessor{
pd: pd, pd: pd,
ppBase: ppBase, ppNext: ppNext,
} }
} }
type pipeDeleteProcessor struct { type pipeDeleteProcessor struct {
pd *pipeDelete pd *pipeDelete
ppBase pipeProcessor ppNext pipeProcessor
} }
func (pdp *pipeDeleteProcessor) writeBlock(workerID uint, br *blockResult) { func (pdp *pipeDeleteProcessor) writeBlock(workerID uint, br *blockResult) {
@ -50,7 +62,7 @@ func (pdp *pipeDeleteProcessor) writeBlock(workerID uint, br *blockResult) {
} }
br.deleteColumns(pdp.pd.fields) br.deleteColumns(pdp.pd.fields)
pdp.ppBase.writeBlock(workerID, br) pdp.ppNext.writeBlock(workerID, br)
} }
func (pdp *pipeDeleteProcessor) flush() error { func (pdp *pipeDeleteProcessor) flush() error {
@ -2,6 +2,9 @@ package logstorage
import ( import (
"fmt" "fmt"
"unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
) )
// pipeExtract processes '| extract ...' pipe. // pipeExtract processes '| extract ...' pipe.
@ -38,6 +41,24 @@ func (pe *pipeExtract) String() string {
return s return s
} }
func (pe *pipeExtract) optimize() {
pe.iff.optimizeFilterIn()
}
func (pe *pipeExtract) hasFilterInWithQuery() bool {
return pe.iff.hasFilterInWithQuery()
}
func (pe *pipeExtract) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
iffNew, err := pe.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
peNew := *pe
peNew.iff = iffNew
return &peNew, nil
}
func (pe *pipeExtract) updateNeededFields(neededFields, unneededFields fieldsSet) { func (pe *pipeExtract) updateNeededFields(neededFields, unneededFields fieldsSet) {
if neededFields.contains("*") { if neededFields.contains("*") {
unneededFieldsOrig := unneededFields.clone() unneededFieldsOrig := unneededFields.clone()
@ -80,21 +101,129 @@ func (pe *pipeExtract) updateNeededFields(neededFields, unneededFields fieldsSet
} }
} }
func (pe *pipeExtract) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor { func (pe *pipeExtract) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
patterns := make([]*pattern, workersCount) return &pipeExtractProcessor{
for i := range patterns { pe: pe,
patterns[i] = pe.ptn.clone() ppNext: ppNext,
}
unpackFunc := func(uctx *fieldsUnpackerContext, s string) { shards: make([]pipeExtractProcessorShard, workersCount),
ptn := patterns[uctx.workerID]
ptn.apply(s)
for _, f := range ptn.fields {
uctx.addField(f.name, *f.value)
} }
} }
return newPipeUnpackProcessor(workersCount, unpackFunc, ppBase, pe.fromField, "", pe.keepOriginalFields, pe.skipEmptyResults, pe.iff) type pipeExtractProcessor struct {
pe *pipeExtract
ppNext pipeProcessor
shards []pipeExtractProcessorShard
}
type pipeExtractProcessorShard struct {
pipeExtractProcessorShardNopad
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
_ [128 - unsafe.Sizeof(pipeExtractProcessorShardNopad{})%128]byte
}
type pipeExtractProcessorShardNopad struct {
bm bitmap
ptn *pattern
resultColumns []*blockResultColumn
resultValues []string
rcs []resultColumn
a arena
}
func (pep *pipeExtractProcessor) writeBlock(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
pe := pep.pe
shard := &pep.shards[workerID]
bm := &shard.bm
bm.init(len(br.timestamps))
bm.setBits()
if iff := pe.iff; iff != nil {
iff.f.applyToBlockResult(br, bm)
if bm.isZero() {
pep.ppNext.writeBlock(workerID, br)
return
}
}
if shard.ptn == nil {
shard.ptn = pe.ptn.clone()
}
ptn := shard.ptn
shard.rcs = slicesutil.SetLength(shard.rcs, len(ptn.fields))
rcs := shard.rcs
for i := range ptn.fields {
rcs[i].name = ptn.fields[i].name
}
c := br.getColumnByName(pe.fromField)
values := c.getValues(br)
shard.resultColumns = slicesutil.SetLength(shard.resultColumns, len(rcs))
resultColumns := shard.resultColumns
for i := range resultColumns {
resultColumns[i] = br.getColumnByName(rcs[i].name)
}
shard.resultValues = slicesutil.SetLength(shard.resultValues, len(rcs))
resultValues := shard.resultValues
hadUpdates := false
vPrev := ""
for rowIdx, v := range values {
if bm.isSetBit(rowIdx) {
if !hadUpdates || vPrev != v {
vPrev = v
hadUpdates = true
ptn.apply(v)
for i, f := range ptn.fields {
v := *f.value
if v == "" && pe.skipEmptyResults || pe.keepOriginalFields {
c := resultColumns[i]
if vOrig := c.getValueAtRow(br, rowIdx); vOrig != "" {
v = vOrig
}
} else {
v = shard.a.copyString(v)
}
resultValues[i] = v
}
}
} else {
for i, c := range resultColumns {
resultValues[i] = c.getValueAtRow(br, rowIdx)
}
}
for i, v := range resultValues {
rcs[i].addValue(v)
}
}
for i := range rcs {
br.addResultColumn(&rcs[i])
}
pep.ppNext.writeBlock(workerID, br)
for i := range rcs {
rcs[i].reset()
}
shard.a.reset()
}
func (pep *pipeExtractProcessor) flush() error {
return nil
} }
func parsePipeExtract(lex *lexer) (*pipeExtract, error) { func parsePipeExtract(lex *lexer) (*pipeExtract, error) {
@ -37,13 +37,25 @@ func (pf *pipeFieldNames) updateNeededFields(neededFields, unneededFields fields
} }
} }
func (pf *pipeFieldNames) newPipeProcessor(workersCount int, stopCh <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor { func (pf *pipeFieldNames) optimize() {
// nothing to do
}
func (pf *pipeFieldNames) hasFilterInWithQuery() bool {
return false
}
func (pf *pipeFieldNames) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
return pf, nil
}
func (pf *pipeFieldNames) newPipeProcessor(workersCount int, stopCh <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
shards := make([]pipeFieldNamesProcessorShard, workersCount) shards := make([]pipeFieldNamesProcessorShard, workersCount)
pfp := &pipeFieldNamesProcessor{ pfp := &pipeFieldNamesProcessor{
pf: pf, pf: pf,
stopCh: stopCh, stopCh: stopCh,
ppBase: ppBase, ppNext: ppNext,
shards: shards, shards: shards,
} }
@ -53,7 +65,7 @@ func (pf *pipeFieldNames) newPipeProcessor(workersCount int, stopCh <-chan struc
type pipeFieldNamesProcessor struct { type pipeFieldNamesProcessor struct {
pf *pipeFieldNames pf *pipeFieldNames
stopCh <-chan struct{} stopCh <-chan struct{}
ppBase pipeProcessor ppNext pipeProcessor
shards []pipeFieldNamesProcessorShard shards []pipeFieldNamesProcessorShard
} }
@ -172,10 +184,10 @@ func (wctx *pipeFieldNamesWriteContext) flush() {
wctx.valuesLen = 0 wctx.valuesLen = 0
// Flush rcs to ppBase // Flush rcs to ppNext
br.setResultColumns(wctx.rcs[:], wctx.rowsCount) br.setResultColumns(wctx.rcs[:], wctx.rowsCount)
wctx.rowsCount = 0 wctx.rowsCount = 0
wctx.pfp.ppBase.writeBlock(0, br) wctx.pfp.ppNext.writeBlock(0, br)
br.reset() br.reset()
wctx.rcs[0].resetValues() wctx.rcs[0].resetValues()
wctx.rcs[1].resetValues() wctx.rcs[1].resetValues()

View file

@ -49,16 +49,28 @@ func (pf *pipeFields) updateNeededFields(neededFields, unneededFields fieldsSet)
unneededFields.reset() unneededFields.reset()
} }
func (pf *pipeFields) newPipeProcessor(_ int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor { func (pf *pipeFields) optimize() {
// nothing to do
}
func (pf *pipeFields) hasFilterInWithQuery() bool {
return false
}
func (pf *pipeFields) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
return pf, nil
}
func (pf *pipeFields) newPipeProcessor(_ int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
return &pipeFieldsProcessor{ return &pipeFieldsProcessor{
pf: pf, pf: pf,
ppBase: ppBase, ppNext: ppNext,
} }
} }
type pipeFieldsProcessor struct { type pipeFieldsProcessor struct {
pf *pipeFields pf *pipeFields
ppBase pipeProcessor ppNext pipeProcessor
} }
func (pfp *pipeFieldsProcessor) writeBlock(workerID uint, br *blockResult) { func (pfp *pipeFieldsProcessor) writeBlock(workerID uint, br *blockResult) {
@ -69,7 +81,7 @@ func (pfp *pipeFieldsProcessor) writeBlock(workerID uint, br *blockResult) {
if !pfp.pf.containsStar { if !pfp.pf.containsStar {
br.setColumns(pfp.pf.fields) br.setColumns(pfp.pf.fields)
} }
pfp.ppBase.writeBlock(workerID, br) pfp.ppNext.writeBlock(workerID, br)
} }
func (pfp *pipeFieldsProcessor) flush() error { func (pfp *pipeFieldsProcessor) flush() error {

View file

@ -29,12 +29,30 @@ func (pf *pipeFilter) updateNeededFields(neededFields, unneededFields fieldsSet)
} }
} }
func (pf *pipeFilter) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor { func (pf *pipeFilter) optimize() {
optimizeFilterIn(pf.f)
}
func (pf *pipeFilter) hasFilterInWithQuery() bool {
return hasFilterInWithQueryForFilter(pf.f)
}
func (pf *pipeFilter) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
fNew, err := initFilterInValuesForFilter(cache, pf.f, getFieldValuesFunc)
if err != nil {
return nil, err
}
pfNew := *pf
pfNew.f = fNew
return &pfNew, nil
}
func (pf *pipeFilter) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
shards := make([]pipeFilterProcessorShard, workersCount) shards := make([]pipeFilterProcessorShard, workersCount)
pfp := &pipeFilterProcessor{ pfp := &pipeFilterProcessor{
pf: pf, pf: pf,
ppBase: ppBase, ppNext: ppNext,
shards: shards, shards: shards,
} }
@ -43,7 +61,7 @@ func (pf *pipeFilter) newPipeProcessor(workersCount int, _ <-chan struct{}, _ fu
type pipeFilterProcessor struct { type pipeFilterProcessor struct {
pf *pipeFilter pf *pipeFilter
ppBase pipeProcessor ppNext pipeProcessor
shards []pipeFilterProcessorShard shards []pipeFilterProcessorShard
} }
@ -72,8 +90,8 @@ func (pfp *pipeFilterProcessor) writeBlock(workerID uint, br *blockResult) {
bm.setBits() bm.setBits()
pfp.pf.f.applyToBlockResult(br, bm) pfp.pf.f.applyToBlockResult(br, bm)
if bm.areAllBitsSet() { if bm.areAllBitsSet() {
// Fast path - the filter didn't filter out anything - send br to the base pipe as is. // Fast path - the filter didn't filter out anything - send br to the next pipe as is.
pfp.ppBase.writeBlock(workerID, br) pfp.ppNext.writeBlock(workerID, br)
return return
} }
if bm.isZero() { if bm.isZero() {
@ -81,9 +99,9 @@ func (pfp *pipeFilterProcessor) writeBlock(workerID uint, br *blockResult) {
return return
} }
// Slow path - copy the remaining rows from br to shard.br before sending them to base pipe. // Slow path - copy the remaining rows from br to shard.br before sending them to the next pipe.
shard.br.initFromFilterAllColumns(br, bm) shard.br.initFromFilterAllColumns(br, bm)
pfp.ppBase.writeBlock(workerID, &shard.br) pfp.ppNext.writeBlock(workerID, &shard.br)
} }
func (pfp *pipeFilterProcessor) flush() error { func (pfp *pipeFilterProcessor) flush() error {

View file

@ -4,8 +4,6 @@ import (
"fmt" "fmt"
"strconv" "strconv"
"unsafe" "unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
) )
// pipeFormat processes '| format ...' pipe. // pipeFormat processes '| format ...' pipe.
@ -74,10 +72,28 @@ func (pf *pipeFormat) updateNeededFields(neededFields, unneededFields fieldsSet)
} }
} }
func (pf *pipeFormat) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor { func (pf *pipeFormat) optimize() {
pf.iff.optimizeFilterIn()
}
func (pf *pipeFormat) hasFilterInWithQuery() bool {
return pf.iff.hasFilterInWithQuery()
}
func (pf *pipeFormat) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
iffNew, err := pf.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
pfNew := *pf
pfNew.iff = iffNew
return &pfNew, nil
}
func (pf *pipeFormat) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
return &pipeFormatProcessor{ return &pipeFormatProcessor{
pf: pf, pf: pf,
ppBase: ppBase, ppNext: ppNext,
shards: make([]pipeFormatProcessorShard, workersCount), shards: make([]pipeFormatProcessorShard, workersCount),
} }
@ -85,7 +101,7 @@ func (pf *pipeFormat) newPipeProcessor(workersCount int, _ <-chan struct{}, _ fu
type pipeFormatProcessor struct { type pipeFormatProcessor struct {
pf *pipeFormat pf *pipeFormat
ppBase pipeProcessor ppNext pipeProcessor
shards []pipeFormatProcessorShard shards []pipeFormatProcessorShard
} }
@ -100,8 +116,8 @@ type pipeFormatProcessorShard struct {
type pipeFormatProcessorShardNopad struct { type pipeFormatProcessorShardNopad struct {
bm bitmap bm bitmap
uctx fieldsUnpackerContext a arena
wctx pipeUnpackWriteContext rc resultColumn
} }
func (pfp *pipeFormatProcessor) writeBlock(workerID uint, br *blockResult) { func (pfp *pipeFormatProcessor) writeBlock(workerID uint, br *blockResult) {
@ -110,39 +126,49 @@ func (pfp *pipeFormatProcessor) writeBlock(workerID uint, br *blockResult) {
} }
shard := &pfp.shards[workerID] shard := &pfp.shards[workerID]
shard.wctx.init(workerID, pfp.ppBase, pfp.pf.keepOriginalFields, pfp.pf.skipEmptyResults, br) pf := pfp.pf
shard.uctx.init(workerID, "")
bm := &shard.bm bm := &shard.bm
bm.init(len(br.timestamps)) bm.init(len(br.timestamps))
bm.setBits() bm.setBits()
if iff := pfp.pf.iff; iff != nil { if iff := pf.iff; iff != nil {
iff.f.applyToBlockResult(br, bm) iff.f.applyToBlockResult(br, bm)
if bm.isZero() { if bm.isZero() {
pfp.ppBase.writeBlock(workerID, br) pfp.ppNext.writeBlock(workerID, br)
return return
} }
} }
shard.rc.name = pf.resultField
resultColumn := br.getColumnByName(pf.resultField)
for rowIdx := range br.timestamps { for rowIdx := range br.timestamps {
v := ""
if bm.isSetBit(rowIdx) { if bm.isSetBit(rowIdx) {
shard.formatRow(pfp.pf, br, rowIdx) v = shard.formatRow(pf, br, rowIdx)
shard.wctx.writeRow(rowIdx, shard.uctx.fields) if v == "" && pf.skipEmptyResults || pf.keepOriginalFields {
} else { if vOrig := resultColumn.getValueAtRow(br, rowIdx); vOrig != "" {
shard.wctx.writeRow(rowIdx, nil) v = vOrig
} }
} }
} else {
v = resultColumn.getValueAtRow(br, rowIdx)
}
shard.rc.addValue(v)
}
shard.wctx.flush() br.addResultColumn(&shard.rc)
shard.wctx.reset() pfp.ppNext.writeBlock(workerID, br)
shard.uctx.reset()
shard.a.reset()
shard.rc.reset()
} }
func (pfp *pipeFormatProcessor) flush() error { func (pfp *pipeFormatProcessor) flush() error {
return nil return nil
} }
func (shard *pipeFormatProcessorShard) formatRow(pf *pipeFormat, br *blockResult, rowIdx int) { func (shard *pipeFormatProcessorShard) formatRow(pf *pipeFormat, br *blockResult, rowIdx int) string {
bb := bbPool.Get() bb := bbPool.Get()
b := bb.B b := bb.B
for _, step := range pf.steps { for _, step := range pf.steps {
@ -159,10 +185,9 @@ func (shard *pipeFormatProcessorShard) formatRow(pf *pipeFormat, br *blockResult
} }
bb.B = b bb.B = b
s := bytesutil.ToUnsafeString(b) v := shard.a.copyBytesToString(b)
shard.uctx.resetFields()
shard.uctx.addField(pf.resultField, s)
bbPool.Put(bb) bbPool.Put(bb)
return v
} }
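
formatRow now copies its result into the shard arena before returning, because bb is handed back to bbPool and its bytes may be reused. A rough sketch of what such an arena could look like; the real arena type lives elsewhere in the package and likely avoids the extra copy via an unsafe string view, so treat the shape below as an assumption:

package main

import "fmt"

// arena sketch: an append-only buffer that owns copies of the strings produced
// while processing a block; reset() is called after the block is flushed.
type arena struct {
	b []byte
}

func (a *arena) copyBytesToString(b []byte) string {
	start := len(a.b)
	a.b = append(a.b, b...)
	// A plain conversion keeps this sketch simple and safe.
	return string(a.b[start:])
}

func (a *arena) reset() {
	a.b = a.b[:0]
}

func main() {
	var a arena
	v1 := a.copyBytesToString([]byte("formatted-1"))
	v2 := a.copyBytesToString([]byte("formatted-2"))
	fmt.Println(v1, v2)
	a.reset() // reuse the buffer for the next block
}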
func parsePipeFormat(lex *lexer) (*pipeFormat, error) { func parsePipeFormat(lex *lexer) (*pipeFormat, error) {

View file

@ -17,9 +17,22 @@ func (pl *pipeLimit) String() string {
} }
func (pl *pipeLimit) updateNeededFields(_, _ fieldsSet) { func (pl *pipeLimit) updateNeededFields(_, _ fieldsSet) {
// nothing to do
} }
func (pl *pipeLimit) newPipeProcessor(_ int, _ <-chan struct{}, cancel func(), ppBase pipeProcessor) pipeProcessor { func (pl *pipeLimit) optimize() {
// nothing to do
}
func (pl *pipeLimit) hasFilterInWithQuery() bool {
return false
}
func (pl *pipeLimit) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
return pl, nil
}
func (pl *pipeLimit) newPipeProcessor(_ int, _ <-chan struct{}, cancel func(), ppNext pipeProcessor) pipeProcessor {
if pl.limit == 0 { if pl.limit == 0 {
// Special case - notify the caller to stop writing data to the returned pipeLimitProcessor // Special case - notify the caller to stop writing data to the returned pipeLimitProcessor
cancel() cancel()
@ -27,14 +40,14 @@ func (pl *pipeLimit) newPipeProcessor(_ int, _ <-chan struct{}, cancel func(), p
return &pipeLimitProcessor{ return &pipeLimitProcessor{
pl: pl, pl: pl,
cancel: cancel, cancel: cancel,
ppBase: ppBase, ppNext: ppNext,
} }
} }
type pipeLimitProcessor struct { type pipeLimitProcessor struct {
pl *pipeLimit pl *pipeLimit
cancel func() cancel func()
ppBase pipeProcessor ppNext pipeProcessor
rowsProcessed atomic.Uint64 rowsProcessed atomic.Uint64
} }
@ -46,8 +59,8 @@ func (plp *pipeLimitProcessor) writeBlock(workerID uint, br *blockResult) {
rowsProcessed := plp.rowsProcessed.Add(uint64(len(br.timestamps))) rowsProcessed := plp.rowsProcessed.Add(uint64(len(br.timestamps)))
if rowsProcessed <= plp.pl.limit { if rowsProcessed <= plp.pl.limit {
// Fast path - write all the rows to ppBase. // Fast path - write all the rows to ppNext.
plp.ppBase.writeBlock(workerID, br) plp.ppNext.writeBlock(workerID, br)
return return
} }
@ -61,7 +74,7 @@ func (plp *pipeLimitProcessor) writeBlock(workerID uint, br *blockResult) {
// Write remaining rows. // Write remaining rows.
keepRows := plp.pl.limit - rowsProcessed keepRows := plp.pl.limit - rowsProcessed
br.truncateRows(int(keepRows)) br.truncateRows(int(keepRows))
plp.ppBase.writeBlock(workerID, br) plp.ppNext.writeBlock(workerID, br)
// Notify the caller that it should stop passing more data to writeBlock(). // Notify the caller that it should stop passing more data to writeBlock().
plp.cancel() plp.cancel()

View file

@ -17,18 +17,31 @@ func (po *pipeOffset) String() string {
} }
func (po *pipeOffset) updateNeededFields(_, _ fieldsSet) { func (po *pipeOffset) updateNeededFields(_, _ fieldsSet) {
// nothing to do
} }
func (po *pipeOffset) newPipeProcessor(_ int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor { func (po *pipeOffset) optimize() {
// nothing to do
}
func (po *pipeOffset) hasFilterInWithQuery() bool {
return false
}
func (po *pipeOffset) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
return po, nil
}
func (po *pipeOffset) newPipeProcessor(_ int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
return &pipeOffsetProcessor{ return &pipeOffsetProcessor{
po: po, po: po,
ppBase: ppBase, ppNext: ppNext,
} }
} }
type pipeOffsetProcessor struct { type pipeOffsetProcessor struct {
po *pipeOffset po *pipeOffset
ppBase pipeProcessor ppNext pipeProcessor
rowsProcessed atomic.Uint64 rowsProcessed atomic.Uint64
} }
@ -45,13 +58,13 @@ func (pop *pipeOffsetProcessor) writeBlock(workerID uint, br *blockResult) {
rowsProcessed -= uint64(len(br.timestamps)) rowsProcessed -= uint64(len(br.timestamps))
if rowsProcessed >= pop.po.offset { if rowsProcessed >= pop.po.offset {
pop.ppBase.writeBlock(workerID, br) pop.ppNext.writeBlock(workerID, br)
return return
} }
rowsSkip := pop.po.offset - rowsProcessed rowsSkip := pop.po.offset - rowsProcessed
br.skipRows(int(rowsSkip)) br.skipRows(int(rowsSkip))
pop.ppBase.writeBlock(workerID, br) pop.ppNext.writeBlock(workerID, br)
} }
func (pop *pipeOffsetProcessor) flush() error { func (pop *pipeOffsetProcessor) flush() error {

View file

@ -0,0 +1,140 @@
package logstorage
import (
"fmt"
"unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
)
// pipePackJSON processes '| pack_json ...' pipe.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#pack_json-pipe
type pipePackJSON struct {
resultField string
}
func (pp *pipePackJSON) String() string {
s := "pack_json"
if !isMsgFieldName(pp.resultField) {
s += " as " + quoteTokenIfNeeded(pp.resultField)
}
return s
}
func (pp *pipePackJSON) updateNeededFields(neededFields, unneededFields fieldsSet) {
if neededFields.contains("*") {
if !unneededFields.contains(pp.resultField) {
unneededFields.reset()
}
} else {
if neededFields.contains(pp.resultField) {
neededFields.add("*")
}
}
}
func (pp *pipePackJSON) optimize() {
// nothing to do
}
func (pp *pipePackJSON) hasFilterInWithQuery() bool {
return false
}
func (pp *pipePackJSON) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
return pp, nil
}
func (pp *pipePackJSON) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
return &pipePackJSONProcessor{
pp: pp,
ppNext: ppNext,
shards: make([]pipePackJSONProcessorShard, workersCount),
}
}
type pipePackJSONProcessor struct {
pp *pipePackJSON
ppNext pipeProcessor
shards []pipePackJSONProcessorShard
}
type pipePackJSONProcessorShard struct {
pipePackJSONProcessorShardNopad
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
_ [128 - unsafe.Sizeof(pipePackJSONProcessorShardNopad{})%128]byte
}
type pipePackJSONProcessorShardNopad struct {
rc resultColumn
buf []byte
fields []Field
}
func (ppp *pipePackJSONProcessor) writeBlock(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
shard := &ppp.shards[workerID]
shard.rc.name = ppp.pp.resultField
cs := br.getColumns()
buf := shard.buf[:0]
fields := shard.fields
for rowIdx := range br.timestamps {
fields = fields[:0]
for _, c := range cs {
v := c.getValueAtRow(br, rowIdx)
fields = append(fields, Field{
Name: c.name,
Value: v,
})
}
bufLen := len(buf)
buf = marshalFieldsToJSON(buf, fields)
v := bytesutil.ToUnsafeString(buf[bufLen:])
shard.rc.addValue(v)
}
br.addResultColumn(&shard.rc)
ppp.ppNext.writeBlock(workerID, br)
shard.rc.reset()
}
func (ppp *pipePackJSONProcessor) flush() error {
return nil
}
func parsePackJSON(lex *lexer) (*pipePackJSON, error) {
if !lex.isKeyword("pack_json") {
return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "pack_json")
}
lex.nextToken()
// parse optional 'as ...' part
resultField := "_msg"
if lex.isKeyword("as") {
lex.nextToken()
field, err := parseFieldName(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse result field for 'pack_json': %w", err)
}
resultField = field
}
pp := &pipePackJSON{
resultField: resultField,
}
return pp, nil
}
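
For reference, the pack_json pipe serializes all row fields into a single JSON object stored in resultField. A standalone approximation of the output shape (the real code relies on the internal marshalFieldsToJSON helper; the function below is only a stand-in for illustration):

package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

type Field struct {
	Name  string
	Value string
}

// packFieldsToJSON builds a JSON object from the row fields, preserving the
// original field order.
func packFieldsToJSON(fields []Field) string {
	var sb strings.Builder
	sb.WriteByte('{')
	for i, f := range fields {
		if i > 0 {
			sb.WriteByte(',')
		}
		name, _ := json.Marshal(f.Name)
		value, _ := json.Marshal(f.Value)
		sb.Write(name)
		sb.WriteByte(':')
		sb.Write(value)
	}
	sb.WriteByte('}')
	return sb.String()
}

func main() {
	row := []Field{{"_msg", "x"}, {"foo", "abc"}}
	fmt.Println(packFieldsToJSON(row)) // {"_msg":"x","foo":"abc"}
}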

View file

@ -0,0 +1,101 @@
package logstorage
import (
"testing"
)
func TestParsePipePackJSONSuccess(t *testing.T) {
f := func(pipeStr string) {
t.Helper()
expectParsePipeSuccess(t, pipeStr)
}
f(`pack_json`)
f(`pack_json as x`)
}
func TestParsePipePackJSONFailure(t *testing.T) {
f := func(pipeStr string) {
t.Helper()
expectParsePipeFailure(t, pipeStr)
}
f(`pack_json foo bar`)
}
func TestPipePackJSON(t *testing.T) {
f := func(pipeStr string, rows, rowsExpected [][]Field) {
t.Helper()
expectPipeResults(t, pipeStr, rows, rowsExpected)
}
// pack into _msg
f(`pack_json`, [][]Field{
{
{"_msg", "x"},
{"foo", `abc`},
{"bar", `cde`},
},
{
{"a", "b"},
{"c", "d"},
},
}, [][]Field{
{
{"_msg", `{"_msg":"x","foo":"abc","bar":"cde"}`},
{"foo", `abc`},
{"bar", `cde`},
},
{
{"_msg", `{"a":"b","c":"d"}`},
{"a", "b"},
{"c", "d"},
},
})
// pack into other field
f(`pack_json as a`, [][]Field{
{
{"_msg", "x"},
{"foo", `abc`},
{"bar", `cde`},
},
{
{"a", "b"},
{"c", "d"},
},
}, [][]Field{
{
{"_msg", `x`},
{"foo", `abc`},
{"bar", `cde`},
{"a", `{"_msg":"x","foo":"abc","bar":"cde"}`},
},
{
{"a", `{"a":"b","c":"d"}`},
{"c", "d"},
},
})
}
func TestPipePackJSONUpdateNeededFields(t *testing.T) {
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields
f(`pack_json as x`, "*", "", "*", "")
// unneeded fields do not intersect with output
f(`pack_json as x`, "*", "f1,f2", "*", "")
// unneeded fields intersect with output
f(`pack_json as f1`, "*", "f1,f2", "*", "f1,f2")
// needed fields do not intersect with output
f(`pack_json f1`, "x,y", "", "x,y", "")
// needed fields intersect with output
f(`pack_json as f2`, "f2,y", "", "*", "")
}

View file

@ -54,16 +54,28 @@ func (pr *pipeRename) updateNeededFields(neededFields, unneededFields fieldsSet)
} }
} }
func (pr *pipeRename) newPipeProcessor(_ int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor { func (pr *pipeRename) optimize() {
// nothing to do
}
func (pr *pipeRename) hasFilterInWithQuery() bool {
return false
}
func (pr *pipeRename) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
return pr, nil
}
func (pr *pipeRename) newPipeProcessor(_ int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
return &pipeRenameProcessor{ return &pipeRenameProcessor{
pr: pr, pr: pr,
ppBase: ppBase, ppNext: ppNext,
} }
} }
type pipeRenameProcessor struct { type pipeRenameProcessor struct {
pr *pipeRename pr *pipeRename
ppBase pipeProcessor ppNext pipeProcessor
} }
func (prp *pipeRenameProcessor) writeBlock(workerID uint, br *blockResult) { func (prp *pipeRenameProcessor) writeBlock(workerID uint, br *blockResult) {
@ -72,7 +84,7 @@ func (prp *pipeRenameProcessor) writeBlock(workerID uint, br *blockResult) {
} }
br.renameColumns(prp.pr.srcFields, prp.pr.dstFields) br.renameColumns(prp.pr.srcFields, prp.pr.dstFields)
prp.ppBase.writeBlock(workerID, br) prp.ppNext.writeBlock(workerID, br)
} }
func (prp *pipeRenameProcessor) flush() error { func (prp *pipeRenameProcessor) flush() error {

View file

@ -3,16 +3,13 @@ package logstorage
import ( import (
"fmt" "fmt"
"strings" "strings"
"unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
) )
// pipeReplace processes '| replace ...' pipe. // pipeReplace processes '| replace ...' pipe.
// //
// See https://docs.victoriametrics.com/victorialogs/logsql/#replace-pipe // See https://docs.victoriametrics.com/victorialogs/logsql/#replace-pipe
type pipeReplace struct { type pipeReplace struct {
srcField string field string
oldSubstr string oldSubstr string
newSubstr string newSubstr string
@ -29,8 +26,8 @@ func (pr *pipeReplace) String() string {
s += " " + pr.iff.String() s += " " + pr.iff.String()
} }
s += fmt.Sprintf(" (%s, %s)", quoteTokenIfNeeded(pr.oldSubstr), quoteTokenIfNeeded(pr.newSubstr)) s += fmt.Sprintf(" (%s, %s)", quoteTokenIfNeeded(pr.oldSubstr), quoteTokenIfNeeded(pr.newSubstr))
if pr.srcField != "_msg" { if pr.field != "_msg" {
s += " at " + quoteTokenIfNeeded(pr.srcField) s += " at " + quoteTokenIfNeeded(pr.field)
} }
if pr.limit > 0 { if pr.limit > 0 {
s += fmt.Sprintf(" limit %d", pr.limit) s += fmt.Sprintf(" limit %d", pr.limit)
@ -39,97 +36,37 @@ func (pr *pipeReplace) String() string {
} }
func (pr *pipeReplace) updateNeededFields(neededFields, unneededFields fieldsSet) { func (pr *pipeReplace) updateNeededFields(neededFields, unneededFields fieldsSet) {
if neededFields.contains("*") { updateNeededFieldsForUpdatePipe(neededFields, unneededFields, pr.field, pr.iff)
if !unneededFields.contains(pr.srcField) && pr.iff != nil {
unneededFields.removeFields(pr.iff.neededFields)
}
} else {
if neededFields.contains(pr.srcField) && pr.iff != nil {
neededFields.addFields(pr.iff.neededFields)
}
}
} }
func (pr *pipeReplace) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor { func (pr *pipeReplace) optimize() {
return &pipeReplaceProcessor{ pr.iff.optimizeFilterIn()
pr: pr,
ppBase: ppBase,
shards: make([]pipeReplaceProcessorShard, workersCount),
}
} }
type pipeReplaceProcessor struct { func (pr *pipeReplace) hasFilterInWithQuery() bool {
pr *pipeReplace return pr.iff.hasFilterInWithQuery()
ppBase pipeProcessor
shards []pipeReplaceProcessorShard
} }
type pipeReplaceProcessorShard struct { func (pr *pipeReplace) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
pipeReplaceProcessorShardNopad iffNew, err := pr.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 . return nil, err
_ [128 - unsafe.Sizeof(pipeReplaceProcessorShardNopad{})%128]byte }
peNew := *pr
peNew.iff = iffNew
return &peNew, nil
} }
type pipeReplaceProcessorShardNopad struct { func (pr *pipeReplace) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
bm bitmap updateFunc := func(a *arena, v string) string {
uctx fieldsUnpackerContext
wctx pipeUnpackWriteContext
}
func (prp *pipeReplaceProcessor) writeBlock(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
shard := &prp.shards[workerID]
shard.wctx.init(workerID, prp.ppBase, false, false, br)
shard.uctx.init(workerID, "")
pr := prp.pr
bm := &shard.bm
bm.init(len(br.timestamps))
bm.setBits()
if iff := pr.iff; iff != nil {
iff.f.applyToBlockResult(br, bm)
if bm.isZero() {
prp.ppBase.writeBlock(workerID, br)
return
}
}
c := br.getColumnByName(pr.srcField)
values := c.getValues(br)
bb := bbPool.Get() bb := bbPool.Get()
vPrev := ""
shard.uctx.addField(pr.srcField, "")
for rowIdx, v := range values {
if bm.isSetBit(rowIdx) {
if vPrev != v {
bb.B = appendReplace(bb.B[:0], v, pr.oldSubstr, pr.newSubstr, pr.limit) bb.B = appendReplace(bb.B[:0], v, pr.oldSubstr, pr.newSubstr, pr.limit)
s := bytesutil.ToUnsafeString(bb.B) result := a.copyBytesToString(bb.B)
shard.uctx.resetFields()
shard.uctx.addField(pr.srcField, s)
vPrev = v
}
shard.wctx.writeRow(rowIdx, shard.uctx.fields)
} else {
shard.wctx.writeRow(rowIdx, nil)
}
}
bbPool.Put(bb) bbPool.Put(bb)
return result
shard.wctx.flush()
shard.wctx.reset()
shard.uctx.reset()
} }
func (prp *pipeReplaceProcessor) flush() error { return newPipeUpdateProcessor(workersCount, updateFunc, ppNext, pr.field, pr.iff)
return nil
} }
func parsePipeReplace(lex *lexer) (*pipeReplace, error) { func parsePipeReplace(lex *lexer) (*pipeReplace, error) {
@ -164,7 +101,7 @@ func parsePipeReplace(lex *lexer) (*pipeReplace, error) {
newSubstr, err := getCompoundToken(lex) newSubstr, err := getCompoundToken(lex)
if err != nil { if err != nil {
return nil, fmt.Errorf("cannot parse newSubstr in 'replace': %w", err) return nil, fmt.Errorf("cannot parse newSubstr in 'replace(%q': %w", oldSubstr, err)
} }
if !lex.isKeyword(")") { if !lex.isKeyword(")") {
@ -172,14 +109,14 @@ func parsePipeReplace(lex *lexer) (*pipeReplace, error) {
} }
lex.nextToken() lex.nextToken()
srcField := "_msg" field := "_msg"
if lex.isKeyword("at") { if lex.isKeyword("at") {
lex.nextToken() lex.nextToken()
f, err := parseFieldName(lex) f, err := parseFieldName(lex)
if err != nil { if err != nil {
return nil, fmt.Errorf("cannot parse 'at' field after 'replace(%q, %q)': %w", oldSubstr, newSubstr, err) return nil, fmt.Errorf("cannot parse 'at' field after 'replace(%q, %q)': %w", oldSubstr, newSubstr, err)
} }
srcField = f field = f
} }
limit := uint64(0) limit := uint64(0)
@ -194,7 +131,7 @@ func parsePipeReplace(lex *lexer) (*pipeReplace, error) {
} }
pr := &pipeReplace{ pr := &pipeReplace{
srcField: srcField, field: field,
oldSubstr: oldSubstr, oldSubstr: oldSubstr,
newSubstr: newSubstr, newSubstr: newSubstr,
limit: limit, limit: limit,

View file

@ -0,0 +1,170 @@
package logstorage
import (
"fmt"
"regexp"
)
// pipeReplaceRegexp processes '| replace_regexp ...' pipe.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#replace_regexp-pipe
type pipeReplaceRegexp struct {
field string
re *regexp.Regexp
replacement string
// limit limits the number of replacements which can be performed
limit uint64
// iff is an optional filter for skipping the replace_regexp operation
iff *ifFilter
}
func (pr *pipeReplaceRegexp) String() string {
s := "replace_regexp"
if pr.iff != nil {
s += " " + pr.iff.String()
}
s += fmt.Sprintf(" (%s, %s)", quoteTokenIfNeeded(pr.re.String()), quoteTokenIfNeeded(pr.replacement))
if pr.field != "_msg" {
s += " at " + quoteTokenIfNeeded(pr.field)
}
if pr.limit > 0 {
s += fmt.Sprintf(" limit %d", pr.limit)
}
return s
}
func (pr *pipeReplaceRegexp) updateNeededFields(neededFields, unneededFields fieldsSet) {
updateNeededFieldsForUpdatePipe(neededFields, unneededFields, pr.field, pr.iff)
}
func (pr *pipeReplaceRegexp) optimize() {
pr.iff.optimizeFilterIn()
}
func (pr *pipeReplaceRegexp) hasFilterInWithQuery() bool {
return pr.iff.hasFilterInWithQuery()
}
func (pr *pipeReplaceRegexp) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
iffNew, err := pr.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
peNew := *pr
peNew.iff = iffNew
return &peNew, nil
}
func (pr *pipeReplaceRegexp) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
updateFunc := func(a *arena, v string) string {
bb := bbPool.Get()
bb.B = appendReplaceRegexp(bb.B[:0], v, pr.re, pr.replacement, pr.limit)
result := a.copyBytesToString(bb.B)
bbPool.Put(bb)
return result
}
return newPipeUpdateProcessor(workersCount, updateFunc, ppNext, pr.field, pr.iff)
}
func parsePipeReplaceRegexp(lex *lexer) (*pipeReplaceRegexp, error) {
if !lex.isKeyword("replace_regexp") {
return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "replace_regexp")
}
lex.nextToken()
// parse optional if (...)
var iff *ifFilter
if lex.isKeyword("if") {
f, err := parseIfFilter(lex)
if err != nil {
return nil, err
}
iff = f
}
if !lex.isKeyword("(") {
return nil, fmt.Errorf("missing '(' after 'replace_regexp'")
}
lex.nextToken()
reStr, err := getCompoundToken(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse reStr in 'replace_regexp': %w", err)
}
re, err := regexp.Compile(reStr)
if err != nil {
return nil, fmt.Errorf("cannot parse regexp %q in 'replace_regexp': %w", reStr, err)
}
if !lex.isKeyword(",") {
return nil, fmt.Errorf("missing ',' after 'replace_regexp(%q'", reStr)
}
lex.nextToken()
replacement, err := getCompoundToken(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse replacement in 'replace_regexp(%q': %w", reStr, err)
}
if !lex.isKeyword(")") {
return nil, fmt.Errorf("missing ')' after 'replace_regexp(%q, %q'", reStr, replacement)
}
lex.nextToken()
field := "_msg"
if lex.isKeyword("at") {
lex.nextToken()
f, err := parseFieldName(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'at' field after 'replace_regexp(%q, %q)': %w", reStr, replacement, err)
}
field = f
}
limit := uint64(0)
if lex.isKeyword("limit") {
lex.nextToken()
n, ok := tryParseUint64(lex.token)
if !ok {
return nil, fmt.Errorf("cannot parse 'limit %s' in 'replace_regexp'", lex.token)
}
lex.nextToken()
limit = n
}
pr := &pipeReplaceRegexp{
field: field,
re: re,
replacement: replacement,
limit: limit,
iff: iff,
}
return pr, nil
}
func appendReplaceRegexp(dst []byte, s string, re *regexp.Regexp, replacement string, limit uint64) []byte {
if len(s) == 0 {
return dst
}
replacements := uint64(0)
for {
locs := re.FindStringSubmatchIndex(s)
if locs == nil {
return append(dst, s...)
}
start := locs[0]
dst = append(dst, s[:start]...)
end := locs[1]
dst = re.ExpandString(dst, replacement, s, locs)
s = s[end:]
replacements++
if limit > 0 && replacements >= limit {
return append(dst, s...)
}
}
}
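
When limit is 0, appendReplaceRegexp should behave like the standard library's ReplaceAllString, including $0/${1} placeholder expansion; the limited case has no direct stdlib equivalent. A quick standalone sanity check of that expectation:

package main

import (
	"fmt"
	"regexp"
)

func main() {
	re := regexp.MustCompile(`a([^ ]+)`)
	// Matches the limit=0 expectation in TestAppendReplaceRegexp below.
	fmt.Println(re.ReplaceAllString("afoo abc barz", "b${1}x")) // bfoox bbcx bbrzx
}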

View file

@ -0,0 +1,200 @@
package logstorage
import (
"regexp"
"testing"
)
func TestParsePipeReplaceRegexpSuccess(t *testing.T) {
f := func(pipeStr string) {
t.Helper()
expectParsePipeSuccess(t, pipeStr)
}
f(`replace_regexp (foo, bar)`)
f(`replace_regexp ("foo[^ ]+bar|baz", "bar${1}x$0")`)
f(`replace_regexp (" ", "") at x`)
f(`replace_regexp if (x:y) ("-", ":") at a`)
f(`replace_regexp (" ", "") at x limit 10`)
f(`replace_regexp if (x:y) (" ", "") at foo limit 10`)
}
func TestParsePipeReplaceRegexpFailure(t *testing.T) {
f := func(pipeStr string) {
t.Helper()
expectParsePipeFailure(t, pipeStr)
}
f(`replace_regexp`)
f(`replace_regexp if`)
f(`replace_regexp foo`)
f(`replace_regexp (`)
f(`replace_regexp (foo`)
f(`replace_regexp (foo,`)
f(`replace_regexp(foo,bar`)
f(`replace_regexp(foo,bar,baz)`)
f(`replace_regexp(foo,bar) abc`)
f(`replace_regexp(bar,baz) limit`)
f(`replace_regexp(bar,baz) limit N`)
f(`replace_regexp ("foo[", "bar")`)
}
func TestPipeReplaceRegexp(t *testing.T) {
f := func(pipeStr string, rows, rowsExpected [][]Field) {
t.Helper()
expectPipeResults(t, pipeStr, rows, rowsExpected)
}
// replace_regexp with placeholders
f(`replace_regexp ("foo(.+?)bar", "q-$1-x")`, [][]Field{
{
{"_msg", `abc foo a bar foobar foo b bar`},
{"bar", `cde`},
},
{
{"_msg", `1234`},
},
}, [][]Field{
{
{"_msg", `abc q- a -x q-bar foo b -x`},
{"bar", `cde`},
},
{
{"_msg", `1234`},
},
})
// replace_regexp without limits at _msg
f(`replace_regexp ("[_/]", "-")`, [][]Field{
{
{"_msg", `a_bc_d/ef`},
{"bar", `cde`},
},
{
{"_msg", `1234`},
},
}, [][]Field{
{
{"_msg", `a-bc-d-ef`},
{"bar", `cde`},
},
{
{"_msg", `1234`},
},
})
// replace_regexp with limit 1 at foo
f(`replace_regexp ("[_/]", "-") at foo limit 1`, [][]Field{
{
{"foo", `a_bc_d/ef`},
{"bar", `cde`},
},
{
{"foo", `1234`},
},
}, [][]Field{
{
{"foo", `a-bc_d/ef`},
{"bar", `cde`},
},
{
{"foo", `1234`},
},
})
// replace_regexp with limit 100 at foo
f(`replace_regexp ("[_/]", "-") at foo limit 100`, [][]Field{
{
{"foo", `a_bc_d/ef`},
{"bar", `cde`},
},
{
{"foo", `1234`},
},
}, [][]Field{
{
{"foo", `a-bc-d-ef`},
{"bar", `cde`},
},
{
{"foo", `1234`},
},
})
// conditional replace_regexp at foo
f(`replace_regexp if (bar:abc) ("[_/]", "") at foo`, [][]Field{
{
{"foo", `a_bc_d/ef`},
{"bar", `cde`},
},
{
{"foo", `123_45/6`},
{"bar", "abc"},
},
}, [][]Field{
{
{"foo", `a_bc_d/ef`},
{"bar", `cde`},
},
{
{"foo", `123456`},
{"bar", "abc"},
},
})
}
func TestPipeReplaceRegexpUpdateNeededFields(t *testing.T) {
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields
f(`replace_regexp ("a", "b") at x`, "*", "", "*", "")
f(`replace_regexp if (f1:q) ("a", "b") at x`, "*", "", "*", "")
// unneeded fields do not intersect with at field
f(`replace_regexp ("a", "b") at x`, "*", "f1,f2", "*", "f1,f2")
f(`replace_regexp if (f3:q) ("a", "b") at x`, "*", "f1,f2", "*", "f1,f2")
f(`replace_regexp if (f2:q) ("a", "b") at x`, "*", "f1,f2", "*", "f1")
// unneeded fields intersect with at field
f(`replace_regexp ("a", "b") at x`, "*", "x,y", "*", "x,y")
f(`replace_regexp if (f1:q) ("a", "b") at x`, "*", "x,y", "*", "x,y")
f(`replace_regexp if (x:q) ("a", "b") at x`, "*", "x,y", "*", "x,y")
f(`replace_regexp if (y:q) ("a", "b") at x`, "*", "x,y", "*", "x,y")
// needed fields do not intersect with at field
f(`replace_regexp ("a", "b") at x`, "f2,y", "", "f2,y", "")
f(`replace_regexp if (f1:q) ("a", "b") at x`, "f2,y", "", "f2,y", "")
// needed fields intersect with at field
f(`replace_regexp ("a", "b") at y`, "f2,y", "", "f2,y", "")
f(`replace_regexp if (f1:q) ("a", "b") at y`, "f2,y", "", "f1,f2,y", "")
}
func TestAppendReplaceRegexp(t *testing.T) {
f := func(s, reStr, replacement string, limit int, resultExpected string) {
t.Helper()
re := regexp.MustCompile(reStr)
result := appendReplaceRegexp(nil, s, re, replacement, uint64(limit))
if string(result) != resultExpected {
t.Fatalf("unexpected result for appendReplaceRegexp(%q, %q, %q, %d)\ngot\n%s\nwant\n%s", s, reStr, replacement, limit, result, resultExpected)
}
}
f("", "", "", 0, "")
f("", "foo", "bar", 0, "")
f("abc", "foo", "bar", 0, "abc")
f("foo", "fo+", "bar", 0, "bar")
f("foox", "fo+", "bar", 0, "barx")
f("afoo", "fo+", "bar", 0, "abar")
f("afoox", "fo+", "bar", 0, "abarx")
f("foo-bar/baz", "[-/]", "_", 0, "foo_bar_baz")
f("foo bar/ baz ", "[ /]", "", 2, "foobar baz ")
// placeholders
f("afoo abc barz", "a([^ ]+)", "b${1}x", 0, "bfoox bbcx bbrzx")
f("afoo abc barz", "a([^ ]+)", "b${1}x", 1, "bfoox abc barz")
}

View file

@ -163,10 +163,11 @@ func TestAppendReplace(t *testing.T) {
f("", "", "", 0, "") f("", "", "", 0, "")
f("", "foo", "bar", 0, "") f("", "foo", "bar", 0, "")
f("abc", "foo", "bar", 0, "abc")
f("foo", "foo", "bar", 0, "bar") f("foo", "foo", "bar", 0, "bar")
f("foox", "foo", "bar", 0, "barx") f("foox", "foo", "bar", 0, "barx")
f("afoo", "foo", "bar", 0, "abar") f("afoo", "foo", "bar", 0, "abar")
f("afoox", "foo", "bar", 0, "abarx") f("afoox", "foo", "bar", 0, "abarx")
f("foo-bar-baz", "-", "_", 0, "foo_bar_baz") f("foo-bar-baz", "-", "_", 0, "foo_bar_baz")
f("foo bar baz ", " ", "", 0, "foobarbaz") f("foo bar baz ", " ", "", 1, "foobar baz ")
} }
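
The updated test case above pins down the limit semantics of appendReplace: limit N rewrites only the first N occurrences. This mirrors strings.Replace with a positive count, as this small standalone check shows:

package main

import (
	"fmt"
	"strings"
)

func main() {
	fmt.Println(strings.Replace("foo bar baz ", " ", "", 1))  // "foobar baz "
	fmt.Println(strings.Replace("foo bar baz ", " ", "", -1)) // "foobarbaz"
}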

View file

@ -67,14 +67,26 @@ func (ps *pipeSort) updateNeededFields(neededFields, unneededFields fieldsSet) {
} }
} }
func (ps *pipeSort) newPipeProcessor(workersCount int, stopCh <-chan struct{}, cancel func(), ppBase pipeProcessor) pipeProcessor { func (ps *pipeSort) optimize() {
if ps.limit > 0 { // nothing to do
return newPipeTopkProcessor(ps, workersCount, stopCh, cancel, ppBase)
}
return newPipeSortProcessor(ps, workersCount, stopCh, cancel, ppBase)
} }
func newPipeSortProcessor(ps *pipeSort, workersCount int, stopCh <-chan struct{}, cancel func(), ppBase pipeProcessor) pipeProcessor { func (ps *pipeSort) hasFilterInWithQuery() bool {
return false
}
func (ps *pipeSort) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
return ps, nil
}
func (ps *pipeSort) newPipeProcessor(workersCount int, stopCh <-chan struct{}, cancel func(), ppNext pipeProcessor) pipeProcessor {
if ps.limit > 0 {
return newPipeTopkProcessor(ps, workersCount, stopCh, cancel, ppNext)
}
return newPipeSortProcessor(ps, workersCount, stopCh, cancel, ppNext)
}
func newPipeSortProcessor(ps *pipeSort, workersCount int, stopCh <-chan struct{}, cancel func(), ppNext pipeProcessor) pipeProcessor {
maxStateSize := int64(float64(memory.Allowed()) * 0.2) maxStateSize := int64(float64(memory.Allowed()) * 0.2)
shards := make([]pipeSortProcessorShard, workersCount) shards := make([]pipeSortProcessorShard, workersCount)
@ -92,7 +104,7 @@ func newPipeSortProcessor(ps *pipeSort, workersCount int, stopCh <-chan struct{}
ps: ps, ps: ps,
stopCh: stopCh, stopCh: stopCh,
cancel: cancel, cancel: cancel,
ppBase: ppBase, ppNext: ppNext,
shards: shards, shards: shards,
@ -107,7 +119,7 @@ type pipeSortProcessor struct {
ps *pipeSort ps *pipeSort
stopCh <-chan struct{} stopCh <-chan struct{}
cancel func() cancel func()
ppBase pipeProcessor ppNext pipeProcessor
shards []pipeSortProcessorShard shards []pipeSortProcessorShard
@ -522,7 +534,7 @@ func (wctx *pipeSortWriteContext) writeNextRow(shard *pipeSortProcessorShard) {
} }
} }
if !areEqualColumns { if !areEqualColumns {
// send the current block to ppBase and construct a block with new set of columns // send the current block to ppNext and construct a block with new set of columns
wctx.flush() wctx.flush()
rcs = wctx.rcs[:0] rcs = wctx.rcs[:0]
@ -561,10 +573,10 @@ func (wctx *pipeSortWriteContext) flush() {
wctx.valuesLen = 0 wctx.valuesLen = 0
// Flush rcs to ppBase // Flush rcs to ppNext
br.setResultColumns(rcs, wctx.rowsCount) br.setResultColumns(rcs, wctx.rowsCount)
wctx.rowsCount = 0 wctx.rowsCount = 0
wctx.psp.ppBase.writeBlock(0, br) wctx.psp.ppNext.writeBlock(0, br)
br.reset() br.reset()
for i := range rcs { for i := range rcs {
rcs[i].resetValues() rcs[i].resetValues()

View file

@ -116,24 +116,47 @@ func (ps *pipeStats) updateNeededFields(neededFields, unneededFields fieldsSet)
unneededFields.reset() unneededFields.reset()
} }
func (ps *pipeStats) optimize() {
for _, f := range ps.funcs {
f.iff.optimizeFilterIn()
}
}
func (ps *pipeStats) hasFilterInWithQuery() bool {
for _, f := range ps.funcs {
if f.iff.hasFilterInWithQuery() {
return true
}
}
return false
}
func (ps *pipeStats) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
funcsNew := make([]pipeStatsFunc, len(ps.funcs))
for i, f := range ps.funcs {
iffNew, err := f.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
f.iff = iffNew
funcsNew[i] = f
}
psNew := *ps
psNew.funcs = funcsNew
return &psNew, nil
}
const stateSizeBudgetChunk = 1 << 20 const stateSizeBudgetChunk = 1 << 20
func (ps *pipeStats) newPipeProcessor(workersCount int, stopCh <-chan struct{}, cancel func(), ppBase pipeProcessor) pipeProcessor { func (ps *pipeStats) newPipeProcessor(workersCount int, stopCh <-chan struct{}, cancel func(), ppNext pipeProcessor) pipeProcessor {
maxStateSize := int64(float64(memory.Allowed()) * 0.3) maxStateSize := int64(float64(memory.Allowed()) * 0.3)
shards := make([]pipeStatsProcessorShard, workersCount) shards := make([]pipeStatsProcessorShard, workersCount)
funcsLen := len(ps.funcs)
for i := range shards { for i := range shards {
shards[i] = pipeStatsProcessorShard{ shards[i] = pipeStatsProcessorShard{
pipeStatsProcessorShardNopad: pipeStatsProcessorShardNopad{ pipeStatsProcessorShardNopad: pipeStatsProcessorShardNopad{
ps: ps, ps: ps,
m: make(map[string]*pipeStatsGroup),
bms: make([]bitmap, funcsLen),
brs: make([]*blockResult, funcsLen),
brsBuf: make([]blockResult, funcsLen),
stateSizeBudget: stateSizeBudgetChunk, stateSizeBudget: stateSizeBudgetChunk,
}, },
} }
@ -144,7 +167,7 @@ func (ps *pipeStats) newPipeProcessor(workersCount int, stopCh <-chan struct{},
ps: ps, ps: ps,
stopCh: stopCh, stopCh: stopCh,
cancel: cancel, cancel: cancel,
ppBase: ppBase, ppNext: ppNext,
shards: shards, shards: shards,
@ -159,7 +182,7 @@ type pipeStatsProcessor struct {
ps *pipeStats ps *pipeStats
stopCh <-chan struct{} stopCh <-chan struct{}
cancel func() cancel func()
ppBase pipeProcessor ppNext pipeProcessor
shards []pipeStatsProcessorShard shards []pipeStatsProcessorShard
@ -190,7 +213,22 @@ type pipeStatsProcessorShardNopad struct {
stateSizeBudget int stateSizeBudget int
} }
func (shard *pipeStatsProcessorShard) init() {
if shard.m != nil {
// Already initialized
return
}
funcsLen := len(shard.ps.funcs)
shard.m = make(map[string]*pipeStatsGroup)
shard.bms = make([]bitmap, funcsLen)
shard.brs = make([]*blockResult, funcsLen)
shard.brsBuf = make([]blockResult, funcsLen)
}
func (shard *pipeStatsProcessorShard) writeBlock(br *blockResult) { func (shard *pipeStatsProcessorShard) writeBlock(br *blockResult) {
shard.init()
byFields := shard.ps.byFields byFields := shard.ps.byFields
// Apply per-function filters // Apply per-function filters
@ -398,7 +436,9 @@ func (psp *pipeStatsProcessor) flush() error {
// Merge states across shards // Merge states across shards
shards := psp.shards shards := psp.shards
m := shards[0].m shardMain := &shards[0]
shardMain.init()
m := shardMain.m
shards = shards[1:] shards = shards[1:]
for i := range shards { for i := range shards {
shard := &shards[i] shard := &shards[i]
@ -420,12 +460,12 @@ func (psp *pipeStatsProcessor) flush() error {
} }
} }
// Write per-group states to ppBase // Write per-group states to ppNext
byFields := psp.ps.byFields byFields := psp.ps.byFields
if len(byFields) == 0 && len(m) == 0 { if len(byFields) == 0 && len(m) == 0 {
// Special case - zero matching rows. // Special case - zero matching rows.
_ = shards[0].getPipeStatsGroup(nil) _ = shardMain.getPipeStatsGroup(nil)
m = shards[0].m m = shardMain.m
} }
rcs := make([]resultColumn, 0, len(byFields)+len(psp.ps.funcs)) rcs := make([]resultColumn, 0, len(byFields)+len(psp.ps.funcs))
@ -480,7 +520,7 @@ func (psp *pipeStatsProcessor) flush() error {
if valuesLen >= 1_000_000 { if valuesLen >= 1_000_000 {
br.setResultColumns(rcs, rowsCount) br.setResultColumns(rcs, rowsCount)
rowsCount = 0 rowsCount = 0
psp.ppBase.writeBlock(0, &br) psp.ppNext.writeBlock(0, &br)
br.reset() br.reset()
for i := range rcs { for i := range rcs {
rcs[i].resetValues() rcs[i].resetValues()
@ -490,7 +530,7 @@ func (psp *pipeStatsProcessor) flush() error {
} }
br.setResultColumns(rcs, rowsCount) br.setResultColumns(rcs, rowsCount)
psp.ppBase.writeBlock(0, &br) psp.ppNext.writeBlock(0, &br)
return nil return nil
} }

View file

@ -13,7 +13,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil"
) )
func newPipeTopkProcessor(ps *pipeSort, workersCount int, stopCh <-chan struct{}, cancel func(), ppBase pipeProcessor) pipeProcessor { func newPipeTopkProcessor(ps *pipeSort, workersCount int, stopCh <-chan struct{}, cancel func(), ppNext pipeProcessor) pipeProcessor {
maxStateSize := int64(float64(memory.Allowed()) * 0.2) maxStateSize := int64(float64(memory.Allowed()) * 0.2)
shards := make([]pipeTopkProcessorShard, workersCount) shards := make([]pipeTopkProcessorShard, workersCount)
@ -31,7 +31,7 @@ func newPipeTopkProcessor(ps *pipeSort, workersCount int, stopCh <-chan struct{}
ps: ps, ps: ps,
stopCh: stopCh, stopCh: stopCh,
cancel: cancel, cancel: cancel,
ppBase: ppBase, ppNext: ppNext,
shards: shards, shards: shards,
@ -46,7 +46,7 @@ type pipeTopkProcessor struct {
ps *pipeSort ps *pipeSort
stopCh <-chan struct{} stopCh <-chan struct{}
cancel func() cancel func()
ppBase pipeProcessor ppNext pipeProcessor
shards []pipeTopkProcessorShard shards []pipeTopkProcessorShard
@ -464,7 +464,7 @@ func (wctx *pipeTopkWriteContext) writeNextRow(shard *pipeTopkProcessorShard) bo
} }
} }
if !areEqualColumns { if !areEqualColumns {
// send the current block to ppBase and construct a block with new set of columns // send the current block to ppNext and construct a block with new set of columns
wctx.flush() wctx.flush()
rcs = wctx.rcs[:0] rcs = wctx.rcs[:0]
@ -508,10 +508,10 @@ func (wctx *pipeTopkWriteContext) flush() {
wctx.valuesLen = 0 wctx.valuesLen = 0
// Flush rcs to ppBase // Flush rcs to ppNext
br.setResultColumns(rcs, wctx.rowsCount) br.setResultColumns(rcs, wctx.rowsCount)
wctx.rowsCount = 0 wctx.rowsCount = 0
wctx.ptp.ppBase.writeBlock(0, br) wctx.ptp.ppNext.writeBlock(0, br)
br.reset() br.reset()
for i := range rcs { for i := range rcs {
rcs[i].resetValues() rcs[i].resetValues()

View file

@ -32,7 +32,7 @@ func (pu *pipeUniq) String() string {
s += " by (" + fieldNamesString(pu.byFields) + ")" s += " by (" + fieldNamesString(pu.byFields) + ")"
} }
if pu.hitsFieldName != "" { if pu.hitsFieldName != "" {
s += " hits" s += " with hits"
} }
if pu.limit > 0 { if pu.limit > 0 {
s += fmt.Sprintf(" limit %d", pu.limit) s += fmt.Sprintf(" limit %d", pu.limit)
@ -51,7 +51,19 @@ func (pu *pipeUniq) updateNeededFields(neededFields, unneededFields fieldsSet) {
} }
} }
func (pu *pipeUniq) newPipeProcessor(workersCount int, stopCh <-chan struct{}, cancel func(), ppBase pipeProcessor) pipeProcessor { func (pu *pipeUniq) optimize() {
// nothing to do
}
func (pu *pipeUniq) hasFilterInWithQuery() bool {
return false
}
func (pu *pipeUniq) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
return pu, nil
}
func (pu *pipeUniq) newPipeProcessor(workersCount int, stopCh <-chan struct{}, cancel func(), ppNext pipeProcessor) pipeProcessor {
maxStateSize := int64(float64(memory.Allowed()) * 0.2) maxStateSize := int64(float64(memory.Allowed()) * 0.2)
shards := make([]pipeUniqProcessorShard, workersCount) shards := make([]pipeUniqProcessorShard, workersCount)
@ -69,7 +81,7 @@ func (pu *pipeUniq) newPipeProcessor(workersCount int, stopCh <-chan struct{}, c
pu: pu, pu: pu,
stopCh: stopCh, stopCh: stopCh,
cancel: cancel, cancel: cancel,
ppBase: ppBase, ppNext: ppNext,
shards: shards, shards: shards,
@ -84,7 +96,7 @@ type pipeUniqProcessor struct {
pu *pipeUniq pu *pipeUniq
stopCh <-chan struct{} stopCh <-chan struct{}
cancel func() cancel func()
ppBase pipeProcessor ppNext pipeProcessor
shards []pipeUniqProcessorShard shards []pipeUniqProcessorShard
@ -418,7 +430,7 @@ func (wctx *pipeUniqWriteContext) writeRow(rowFields []Field) {
} }
} }
if !areEqualColumns { if !areEqualColumns {
// send the current block to ppBase and construct a block with new set of columns // send the current block to ppNext and construct a block with new set of columns
wctx.flush() wctx.flush()
rcs = wctx.rcs[:0] rcs = wctx.rcs[:0]
@ -446,10 +458,10 @@ func (wctx *pipeUniqWriteContext) flush() {
wctx.valuesLen = 0 wctx.valuesLen = 0
// Flush rcs to ppBase // Flush rcs to ppNext
br.setResultColumns(rcs, wctx.rowsCount) br.setResultColumns(rcs, wctx.rowsCount)
wctx.rowsCount = 0 wctx.rowsCount = 0
wctx.pup.ppBase.writeBlock(0, br) wctx.pup.ppNext.writeBlock(0, br)
br.reset() br.reset()
for i := range rcs { for i := range rcs {
rcs[i].resetValues() rcs[i].resetValues()
@ -477,6 +489,12 @@ func parsePipeUniq(lex *lexer) (*pipeUniq, error) {
pu.byFields = bfs pu.byFields = bfs
} }
if lex.isKeyword("with") {
lex.nextToken()
if !lex.isKeyword("hits") {
return nil, fmt.Errorf("missing 'hits' after 'with'")
}
}
if lex.isKeyword("hits") { if lex.isKeyword("hits") {
lex.nextToken() lex.nextToken()
hitsFieldName := "hits" hitsFieldName := "hits"

View file

@ -11,15 +11,15 @@ func TestParsePipeUniqSuccess(t *testing.T) {
} }
f(`uniq`) f(`uniq`)
f(`uniq hits`) f(`uniq with hits`)
f(`uniq limit 10`) f(`uniq limit 10`)
f(`uniq hits limit 10`) f(`uniq with hits limit 10`)
f(`uniq by (x)`) f(`uniq by (x)`)
f(`uniq by (x) limit 10`) f(`uniq by (x) limit 10`)
f(`uniq by (x, y)`) f(`uniq by (x, y)`)
f(`uniq by (x, y) hits`) f(`uniq by (x, y) with hits`)
f(`uniq by (x, y) limit 10`) f(`uniq by (x, y) limit 10`)
f(`uniq by (x, y) hits limit 10`) f(`uniq by (x, y) with hits limit 10`)
} }
func TestParsePipeUniqFailure(t *testing.T) { func TestParsePipeUniqFailure(t *testing.T) {
@ -33,6 +33,7 @@ func TestParsePipeUniqFailure(t *testing.T) {
f(`uniq by hits`) f(`uniq by hits`)
f(`uniq by(x) limit`) f(`uniq by(x) limit`)
f(`uniq by(x) limit foo`) f(`uniq by(x) limit foo`)
f(`uniq by (x) with`)
} }
func TestPipeUniq(t *testing.T) { func TestPipeUniq(t *testing.T) {
@ -365,10 +366,12 @@ func TestPipeUniqUpdateNeededFields(t *testing.T) {
f("uniq by()", "*", "", "*", "") f("uniq by()", "*", "", "*", "")
f("uniq by(*)", "*", "", "*", "") f("uniq by(*)", "*", "", "*", "")
f("uniq by(f1,f2)", "*", "", "f1,f2", "") f("uniq by(f1,f2)", "*", "", "f1,f2", "")
f("uniq by(f1,f2) with hits", "*", "", "f1,f2", "")
// all the needed fields, unneeded fields do not intersect with src // all the needed fields, unneeded fields do not intersect with src
f("uniq by(s1, s2)", "*", "f1,f2", "s1,s2", "") f("uniq by(s1, s2)", "*", "f1,f2", "s1,s2", "")
f("uniq", "*", "f1,f2", "*", "") f("uniq", "*", "f1,f2", "*", "")
f("uniq with hits", "*", "f1,f2", "*", "")
// all the needed fields, unneeded fields intersect with src // all the needed fields, unneeded fields intersect with src
f("uniq by(s1, s2)", "*", "s1,f1,f2", "s1,s2", "") f("uniq by(s1, s2)", "*", "s1,f1,f2", "s1,s2", "")

View file

@ -6,6 +6,49 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
) )
func updateNeededFieldsForUnpackPipe(fromField string, outFields []string, keepOriginalFields, skipEmptyResults bool, iff *ifFilter, neededFields, unneededFields fieldsSet) {
if neededFields.contains("*") {
unneededFieldsOrig := unneededFields.clone()
unneededFieldsCount := 0
if len(outFields) > 0 {
for _, f := range outFields {
if unneededFieldsOrig.contains(f) {
unneededFieldsCount++
}
if !keepOriginalFields && !skipEmptyResults {
unneededFields.add(f)
}
}
}
if len(outFields) == 0 || unneededFieldsCount < len(outFields) {
unneededFields.remove(fromField)
if iff != nil {
unneededFields.removeFields(iff.neededFields)
}
}
} else {
neededFieldsOrig := neededFields.clone()
needFromField := len(outFields) == 0
if len(outFields) > 0 {
needFromField = false
for _, f := range outFields {
if neededFieldsOrig.contains(f) {
needFromField = true
}
if !keepOriginalFields && !skipEmptyResults {
neededFields.remove(f)
}
}
}
if needFromField {
neededFields.add(fromField)
if iff != nil {
neededFields.addFields(iff.neededFields)
}
}
}
}
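
In plain terms, updateNeededFieldsForUnpackPipe keeps fromField (plus the fields referenced by the optional if-filter) only when a later pipe still needs at least one of the unpacked output fields. A simplified standalone model of the non-wildcard branch, using plain string sets and ignoring the keepOriginalFields/skipEmptyResults handling:

package main

import "fmt"

// neededAfterUnpack models how the needed-fields set changes when walking
// backwards through an unpack pipe that reads fromField and emits outFields.
func neededAfterUnpack(needed map[string]bool, fromField string, outFields []string) map[string]bool {
	result := make(map[string]bool, len(needed))
	for f := range needed {
		result[f] = true
	}
	needFromField := len(outFields) == 0
	for _, f := range outFields {
		if needed[f] {
			needFromField = true
		}
		delete(result, f) // output fields are produced by the unpack pipe itself
	}
	if needFromField {
		result[fromField] = true
	}
	return result
}

func main() {
	needed := map[string]bool{"ip": true, "level": true}
	fmt.Println(neededAfterUnpack(needed, "_msg", []string{"ip"}))    // map[_msg:true level:true]
	fmt.Println(neededAfterUnpack(needed, "_msg", []string{"other"})) // map[ip:true level:true]
}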
type fieldsUnpackerContext struct { type fieldsUnpackerContext struct {
workerID uint workerID uint
fieldPrefix string fieldPrefix string
@ -53,12 +96,12 @@ func (uctx *fieldsUnpackerContext) addField(name, value string) {
}) })
} }
func newPipeUnpackProcessor(workersCount int, unpackFunc func(uctx *fieldsUnpackerContext, s string), ppBase pipeProcessor, func newPipeUnpackProcessor(workersCount int, unpackFunc func(uctx *fieldsUnpackerContext, s string), ppNext pipeProcessor,
fromField string, fieldPrefix string, keepOriginalFields, skipEmptyResults bool, iff *ifFilter) *pipeUnpackProcessor { fromField string, fieldPrefix string, keepOriginalFields, skipEmptyResults bool, iff *ifFilter) *pipeUnpackProcessor {
return &pipeUnpackProcessor{ return &pipeUnpackProcessor{
unpackFunc: unpackFunc, unpackFunc: unpackFunc,
ppBase: ppBase, ppNext: ppNext,
shards: make([]pipeUnpackProcessorShard, workersCount), shards: make([]pipeUnpackProcessorShard, workersCount),
@ -72,7 +115,7 @@ func newPipeUnpackProcessor(workersCount int, unpackFunc func(uctx *fieldsUnpack
type pipeUnpackProcessor struct { type pipeUnpackProcessor struct {
unpackFunc func(uctx *fieldsUnpackerContext, s string) unpackFunc func(uctx *fieldsUnpackerContext, s string)
ppBase pipeProcessor ppNext pipeProcessor
shards []pipeUnpackProcessorShard shards []pipeUnpackProcessorShard
@ -104,7 +147,7 @@ func (pup *pipeUnpackProcessor) writeBlock(workerID uint, br *blockResult) {
} }
shard := &pup.shards[workerID] shard := &pup.shards[workerID]
shard.wctx.init(workerID, pup.ppBase, pup.keepOriginalFields, pup.skipEmptyResults, br) shard.wctx.init(workerID, pup.ppNext, pup.keepOriginalFields, pup.skipEmptyResults, br)
shard.uctx.init(workerID, pup.fieldPrefix) shard.uctx.init(workerID, pup.fieldPrefix)
bm := &shard.bm bm := &shard.bm
@ -113,7 +156,7 @@ func (pup *pipeUnpackProcessor) writeBlock(workerID uint, br *blockResult) {
if pup.iff != nil { if pup.iff != nil {
pup.iff.f.applyToBlockResult(br, bm) pup.iff.f.applyToBlockResult(br, bm)
if bm.isZero() { if bm.isZero() {
pup.ppBase.writeBlock(workerID, br) pup.ppNext.writeBlock(workerID, br)
return return
} }
} }
@ -132,13 +175,16 @@ func (pup *pipeUnpackProcessor) writeBlock(workerID uint, br *blockResult) {
} }
} else { } else {
values := c.getValues(br) values := c.getValues(br)
vPrevApplied := "" vPrev := ""
hadUnpacks := false
for i, v := range values { for i, v := range values {
if bm.isSetBit(i) { if bm.isSetBit(i) {
if vPrevApplied != v { if !hadUnpacks || vPrev != v {
vPrev = v
hadUnpacks = true
shard.uctx.resetFields() shard.uctx.resetFields()
pup.unpackFunc(&shard.uctx, v) pup.unpackFunc(&shard.uctx, v)
vPrevApplied = v
} }
shard.wctx.writeRow(i, shard.uctx.fields) shard.wctx.writeRow(i, shard.uctx.fields)
} else { } else {
@ -158,7 +204,7 @@ func (pup *pipeUnpackProcessor) flush() error {
type pipeUnpackWriteContext struct { type pipeUnpackWriteContext struct {
workerID uint workerID uint
ppBase pipeProcessor ppNext pipeProcessor
keepOriginalFields bool keepOriginalFields bool
skipEmptyResults bool skipEmptyResults bool
@ -177,7 +223,7 @@ type pipeUnpackWriteContext struct {
func (wctx *pipeUnpackWriteContext) reset() { func (wctx *pipeUnpackWriteContext) reset() {
wctx.workerID = 0 wctx.workerID = 0
wctx.ppBase = nil wctx.ppNext = nil
wctx.keepOriginalFields = false wctx.keepOriginalFields = false
wctx.brSrc = nil wctx.brSrc = nil
@ -193,11 +239,11 @@ func (wctx *pipeUnpackWriteContext) reset() {
wctx.valuesLen = 0 wctx.valuesLen = 0
} }
func (wctx *pipeUnpackWriteContext) init(workerID uint, ppBase pipeProcessor, keepOriginalFields, skipEmptyResults bool, brSrc *blockResult) { func (wctx *pipeUnpackWriteContext) init(workerID uint, ppNext pipeProcessor, keepOriginalFields, skipEmptyResults bool, brSrc *blockResult) {
wctx.reset() wctx.reset()
wctx.workerID = workerID wctx.workerID = workerID
wctx.ppBase = ppBase wctx.ppNext = ppNext
wctx.keepOriginalFields = keepOriginalFields wctx.keepOriginalFields = keepOriginalFields
wctx.skipEmptyResults = skipEmptyResults wctx.skipEmptyResults = skipEmptyResults
@ -219,7 +265,7 @@ func (wctx *pipeUnpackWriteContext) writeRow(rowIdx int, extraFields []Field) {
} }
} }
if !areEqualColumns { if !areEqualColumns {
// send the current block to ppBase and construct a block with new set of columns // send the current block to ppNext and construct a block with new set of columns
wctx.flush() wctx.flush()
rcs = wctx.rcs[:0] rcs = wctx.rcs[:0]
@ -264,11 +310,11 @@ func (wctx *pipeUnpackWriteContext) flush() {
wctx.valuesLen = 0 wctx.valuesLen = 0
// Flush rcs to ppBase // Flush rcs to ppNext
br := &wctx.br br := &wctx.br
br.setResultColumns(rcs, wctx.rowsCount) br.setResultColumns(rcs, wctx.rowsCount)
wctx.rowsCount = 0 wctx.rowsCount = 0
wctx.ppBase.writeBlock(wctx.workerID, br) wctx.ppNext.writeBlock(wctx.workerID, br)
br.reset() br.reset()
for i := range rcs { for i := range rcs {
rcs[i].resetValues() rcs[i].resetValues()

View file

@ -56,50 +56,25 @@ func (pu *pipeUnpackJSON) updateNeededFields(neededFields, unneededFields fields
updateNeededFieldsForUnpackPipe(pu.fromField, pu.fields, pu.keepOriginalFields, pu.skipEmptyResults, pu.iff, neededFields, unneededFields) updateNeededFieldsForUnpackPipe(pu.fromField, pu.fields, pu.keepOriginalFields, pu.skipEmptyResults, pu.iff, neededFields, unneededFields)
} }
func updateNeededFieldsForUnpackPipe(fromField string, outFields []string, keepOriginalFields, skipEmptyResults bool, iff *ifFilter, neededFields, unneededFields fieldsSet) { func (pu *pipeUnpackJSON) optimize() {
if neededFields.contains("*") { pu.iff.optimizeFilterIn()
unneededFieldsOrig := unneededFields.clone()
unneededFieldsCount := 0
if len(outFields) > 0 {
for _, f := range outFields {
if unneededFieldsOrig.contains(f) {
unneededFieldsCount++
}
if !keepOriginalFields && !skipEmptyResults {
unneededFields.add(f)
}
}
}
if len(outFields) == 0 || unneededFieldsCount < len(outFields) {
unneededFields.remove(fromField)
if iff != nil {
unneededFields.removeFields(iff.neededFields)
}
}
} else {
neededFieldsOrig := neededFields.clone()
needFromField := len(outFields) == 0
if len(outFields) > 0 {
needFromField = false
for _, f := range outFields {
if neededFieldsOrig.contains(f) {
needFromField = true
}
if !keepOriginalFields && !skipEmptyResults {
neededFields.remove(f)
}
}
}
if needFromField {
neededFields.add(fromField)
if iff != nil {
neededFields.addFields(iff.neededFields)
}
}
}
} }
func (pu *pipeUnpackJSON) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor { func (pu *pipeUnpackJSON) hasFilterInWithQuery() bool {
return pu.iff.hasFilterInWithQuery()
}
func (pu *pipeUnpackJSON) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
iffNew, err := pu.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
puNew := *pu
puNew.iff = iffNew
return &puNew, nil
}
func (pu *pipeUnpackJSON) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
unpackJSON := func(uctx *fieldsUnpackerContext, s string) { unpackJSON := func(uctx *fieldsUnpackerContext, s string) {
if len(s) == 0 || s[0] != '{' { if len(s) == 0 || s[0] != '{' {
// This isn't a JSON object // This isn't a JSON object
@ -134,7 +109,7 @@ func (pu *pipeUnpackJSON) newPipeProcessor(workersCount int, _ <-chan struct{},
} }
PutJSONParser(p) PutJSONParser(p)
} }
return newPipeUnpackProcessor(workersCount, unpackJSON, ppBase, pu.fromField, pu.resultPrefix, pu.keepOriginalFields, pu.skipEmptyResults, pu.iff) return newPipeUnpackProcessor(workersCount, unpackJSON, ppNext, pu.fromField, pu.resultPrefix, pu.keepOriginalFields, pu.skipEmptyResults, pu.iff)
} }
func parsePipeUnpackJSON(lex *lexer) (*pipeUnpackJSON, error) { func parsePipeUnpackJSON(lex *lexer) (*pipeUnpackJSON, error) {

View file

@ -1,10 +1,6 @@
package logstorage package logstorage
import ( import (
"math/rand"
"slices"
"strings"
"sync"
"testing" "testing"
) )
@ -166,7 +162,6 @@ func TestPipeUnpackJSON(t *testing.T) {
}, [][]Field{ }, [][]Field{
{ {
{"_msg", `{"foo":"bar"}`}, {"_msg", `{"foo":"bar"}`},
{"x", ""},
}, },
}) })
@ -313,228 +308,12 @@ func TestPipeUnpackJSON(t *testing.T) {
{"y", `abc`}, {"y", `abc`},
}, },
{ {
{"y", ""},
{"z", `foobar`}, {"z", `foobar`},
{"x", `{"z":["bar",123]}`}, {"x", `{"z":["bar",123]}`},
}, },
}) })
} }
func expectPipeResults(t *testing.T, pipeStr string, rows, rowsExpected [][]Field) {
t.Helper()
lex := newLexer(pipeStr)
p, err := parsePipe(lex)
if err != nil {
t.Fatalf("unexpected error when parsing %q: %s", pipeStr, err)
}
workersCount := 5
stopCh := make(chan struct{})
cancel := func() {}
ppTest := newTestPipeProcessor()
pp := p.newPipeProcessor(workersCount, stopCh, cancel, ppTest)
brw := newTestBlockResultWriter(workersCount, pp)
for _, row := range rows {
brw.writeRow(row)
}
brw.flush()
pp.flush()
ppTest.expectRows(t, rowsExpected)
}
func newTestBlockResultWriter(workersCount int, ppBase pipeProcessor) *testBlockResultWriter {
return &testBlockResultWriter{
workersCount: workersCount,
ppBase: ppBase,
}
}
type testBlockResultWriter struct {
workersCount int
ppBase pipeProcessor
rcs []resultColumn
br blockResult
rowsCount int
}
func (brw *testBlockResultWriter) writeRow(row []Field) {
if !brw.areSameFields(row) {
brw.flush()
brw.rcs = brw.rcs[:0]
for _, field := range row {
brw.rcs = appendResultColumnWithName(brw.rcs, field.Name)
}
}
for i, field := range row {
brw.rcs[i].addValue(field.Value)
}
brw.rowsCount++
if rand.Intn(5) == 0 {
brw.flush()
}
}
func (brw *testBlockResultWriter) areSameFields(row []Field) bool {
if len(brw.rcs) != len(row) {
return false
}
for i, rc := range brw.rcs {
if rc.name != row[i].Name {
return false
}
}
return true
}
func (brw *testBlockResultWriter) flush() {
brw.br.setResultColumns(brw.rcs, brw.rowsCount)
brw.rowsCount = 0
workerID := rand.Intn(brw.workersCount)
brw.ppBase.writeBlock(uint(workerID), &brw.br)
brw.br.reset()
for i := range brw.rcs {
brw.rcs[i].resetValues()
}
}
func newTestPipeProcessor() *testPipeProcessor {
return &testPipeProcessor{}
}
type testPipeProcessor struct {
resultRowsLock sync.Mutex
resultRows [][]Field
}
func (pp *testPipeProcessor) writeBlock(_ uint, br *blockResult) {
cs := br.getColumns()
var columnValues [][]string
for _, c := range cs {
values := c.getValues(br)
columnValues = append(columnValues, values)
}
for i := range br.timestamps {
row := make([]Field, len(columnValues))
for j, values := range columnValues {
r := &row[j]
r.Name = strings.Clone(cs[j].name)
r.Value = strings.Clone(values[i])
}
pp.resultRowsLock.Lock()
pp.resultRows = append(pp.resultRows, row)
pp.resultRowsLock.Unlock()
}
}
func (pp *testPipeProcessor) flush() error {
return nil
}
func (pp *testPipeProcessor) expectRows(t *testing.T, expectedRows [][]Field) {
t.Helper()
if len(pp.resultRows) != len(expectedRows) {
t.Fatalf("unexpected number of rows; got %d; want %d\nrows got\n%s\nrows expected\n%s",
len(pp.resultRows), len(expectedRows), rowsToString(pp.resultRows), rowsToString(expectedRows))
}
sortTestRows(pp.resultRows)
sortTestRows(expectedRows)
for i, resultRow := range pp.resultRows {
expectedRow := expectedRows[i]
if len(resultRow) != len(expectedRow) {
t.Fatalf("unexpected number of fields at row #%d; got %d; want %d\nrow got\n%s\nrow expected\n%s",
i, len(resultRow), len(expectedRow), rowToString(resultRow), rowToString(expectedRow))
}
for j, resultField := range resultRow {
expectedField := expectedRow[j]
if resultField.Name != expectedField.Name {
t.Fatalf("unexpected field name at row #%d; got %q; want %q\nrow got\n%s\nrow expected\n%s",
i, resultField.Name, expectedField.Name, rowToString(resultRow), rowToString(expectedRow))
}
if resultField.Value != expectedField.Value {
t.Fatalf("unexpected value for field %q at row #%d; got %q; want %q\nrow got\n%s\nrow expected\n%s",
resultField.Name, i, resultField.Value, expectedField.Value, rowToString(resultRow), rowToString(expectedRow))
}
}
}
}
func sortTestRows(rows [][]Field) {
for _, row := range rows {
sortTestFields(row)
}
slices.SortFunc(rows, func(a, b []Field) int {
reverse := false
if len(a) > len(b) {
reverse = true
a, b = b, a
}
for i, fA := range a {
fB := b[i]
result := cmpTestFields(fA, fB)
if result == 0 {
continue
}
if reverse {
result = -result
}
return result
}
if len(a) == len(b) {
return 0
}
if reverse {
return 1
}
return -1
})
}
func sortTestFields(fields []Field) {
slices.SortFunc(fields, cmpTestFields)
}
func cmpTestFields(a, b Field) int {
if a.Name == b.Name {
if a.Value == b.Value {
return 0
}
if a.Value < b.Value {
return -1
}
return 1
}
if a.Name < b.Name {
return -1
}
return 1
}
func rowsToString(rows [][]Field) string {
a := make([]string, len(rows))
for i, row := range rows {
a[i] = rowToString(row)
}
return strings.Join(a, "\n")
}
func rowToString(row []Field) string {
a := make([]string, len(row))
for i, f := range row {
a[i] = f.String()
}
return "{" + strings.Join(a, ",") + "}"
}
func TestPipeUnpackJSONUpdateNeededFields(t *testing.T) { func TestPipeUnpackJSONUpdateNeededFields(t *testing.T) {
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) { f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper() t.Helper()

View file

@ -54,7 +54,25 @@ func (pu *pipeUnpackLogfmt) updateNeededFields(neededFields, unneededFields fiel
updateNeededFieldsForUnpackPipe(pu.fromField, pu.fields, pu.keepOriginalFields, pu.skipEmptyResults, pu.iff, neededFields, unneededFields) updateNeededFieldsForUnpackPipe(pu.fromField, pu.fields, pu.keepOriginalFields, pu.skipEmptyResults, pu.iff, neededFields, unneededFields)
} }
func (pu *pipeUnpackLogfmt) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor { func (pu *pipeUnpackLogfmt) optimize() {
pu.iff.optimizeFilterIn()
}
func (pu *pipeUnpackLogfmt) hasFilterInWithQuery() bool {
return pu.iff.hasFilterInWithQuery()
}
func (pu *pipeUnpackLogfmt) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
iffNew, err := pu.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
puNew := *pu
puNew.iff = iffNew
return &puNew, nil
}
func (pu *pipeUnpackLogfmt) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
unpackLogfmt := func(uctx *fieldsUnpackerContext, s string) { unpackLogfmt := func(uctx *fieldsUnpackerContext, s string) {
p := getLogfmtParser() p := getLogfmtParser()
@ -82,8 +100,7 @@ func (pu *pipeUnpackLogfmt) newPipeProcessor(workersCount int, _ <-chan struct{}
putLogfmtParser(p) putLogfmtParser(p)
} }
return newPipeUnpackProcessor(workersCount, unpackLogfmt, ppBase, pu.fromField, pu.resultPrefix, pu.keepOriginalFields, pu.skipEmptyResults, pu.iff) return newPipeUnpackProcessor(workersCount, unpackLogfmt, ppNext, pu.fromField, pu.resultPrefix, pu.keepOriginalFields, pu.skipEmptyResults, pu.iff)
} }
func parsePipeUnpackLogfmt(lex *lexer) (*pipeUnpackLogfmt, error) { func parsePipeUnpackLogfmt(lex *lexer) (*pipeUnpackLogfmt, error) {

View file

@ -151,7 +151,6 @@ func TestPipeUnpackLogfmt(t *testing.T) {
}, },
}, [][]Field{ }, [][]Field{
{ {
{"foo", ""},
{"_msg", `foo=bar baz="x y=z" a=b`}, {"_msg", `foo=bar baz="x y=z" a=b`},
}, },
}) })
@ -291,7 +290,6 @@ func TestPipeUnpackLogfmt(t *testing.T) {
{"y", `abc`}, {"y", `abc`},
}, },
{ {
{"y", ""},
{"z", `foobar`}, {"z", `foobar`},
{"x", `z=bar`}, {"x", `z=bar`},
}, },

View file

@ -0,0 +1,284 @@
package logstorage
import (
"fmt"
"slices"
"unsafe"
"github.com/valyala/fastjson"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
)
// pipeUnroll processes '| unroll ...' pipe.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#unroll-pipe
type pipeUnroll struct {
// fields to unroll
fields []string
// iff is an optional filter for skipping the unroll
iff *ifFilter
}
func (pu *pipeUnroll) String() string {
s := "unroll"
if pu.iff != nil {
s += " " + pu.iff.String()
}
s += " by (" + fieldNamesString(pu.fields) + ")"
return s
}
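// Illustrative sketch of the unroll semantics (field names are hypothetical;
// the concrete behavior is covered by TestPipeUnroll): a query such as
//
// _time:5m | unroll by (ips)
//
// turns a single log entry with ips=["1.2.3.4","5.6.7.8"] into two entries with
// ips="1.2.3.4" and ips="5.6.7.8", while a non-array ips value is unrolled into
// a single entry with an empty ips value.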
func (pu *pipeUnroll) optimize() {
pu.iff.optimizeFilterIn()
}
func (pu *pipeUnroll) hasFilterInWithQuery() bool {
return pu.iff.hasFilterInWithQuery()
}
func (pu *pipeUnroll) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
iffNew, err := pu.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
puNew := *pu
puNew.iff = iffNew
return &puNew, nil
}
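// updateNeededFields marks the fields referenced by the optional if (...) filter
// as needed only when at least one of the unrolled fields is needed by the
// downstream pipes.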
func (pu *pipeUnroll) updateNeededFields(neededFields, unneededFields fieldsSet) {
if neededFields.contains("*") {
unneededFieldsCount := 0
for _, f := range pu.fields {
if unneededFields.contains(f) {
unneededFieldsCount++
}
}
if unneededFieldsCount < len(pu.fields) && pu.iff != nil {
unneededFields.removeFields(pu.iff.neededFields)
}
} else {
needIfFields := false
for _, f := range pu.fields {
if neededFields.contains(f) {
needIfFields = true
}
}
if needIfFields && pu.iff != nil {
neededFields.addFields(pu.iff.neededFields)
}
}
}
func (pu *pipeUnroll) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
return &pipeUnrollProcessor{
pu: pu,
ppNext: ppNext,
shards: make([]pipeUnrollProcessorShard, workersCount),
}
}
type pipeUnrollProcessor struct {
pu *pipeUnroll
ppNext pipeProcessor
shards []pipeUnrollProcessorShard
}
type pipeUnrollProcessorShard struct {
pipeUnrollProcessorShardNopad
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
_ [128 - unsafe.Sizeof(pipeUnrollProcessorShardNopad{})%128]byte
}
type pipeUnrollProcessorShardNopad struct {
bm bitmap
wctx pipeUnpackWriteContext
a arena
columnValues [][]string
unrolledValues [][]string
valuesBuf []string
fields []Field
}
func (pup *pipeUnrollProcessor) writeBlock(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
pu := pup.pu
shard := &pup.shards[workerID]
shard.wctx.init(workerID, pup.ppNext, false, false, br)
bm := &shard.bm
bm.init(len(br.timestamps))
bm.setBits()
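// Clear bits for rows which do not match the optional if (...) filter;
// such rows are passed to the next pipe without unrolling.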
if iff := pu.iff; iff != nil {
iff.f.applyToBlockResult(br, bm)
if bm.isZero() {
pup.ppNext.writeBlock(workerID, br)
return
}
}
shard.columnValues = slicesutil.SetLength(shard.columnValues, len(pu.fields))
columnValues := shard.columnValues
for i, f := range pu.fields {
c := br.getColumnByName(f)
columnValues[i] = c.getValues(br)
}
fields := shard.fields
for rowIdx := range br.timestamps {
if bm.isSetBit(rowIdx) {
shard.writeUnrolledFields(br, pu.fields, columnValues, rowIdx)
} else {
fields = fields[:0]
for i, f := range pu.fields {
v := columnValues[i][rowIdx]
fields = append(fields, Field{
Name: f,
Value: v,
})
}
shard.wctx.writeRow(rowIdx, fields)
}
}
shard.wctx.flush()
shard.wctx.reset()
shard.a.reset()
}
func (shard *pipeUnrollProcessorShard) writeUnrolledFields(br *blockResult, fieldNames []string, columnValues [][]string, rowIdx int) {
// unroll values at rowIdx row
shard.unrolledValues = slicesutil.SetLength(shard.unrolledValues, len(columnValues))
unrolledValues := shard.unrolledValues
valuesBuf := shard.valuesBuf[:0]
for i, values := range columnValues {
v := values[rowIdx]
valuesBufLen := len(valuesBuf)
valuesBuf = unpackJSONArray(valuesBuf, &shard.a, v)
unrolledValues[i] = valuesBuf[valuesBufLen:]
}
shard.valuesBuf = valuesBuf
// determine the number of output rows: the maximum length across the unrolled values
rows := len(unrolledValues[0])
for _, values := range unrolledValues[1:] {
if len(values) > rows {
rows = len(values)
}
}
if rows == 0 {
// Unroll to a single row with empty unrolled values.
rows = 1
}
// write unrolled values to the next pipe.
fields := shard.fields
for unrollIdx := 0; unrollIdx < rows; unrollIdx++ {
fields = fields[:0]
for i, values := range unrolledValues {
v := ""
if unrollIdx < len(values) {
v = values[unrollIdx]
}
fields = append(fields, Field{
Name: fieldNames[i],
Value: v,
})
}
shard.wctx.writeRow(rowIdx, fields)
}
}
func (pup *pipeUnrollProcessor) flush() error {
return nil
}
func parsePipeUnroll(lex *lexer) (*pipeUnroll, error) {
if !lex.isKeyword("unroll") {
return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "unroll")
}
lex.nextToken()
// parse optional if (...)
var iff *ifFilter
if lex.isKeyword("if") {
f, err := parseIfFilter(lex)
if err != nil {
return nil, err
}
iff = f
}
// parse by (...)
if lex.isKeyword("by") {
lex.nextToken()
}
fields, err := parseFieldNamesInParens(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'by(...)' at 'unroll': %w", err)
}
if len(fields) == 0 {
return nil, fmt.Errorf("'by(...)' at 'unroll' must contain at least a single field")
}
if slices.Contains(fields, "*") {
return nil, fmt.Errorf("unroll by '*' isn't supported")
}
pu := &pipeUnroll{
fields: fields,
iff: iff,
}
return pu, nil
}
func unpackJSONArray(dst []string, a *arena, s string) []string {
if s == "" || s[0] != '[' {
return dst
}
p := jspp.Get()
defer jspp.Put(p)
jsv, err := p.Parse(s)
if err != nil {
return dst
}
jsa, err := jsv.Array()
if err != nil {
return dst
}
for _, jsv := range jsa {
if jsv.Type() == fastjson.TypeString {
sb, err := jsv.StringBytes()
if err != nil {
logger.Panicf("BUG: unexpected error returned from StringBytes(): %s", err)
}
v := a.copyBytesToString(sb)
dst = append(dst, v)
} else {
bLen := len(a.b)
a.b = jsv.MarshalTo(a.b)
v := bytesutil.ToUnsafeString(a.b[bLen:])
dst = append(dst, v)
}
}
return dst
}
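// jspp is a pool of fastjson parsers re-used across unpackJSONArray calls
// in order to reduce memory allocations.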
var jspp fastjson.ParserPool

View file

@ -0,0 +1,261 @@
package logstorage
import (
"reflect"
"testing"
)
func TestParsePipeUnrollSuccess(t *testing.T) {
f := func(pipeStr string) {
t.Helper()
expectParsePipeSuccess(t, pipeStr)
}
f(`unroll by (foo)`)
f(`unroll if (x:y) by (foo, bar)`)
}
func TestParsePipeUnrollFailure(t *testing.T) {
f := func(pipeStr string) {
t.Helper()
expectParsePipeFailure(t, pipeStr)
}
f(`unroll`)
f(`unroll by ()`)
f(`unroll by (*)`)
f(`unroll by (f, *)`)
f(`unroll by`)
f(`unroll (`)
f(`unroll by (foo) bar`)
f(`unroll by (x) if (a:b)`)
}
func TestPipeUnroll(t *testing.T) {
f := func(pipeStr string, rows, rowsExpected [][]Field) {
t.Helper()
expectPipeResults(t, pipeStr, rows, rowsExpected)
}
// unroll by missing field
f("unroll (x)", [][]Field{
{
{"a", `["foo",1,{"baz":"x"},[1,2],null,NaN]`},
{"q", "w"},
},
}, [][]Field{
{
{"a", `["foo",1,{"baz":"x"},[1,2],null,NaN]`},
{"q", "w"},
{"x", ""},
},
})
// unroll by field without JSON array
f("unroll (q)", [][]Field{
{
{"a", `["foo",1,{"baz":"x"},[1,2],null,NaN]`},
{"q", "w"},
},
}, [][]Field{
{
{"a", `["foo",1,{"baz":"x"},[1,2],null,NaN]`},
{"q", ""},
},
})
// unroll by a single field
f("unroll (a)", [][]Field{
{
{"a", `["foo",1,{"baz":"x"},[1,2],null,NaN]`},
{"q", "w"},
},
{
{"a", "b"},
{"c", "d"},
},
}, [][]Field{
{
{"a", "foo"},
{"q", "w"},
},
{
{"a", "1"},
{"q", "w"},
},
{
{"a", `{"baz":"x"}`},
{"q", "w"},
},
{
{"a", "[1,2]"},
{"q", "w"},
},
{
{"a", "null"},
{"q", "w"},
},
{
{"a", "NaN"},
{"q", "w"},
},
{
{"a", ""},
{"c", "d"},
},
})
// unroll by multiple fields
f("unroll by (timestamp, value)", [][]Field{
{
{"timestamp", "[1,2,3]"},
{"value", `["foo","bar","baz"]`},
{"other", "abc"},
{"x", "y"},
},
{
{"timestamp", "[1]"},
{"value", `["foo","bar"]`},
},
{
{"timestamp", "[1]"},
{"value", `bar`},
{"q", "w"},
},
}, [][]Field{
{
{"timestamp", "1"},
{"value", "foo"},
{"other", "abc"},
{"x", "y"},
},
{
{"timestamp", "2"},
{"value", "bar"},
{"other", "abc"},
{"x", "y"},
},
{
{"timestamp", "3"},
{"value", "baz"},
{"other", "abc"},
{"x", "y"},
},
{
{"timestamp", "1"},
{"value", "foo"},
},
{
{"timestamp", ""},
{"value", "bar"},
},
{
{"timestamp", "1"},
{"value", ""},
{"q", "w"},
},
})
// conditional unroll: rows not matching the if (...) filter are passed through unchanged
f("unroll if (q:abc) (a)", [][]Field{
{
{"a", `asd`},
{"q", "w"},
},
{
{"a", `["foo",123]`},
{"q", "abc"},
},
}, [][]Field{
{
{"a", `asd`},
{"q", "w"},
},
{
{"a", "foo"},
{"q", "abc"},
},
{
{"a", "123"},
{"q", "abc"},
},
})
// unconditional unroll: a non-array value is unrolled into a single row with an empty value
f("unroll (a)", [][]Field{
{
{"a", `asd`},
{"q", "w"},
},
{
{"a", `["foo",123]`},
{"q", "abc"},
},
}, [][]Field{
{
{"a", ``},
{"q", "w"},
},
{
{"a", "foo"},
{"q", "abc"},
},
{
{"a", "123"},
{"q", "abc"},
},
})
}
func TestPipeUnrollUpdateNeededFields(t *testing.T) {
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
t.Helper()
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
}
// all the needed fields
f("unroll (x)", "*", "", "*", "")
f("unroll (x, y)", "*", "", "*", "")
f("unroll if (y:z) (a, b)", "*", "", "*", "")
// all the needed fields, unneeded fields do not intersect with src
f("unroll (x)", "*", "f1,f2", "*", "f1,f2")
f("unroll if (a:b) (x)", "*", "f1,f2", "*", "f1,f2")
f("unroll if (f1:b) (x)", "*", "f1,f2", "*", "f2")
// all the needed fields, unneeded fields intersect with src
f("unroll (x)", "*", "f2,x", "*", "f2,x")
f("unroll if (a:b) (x)", "*", "f2,x", "*", "f2,x")
f("unroll if (f2:b) (x)", "*", "f2,x", "*", "f2,x")
// needed fields do not intersect with src
f("unroll (x)", "f1,f2", "", "f1,f2", "")
f("unroll if (a:b) (x)", "f1,f2", "", "f1,f2", "")
// needed fields intersect with src
f("unroll (x)", "f2,x", "", "f2,x", "")
f("unroll if (a:b) (x)", "f2,x", "", "a,f2,x", "")
}
func TestUnpackJSONArray(t *testing.T) {
f := func(s string, resultExpected []string) {
t.Helper()
var a arena
result := unpackJSONArray(nil, &a, s)
if !reflect.DeepEqual(result, resultExpected) {
t.Fatalf("unexpected result for unpackJSONArray(%q)\ngot\n%q\nwant\n%q", s, result, resultExpected)
}
}
f("", nil)
f("123", nil)
f("foo", nil)
f(`"foo"`, nil)
f(`{"foo":"bar"}`, nil)
f(`[foo`, nil)
f(`[]`, nil)
f(`[1]`, []string{"1"})
f(`[1,"foo",["bar",12],{"baz":"x"},NaN,null]`, []string{"1", "foo", `["bar",12]`, `{"baz":"x"}`, "NaN", "null"})
}

View file

@ -0,0 +1,103 @@
package logstorage
import (
"unsafe"
)
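// updateNeededFieldsForUpdatePipe propagates the needed/unneeded field sets through
// a pipe which updates the given field in place: the fields referenced by the optional
// if (...) filter become needed only when the updated field itself is needed by the
// downstream pipes.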
func updateNeededFieldsForUpdatePipe(neededFields, unneededFields fieldsSet, field string, iff *ifFilter) {
if neededFields.contains("*") {
if !unneededFields.contains(field) && iff != nil {
unneededFields.removeFields(iff.neededFields)
}
} else {
if neededFields.contains(field) && iff != nil {
neededFields.addFields(iff.neededFields)
}
}
}
func newPipeUpdateProcessor(workersCount int, updateFunc func(a *arena, v string) string, ppNext pipeProcessor, field string, iff *ifFilter) pipeProcessor {
return &pipeUpdateProcessor{
updateFunc: updateFunc,
field: field,
iff: iff,
ppNext: ppNext,
shards: make([]pipeUpdateProcessorShard, workersCount),
}
}
type pipeUpdateProcessor struct {
updateFunc func(a *arena, v string) string
field string
iff *ifFilter
ppNext pipeProcessor
shards []pipeUpdateProcessorShard
}
type pipeUpdateProcessorShard struct {
pipeUpdateProcessorShardNopad
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
_ [128 - unsafe.Sizeof(pipeUpdateProcessorShardNopad{})%128]byte
}
type pipeUpdateProcessorShardNopad struct {
bm bitmap
rc resultColumn
a arena
}
func (pup *pipeUpdateProcessor) writeBlock(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
shard := &pup.shards[workerID]
bm := &shard.bm
bm.init(len(br.timestamps))
bm.setBits()
if iff := pup.iff; iff != nil {
iff.f.applyToBlockResult(br, bm)
if bm.isZero() {
pup.ppNext.writeBlock(workerID, br)
return
}
}
shard.rc.name = pup.field
c := br.getColumnByName(pup.field)
values := c.getValues(br)
hadUpdates := false
vPrev := ""
vNew := ""
for rowIdx, v := range values {
if bm.isSetBit(rowIdx) {
// Cache the most recent updateFunc result, since columns frequently contain long runs of identical values.
if !hadUpdates || vPrev != v {
vPrev = v
hadUpdates = true
vNew = pup.updateFunc(&shard.a, v)
}
shard.rc.addValue(vNew)
} else {
shard.rc.addValue(v)
}
}
br.addResultColumn(&shard.rc)
pup.ppNext.writeBlock(workerID, br)
shard.rc.reset()
shard.a.reset()
}
func (pup *pipeUpdateProcessor) flush() error {
return nil
}
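// A minimal hypothetical sketch (not part of this commit) of how a pipe could plug
// into newPipeUpdateProcessor: pipeUpper is an illustrative type, and the updateFunc
// below assumes the strings and bytesutil packages are imported; only arena, ifFilter,
// pipeProcessor and newPipeUpdateProcessor come from the code above.
type pipeUpper struct {
field string
iff *ifFilter
}
func (pu *pipeUpper) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
updateFunc := func(a *arena, v string) string {
// Copy the transformed value into the shard-local arena, following the same
// pattern as unpackJSONArray above, so it stays valid after the block is released.
bLen := len(a.b)
a.b = append(a.b, strings.ToUpper(v)...)
return bytesutil.ToUnsafeString(a.b[bLen:])
}
return newPipeUpdateProcessor(workersCount, updateFunc, ppNext, pu.field, pu.iff)
}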

View file

@ -0,0 +1,224 @@
package logstorage
import (
"math/rand"
"slices"
"strings"
"sync"
"testing"
)
func expectPipeResults(t *testing.T, pipeStr string, rows, rowsExpected [][]Field) {
t.Helper()
lex := newLexer(pipeStr)
p, err := parsePipe(lex)
if err != nil {
t.Fatalf("unexpected error when parsing %q: %s", pipeStr, err)
}
workersCount := 5
stopCh := make(chan struct{})
cancel := func() {}
ppTest := newTestPipeProcessor()
pp := p.newPipeProcessor(workersCount, stopCh, cancel, ppTest)
brw := newTestBlockResultWriter(workersCount, pp)
for _, row := range rows {
brw.writeRow(row)
}
brw.flush()
pp.flush()
ppTest.expectRows(t, rowsExpected)
}
func newTestBlockResultWriter(workersCount int, ppNext pipeProcessor) *testBlockResultWriter {
return &testBlockResultWriter{
workersCount: workersCount,
ppNext: ppNext,
}
}
type testBlockResultWriter struct {
workersCount int
ppNext pipeProcessor
rcs []resultColumn
br blockResult
rowsCount int
}
func (brw *testBlockResultWriter) writeRow(row []Field) {
if !brw.areSameFields(row) {
brw.flush()
brw.rcs = brw.rcs[:0]
for _, field := range row {
brw.rcs = appendResultColumnWithName(brw.rcs, field.Name)
}
}
for i, field := range row {
brw.rcs[i].addValue(field.Value)
}
brw.rowsCount++
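// Randomly flush the accumulated rows, so pipes are exercised with multiple
// input blocks of varying sizes and on random worker IDs (see flush below).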
if rand.Intn(5) == 0 {
brw.flush()
}
}
func (brw *testBlockResultWriter) areSameFields(row []Field) bool {
if len(brw.rcs) != len(row) {
return false
}
for i, rc := range brw.rcs {
if rc.name != row[i].Name {
return false
}
}
return true
}
func (brw *testBlockResultWriter) flush() {
brw.br.setResultColumns(brw.rcs, brw.rowsCount)
brw.rowsCount = 0
workerID := rand.Intn(brw.workersCount)
brw.ppNext.writeBlock(uint(workerID), &brw.br)
brw.br.reset()
for i := range brw.rcs {
brw.rcs[i].resetValues()
}
}
func newTestPipeProcessor() *testPipeProcessor {
return &testPipeProcessor{}
}
type testPipeProcessor struct {
resultRowsLock sync.Mutex
resultRows [][]Field
}
func (pp *testPipeProcessor) writeBlock(_ uint, br *blockResult) {
cs := br.getColumns()
var columnValues [][]string
for _, c := range cs {
values := c.getValues(br)
columnValues = append(columnValues, values)
}
for i := range br.timestamps {
row := make([]Field, len(columnValues))
for j, values := range columnValues {
r := &row[j]
r.Name = strings.Clone(cs[j].name)
r.Value = strings.Clone(values[i])
}
pp.resultRowsLock.Lock()
pp.resultRows = append(pp.resultRows, row)
pp.resultRowsLock.Unlock()
}
}
func (pp *testPipeProcessor) flush() error {
return nil
}
func (pp *testPipeProcessor) expectRows(t *testing.T, expectedRows [][]Field) {
t.Helper()
if len(pp.resultRows) != len(expectedRows) {
t.Fatalf("unexpected number of rows; got %d; want %d\nrows got\n%s\nrows expected\n%s",
len(pp.resultRows), len(expectedRows), rowsToString(pp.resultRows), rowsToString(expectedRows))
}
sortTestRows(pp.resultRows)
sortTestRows(expectedRows)
for i, resultRow := range pp.resultRows {
expectedRow := expectedRows[i]
if len(resultRow) != len(expectedRow) {
t.Fatalf("unexpected number of fields at row #%d; got %d; want %d\nrow got\n%s\nrow expected\n%s",
i, len(resultRow), len(expectedRow), rowToString(resultRow), rowToString(expectedRow))
}
for j, resultField := range resultRow {
expectedField := expectedRow[j]
if resultField.Name != expectedField.Name {
t.Fatalf("unexpected field name at row #%d; got %q; want %q\nrow got\n%s\nrow expected\n%s",
i, resultField.Name, expectedField.Name, rowToString(resultRow), rowToString(expectedRow))
}
if resultField.Value != expectedField.Value {
t.Fatalf("unexpected value for field %q at row #%d; got %q; want %q\nrow got\n%s\nrow expected\n%s",
resultField.Name, i, resultField.Value, expectedField.Value, rowToString(resultRow), rowToString(expectedRow))
}
}
}
}
func sortTestRows(rows [][]Field) {
for _, row := range rows {
sortTestFields(row)
}
slices.SortFunc(rows, func(a, b []Field) int {
reverse := false
if len(a) > len(b) {
reverse = true
a, b = b, a
}
for i, fA := range a {
fB := b[i]
result := cmpTestFields(fA, fB)
if result == 0 {
continue
}
if reverse {
result = -result
}
return result
}
if len(a) == len(b) {
return 0
}
if reverse {
return 1
}
return -1
})
}
func sortTestFields(fields []Field) {
slices.SortFunc(fields, cmpTestFields)
}
func cmpTestFields(a, b Field) int {
if a.Name == b.Name {
if a.Value == b.Value {
return 0
}
if a.Value < b.Value {
return -1
}
return 1
}
if a.Name < b.Name {
return -1
}
return 1
}
func rowsToString(rows [][]Field) string {
a := make([]string, len(rows))
for i, row := range rows {
a[i] = rowToString(row)
}
return strings.Join(a, "\n")
}
func rowToString(row []Field) string {
a := make([]string, len(row))
for i, f := range row {
a[i] = f.String()
}
return "{" + strings.Join(a, ",") + "}"
}

View file

@ -275,7 +275,7 @@ func TestStatsFieldsMax(t *testing.T) {
{ {
{"a", "1"}, {"a", "1"},
{"b", ""}, {"b", ""},
{"x", `{"_msg":"def","a":"1","c":"foo","b":""}`}, {"x", `{"_msg":"def","a":"1","c":"foo"}`},
}, },
{ {
{"a", "3"}, {"a", "3"},

View file

@ -274,7 +274,7 @@ func TestStatsFieldsMin(t *testing.T) {
{ {
{"a", "1"}, {"a", "1"},
{"b", ""}, {"b", ""},
{"x", `{"_msg":"def","a":"1","c":"foo","b":""}`}, {"x", `{"_msg":"def","a":"1","c":"foo"}`},
}, },
{ {
{"a", "3"}, {"a", "3"},

View file

@ -229,7 +229,7 @@ func (s *Storage) getFieldValuesNoHits(ctx context.Context, tenantIDs []TenantID
func (s *Storage) GetFieldValues(ctx context.Context, tenantIDs []TenantID, q *Query, fieldName string, limit uint64) ([]ValueWithHits, error) { func (s *Storage) GetFieldValues(ctx context.Context, tenantIDs []TenantID, q *Query, fieldName string, limit uint64) ([]ValueWithHits, error) {
pipes := append([]pipe{}, q.pipes...) pipes := append([]pipe{}, q.pipes...)
quotedFieldName := quoteTokenIfNeeded(fieldName) quotedFieldName := quoteTokenIfNeeded(fieldName)
pipeStr := fmt.Sprintf("uniq by (%s) hits limit %d", quotedFieldName, limit) pipeStr := fmt.Sprintf("uniq by (%s) with hits limit %d", quotedFieldName, limit)
lex := newLexer(pipeStr) lex := newLexer(pipeStr)
pu, err := parsePipeUniq(lex) pu, err := parsePipeUniq(lex)
@ -288,18 +288,18 @@ func sortValuesWithHits(results []ValueWithHits) {
}) })
} }
// GetStreamLabelNames returns stream label names from q results for the given tenantIDs. // GetStreamFieldNames returns stream field names from q results for the given tenantIDs.
func (s *Storage) GetStreamLabelNames(ctx context.Context, tenantIDs []TenantID, q *Query) ([]ValueWithHits, error) { func (s *Storage) GetStreamFieldNames(ctx context.Context, tenantIDs []TenantID, q *Query) ([]ValueWithHits, error) {
streams, err := s.GetStreams(ctx, tenantIDs, q, math.MaxUint64) streams, err := s.GetStreams(ctx, tenantIDs, q, math.MaxUint64)
if err != nil { if err != nil {
return nil, err return nil, err
} }
m := make(map[string]*uint64) m := make(map[string]*uint64)
forEachStreamLabel(streams, func(label Field, hits uint64) { forEachStreamField(streams, func(f Field, hits uint64) {
pHits, ok := m[label.Name] pHits, ok := m[f.Name]
if !ok { if !ok {
nameCopy := strings.Clone(label.Name) nameCopy := strings.Clone(f.Name)
hitsLocal := uint64(0) hitsLocal := uint64(0)
pHits = &hitsLocal pHits = &hitsLocal
m[nameCopy] = pHits m[nameCopy] = pHits
@ -310,23 +310,23 @@ func (s *Storage) GetStreamLabelNames(ctx context.Context, tenantIDs []TenantID,
return names, nil return names, nil
} }
// GetStreamLabelValues returns stream label values for the given labelName from q results for the given tenantIDs. // GetStreamFieldValues returns stream field values for the given fieldName from q results for the given tenantIDs.
// //
// If limit > 0, then up to limit unique label values are returned. // If limit > 0, then up to limit unique values are returned.
func (s *Storage) GetStreamLabelValues(ctx context.Context, tenantIDs []TenantID, q *Query, labelName string, limit uint64) ([]ValueWithHits, error) { func (s *Storage) GetStreamFieldValues(ctx context.Context, tenantIDs []TenantID, q *Query, fieldName string, limit uint64) ([]ValueWithHits, error) {
streams, err := s.GetStreams(ctx, tenantIDs, q, math.MaxUint64) streams, err := s.GetStreams(ctx, tenantIDs, q, math.MaxUint64)
if err != nil { if err != nil {
return nil, err return nil, err
} }
m := make(map[string]*uint64) m := make(map[string]*uint64)
forEachStreamLabel(streams, func(label Field, hits uint64) { forEachStreamField(streams, func(f Field, hits uint64) {
if label.Name != labelName { if f.Name != fieldName {
return return
} }
pHits, ok := m[label.Value] pHits, ok := m[f.Value]
if !ok { if !ok {
valueCopy := strings.Clone(label.Value) valueCopy := strings.Clone(f.Value)
hitsLocal := uint64(0) hitsLocal := uint64(0)
pHits = &hitsLocal pHits = &hitsLocal
m[valueCopy] = pHits m[valueCopy] = pHits
@ -429,35 +429,10 @@ func hasFilterInWithQueryForFilter(f filter) bool {
func hasFilterInWithQueryForPipes(pipes []pipe) bool { func hasFilterInWithQueryForPipes(pipes []pipe) bool {
for _, p := range pipes { for _, p := range pipes {
switch t := p.(type) { if p.hasFilterInWithQuery() {
case *pipeStats:
for _, f := range t.funcs {
if f.iff.hasFilterInWithQuery() {
return true return true
} }
} }
case *pipeReplace:
if t.iff.hasFilterInWithQuery() {
return true
}
case *pipeFormat:
if t.iff.hasFilterInWithQuery() {
return true
}
case *pipeExtract:
if t.iff.hasFilterInWithQuery() {
return true
}
case *pipeUnpackJSON:
if t.iff.hasFilterInWithQuery() {
return true
}
case *pipeUnpackLogfmt:
if t.iff.hasFilterInWithQuery() {
return true
}
}
}
return false return false
} }
@ -514,64 +489,11 @@ func initFilterInValuesForFilter(cache map[string][]string, f filter, getFieldVa
func initFilterInValuesForPipes(cache map[string][]string, pipes []pipe, getFieldValuesFunc getFieldValuesFunc) ([]pipe, error) { func initFilterInValuesForPipes(cache map[string][]string, pipes []pipe, getFieldValuesFunc getFieldValuesFunc) ([]pipe, error) {
pipesNew := make([]pipe, len(pipes)) pipesNew := make([]pipe, len(pipes))
for i, p := range pipes { for i, p := range pipes {
switch t := p.(type) { pNew, err := p.initFilterInValues(cache, getFieldValuesFunc)
case *pipeStats:
funcsNew := make([]pipeStatsFunc, len(t.funcs))
for j, f := range t.funcs {
iffNew, err := f.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil { if err != nil {
return nil, err return nil, err
} }
f.iff = iffNew pipesNew[i] = pNew
funcsNew[j] = f
}
pipesNew[i] = &pipeStats{
byFields: t.byFields,
funcs: funcsNew,
}
case *pipeReplace:
iffNew, err := t.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
pr := *t
pr.iff = iffNew
pipesNew[i] = &pr
case *pipeFormat:
iffNew, err := t.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
pf := *t
pf.iff = iffNew
pipesNew[i] = &pf
case *pipeExtract:
iffNew, err := t.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
pe := *t
pe.iff = iffNew
pipesNew[i] = &pe
case *pipeUnpackJSON:
iffNew, err := t.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
pu := *t
pu.iff = iffNew
pipesNew[i] = &pu
case *pipeUnpackLogfmt:
iffNew, err := t.iff.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
}
pu := *t
pu.iff = iffNew
pipesNew[i] = &pu
default:
pipesNew[i] = p
}
} }
return pipesNew, nil return pipesNew, nil
} }
@ -1099,22 +1021,22 @@ func getFilterTimeRange(f filter) (int64, int64) {
return math.MinInt64, math.MaxInt64 return math.MinInt64, math.MaxInt64
} }
func forEachStreamLabel(streams []ValueWithHits, f func(label Field, hits uint64)) { func forEachStreamField(streams []ValueWithHits, f func(f Field, hits uint64)) {
var labels []Field var fields []Field
for i := range streams { for i := range streams {
var err error var err error
labels, err = parseStreamLabels(labels[:0], streams[i].Value) fields, err = parseStreamFields(fields[:0], streams[i].Value)
if err != nil { if err != nil {
continue continue
} }
hits := streams[i].Hits hits := streams[i].Hits
for j := range labels { for j := range fields {
f(labels[j], hits) f(fields[j], hits)
} }
} }
} }
func parseStreamLabels(dst []Field, s string) ([]Field, error) { func parseStreamFields(dst []Field, s string) ([]Field, error) {
if len(s) == 0 || s[0] != '{' { if len(s) == 0 || s[0] != '{' {
return dst, fmt.Errorf("missing '{' at the beginning of stream name") return dst, fmt.Errorf("missing '{' at the beginning of stream name")
} }
@ -1130,14 +1052,14 @@ func parseStreamLabels(dst []Field, s string) ([]Field, error) {
for { for {
n := strings.Index(s, `="`) n := strings.Index(s, `="`)
if n < 0 { if n < 0 {
return dst, fmt.Errorf("cannot find label value in double quotes at [%s]", s) return dst, fmt.Errorf("cannot find field value in double quotes at [%s]", s)
} }
name := s[:n] name := s[:n]
s = s[n+1:] s = s[n+1:]
value, nOffset := tryUnquoteString(s, "") value, nOffset := tryUnquoteString(s, "")
if nOffset < 0 { if nOffset < 0 {
return dst, fmt.Errorf("cannot find parse label value in double quotes at [%s]", s) return dst, fmt.Errorf("cannot find parse field value in double quotes at [%s]", s)
} }
s = s[nOffset:] s = s[nOffset:]

View file

@ -650,11 +650,11 @@ func TestStorageSearch(t *testing.T) {
fs.MustRemoveAll(path) fs.MustRemoveAll(path)
} }
func TestParseStreamLabelsSuccess(t *testing.T) { func TestParseStreamFieldsSuccess(t *testing.T) {
f := func(s, resultExpected string) { f := func(s, resultExpected string) {
t.Helper() t.Helper()
labels, err := parseStreamLabels(nil, s) labels, err := parseStreamFields(nil, s)
if err != nil { if err != nil {
t.Fatalf("unexpected error: %s", err) t.Fatalf("unexpected error: %s", err)
} }