From e93a71d741c68607c545dcc703891e0066fff82d Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Sat, 25 May 2024 11:59:47 +0200 Subject: [PATCH] wip --- app/vlselect/logsql/hits_response.qtpl | 4 +- app/vlselect/logsql/hits_response.qtpl.go | 12 ++-- app/vlselect/logsql/logsql.go | 34 +++++------ app/vlselect/main.go | 16 ++--- app/vlstorage/main.go | 14 ++--- docs/VictoriaLogs/CHANGELOG.md | 4 +- docs/VictoriaLogs/keyConcepts.md | 74 ++++++++++++----------- docs/VictoriaLogs/querying/README.md | 50 +++++++-------- lib/logstorage/storage_search.go | 40 ++++++------ lib/logstorage/storage_search_test.go | 4 +- 10 files changed, 129 insertions(+), 123 deletions(-) diff --git a/app/vlselect/logsql/hits_response.qtpl b/app/vlselect/logsql/hits_response.qtpl index f9976f7ab..32629cc3b 100644 --- a/app/vlselect/logsql/hits_response.qtpl +++ b/app/vlselect/logsql/hits_response.qtpl @@ -6,8 +6,8 @@ {% stripspace %} -// LabelsForHits formats labels for /select/logsql/hits response -{% func LabelsForHits(columns []logstorage.BlockColumn, rowIdx int) %} +// FieldsForHits formats labels for /select/logsql/hits response +{% func FieldsForHits(columns []logstorage.BlockColumn, rowIdx int) %} { {% if len(columns) > 0 %} {%q= columns[0].Name %}:{%q= columns[0].Values[rowIdx] %} diff --git a/app/vlselect/logsql/hits_response.qtpl.go b/app/vlselect/logsql/hits_response.qtpl.go index a0d463952..cbb0d9ee9 100644 --- a/app/vlselect/logsql/hits_response.qtpl.go +++ b/app/vlselect/logsql/hits_response.qtpl.go @@ -11,7 +11,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage" ) -// LabelsForHits formats labels for /select/logsql/hits response +// FieldsForHits formats labels for /select/logsql/hits response //line app/vlselect/logsql/hits_response.qtpl:10 import ( @@ -27,7 +27,7 @@ var ( ) //line app/vlselect/logsql/hits_response.qtpl:10 -func StreamLabelsForHits(qw422016 *qt422016.Writer, columns []logstorage.BlockColumn, rowIdx int) { +func 
StreamFieldsForHits(qw422016 *qt422016.Writer, columns []logstorage.BlockColumn, rowIdx int) { //line app/vlselect/logsql/hits_response.qtpl:10 qw422016.N().S(`{`) //line app/vlselect/logsql/hits_response.qtpl:12 @@ -58,22 +58,22 @@ func StreamLabelsForHits(qw422016 *qt422016.Writer, columns []logstorage.BlockCo } //line app/vlselect/logsql/hits_response.qtpl:19 -func WriteLabelsForHits(qq422016 qtio422016.Writer, columns []logstorage.BlockColumn, rowIdx int) { +func WriteFieldsForHits(qq422016 qtio422016.Writer, columns []logstorage.BlockColumn, rowIdx int) { //line app/vlselect/logsql/hits_response.qtpl:19 qw422016 := qt422016.AcquireWriter(qq422016) //line app/vlselect/logsql/hits_response.qtpl:19 - StreamLabelsForHits(qw422016, columns, rowIdx) + StreamFieldsForHits(qw422016, columns, rowIdx) //line app/vlselect/logsql/hits_response.qtpl:19 qt422016.ReleaseWriter(qw422016) //line app/vlselect/logsql/hits_response.qtpl:19 } //line app/vlselect/logsql/hits_response.qtpl:19 -func LabelsForHits(columns []logstorage.BlockColumn, rowIdx int) string { +func FieldsForHits(columns []logstorage.BlockColumn, rowIdx int) string { //line app/vlselect/logsql/hits_response.qtpl:19 qb422016 := qt422016.AcquireByteBuffer() //line app/vlselect/logsql/hits_response.qtpl:19 - WriteLabelsForHits(qb422016, columns, rowIdx) + WriteFieldsForHits(qb422016, columns, rowIdx) //line app/vlselect/logsql/hits_response.qtpl:19 qs422016 := string(qb422016.B) //line app/vlselect/logsql/hits_response.qtpl:19 diff --git a/app/vlselect/logsql/logsql.go b/app/vlselect/logsql/logsql.go index 2f50f825e..fc3f65bba 100644 --- a/app/vlselect/logsql/logsql.go +++ b/app/vlselect/logsql/logsql.go @@ -77,7 +77,7 @@ func ProcessHitsRequest(ctx context.Context, w http.ResponseWriter, r *http.Requ hitsStr := strings.Clone(hitsValues[i]) bb.Reset() - WriteLabelsForHits(bb, columns, i) + WriteFieldsForHits(bb, columns, i) mLock.Lock() hs, ok := m[string(bb.B)] @@ -189,21 +189,21 @@ func 
ProcessFieldValuesRequest(ctx context.Context, w http.ResponseWriter, r *ht WriteValuesWithHitsJSON(w, values) } -// ProcessStreamLabelNamesRequest processes /select/logsql/stream_label_names request. +// ProcessStreamFieldNamesRequest processes /select/logsql/stream_field_names request. // -// See https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-label-names -func ProcessStreamLabelNamesRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) { +// See https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-field-names +func ProcessStreamFieldNamesRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) { q, tenantIDs, err := parseCommonArgs(r) if err != nil { httpserver.Errorf(w, r, "%s", err) return } - // Obtain stream label names for the given query + // Obtain stream field names for the given query q.Optimize() - names, err := vlstorage.GetStreamLabelNames(ctx, tenantIDs, q) + names, err := vlstorage.GetStreamFieldNames(ctx, tenantIDs, q) if err != nil { - httpserver.Errorf(w, r, "cannot obtain stream label names: %s", err) + httpserver.Errorf(w, r, "cannot obtain stream field names: %s", err) } // Write results @@ -211,20 +211,20 @@ func ProcessStreamLabelNamesRequest(ctx context.Context, w http.ResponseWriter, WriteValuesWithHitsJSON(w, names) } -// ProcessStreamLabelValuesRequest processes /select/logsql/stream_label_values request. +// ProcessStreamFieldValuesRequest processes /select/logsql/stream_field_values request. 
// -// See https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-label-values -func ProcessStreamLabelValuesRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) { +// See https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-field-values +func ProcessStreamFieldValuesRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) { q, tenantIDs, err := parseCommonArgs(r) if err != nil { httpserver.Errorf(w, r, "%s", err) return } - // Parse labelName query arg - labelName := r.FormValue("label") - if labelName == "" { - httpserver.Errorf(w, r, "missing 'label' query arg") + // Parse fieldName query arg + fieldName := r.FormValue("field") + if fieldName == "" { + httpserver.Errorf(w, r, "missing 'field' query arg") return } @@ -238,11 +238,11 @@ func ProcessStreamLabelValuesRequest(ctx context.Context, w http.ResponseWriter, limit = 0 } - // Obtain stream label names for the given query + // Obtain stream field values for the given query and the given fieldName q.Optimize() - values, err := vlstorage.GetStreamLabelValues(ctx, tenantIDs, q, labelName, uint64(limit)) + values, err := vlstorage.GetStreamFieldValues(ctx, tenantIDs, q, fieldName, uint64(limit)) if err != nil { - httpserver.Errorf(w, r, "cannot obtain stream label values: %s", err) + httpserver.Errorf(w, r, "cannot obtain stream field values: %s", err) } // Write results diff --git a/app/vlselect/main.go b/app/vlselect/main.go index 988d9fe1e..e43ff42be 100644 --- a/app/vlselect/main.go +++ b/app/vlselect/main.go @@ -157,13 +157,13 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool { logsqlQueryRequests.Inc() logsql.ProcessQueryRequest(ctx, w, r) return true - case "/select/logsql/stream_label_names": - logsqlStreamLabelNamesRequests.Inc() - logsql.ProcessStreamLabelNamesRequest(ctx, w, r) + case "/select/logsql/stream_field_names": + logsqlStreamFieldNamesRequests.Inc() + logsql.ProcessStreamFieldNamesRequest(ctx, w, r) return true 
- case "/select/logsql/stream_label_values": - logsqlStreamLabelValuesRequests.Inc() - logsql.ProcessStreamLabelValuesRequest(ctx, w, r) + case "/select/logsql/stream_field_values": + logsqlStreamFieldValuesRequests.Inc() + logsql.ProcessStreamFieldValuesRequest(ctx, w, r) return true case "/select/logsql/streams": logsqlStreamsRequests.Inc() @@ -192,7 +192,7 @@ var ( logsqlFieldValuesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/field_values"}`) logsqlHitsRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/hits"}`) logsqlQueryRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/query"}`) - logsqlStreamLabelNamesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/stream_label_names"}`) - logsqlStreamLabelValuesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/stream_label_values"}`) + logsqlStreamFieldNamesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/stream_field_names"}`) + logsqlStreamFieldValuesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/stream_field_values"}`) logsqlStreamsRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/streams"}`) ) diff --git a/app/vlstorage/main.go b/app/vlstorage/main.go index 3f41ddcb8..6ea43a805 100644 --- a/app/vlstorage/main.go +++ b/app/vlstorage/main.go @@ -123,16 +123,16 @@ func GetFieldValues(ctx context.Context, tenantIDs []logstorage.TenantID, q *log return strg.GetFieldValues(ctx, tenantIDs, q, fieldName, limit) } -// GetStreamLabelNames executes q and returns stream labels names seen in results. -func GetStreamLabelNames(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query) ([]logstorage.ValueWithHits, error) { - return strg.GetStreamLabelNames(ctx, tenantIDs, q) +// GetStreamFieldNames executes q and returns stream field names seen in results. 
+func GetStreamFieldNames(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query) ([]logstorage.ValueWithHits, error) { + return strg.GetStreamFieldNames(ctx, tenantIDs, q) } -// GetStreamLabelValues executes q and returns stream label values for the given labelName seen in results. +// GetStreamFieldValues executes q and returns stream field values for the given fieldName seen in results. // -// If limit > 0, then up to limit unique stream label values are returned. -func GetStreamLabelValues(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query, labelName string, limit uint64) ([]logstorage.ValueWithHits, error) { - return strg.GetStreamLabelValues(ctx, tenantIDs, q, labelName, limit) +// If limit > 0, then up to limit unique stream field values are returned. +func GetStreamFieldValues(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query, fieldName string, limit uint64) ([]logstorage.ValueWithHits, error) { + return strg.GetStreamFieldValues(ctx, tenantIDs, q, fieldName, limit) } // GetStreams executes q and returns streams seen in query results. diff --git a/docs/VictoriaLogs/CHANGELOG.md b/docs/VictoriaLogs/CHANGELOG.md index 985a5aebe..109a1d5b7 100644 --- a/docs/VictoriaLogs/CHANGELOG.md +++ b/docs/VictoriaLogs/CHANGELOG.md @@ -63,8 +63,8 @@ Released at 2024-05-22 * FEATURE: add ability to unpack [logfmt](https://brandur.org/logfmt) fields with [`unpack_logfmt` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_logfmt-pipe) only if the given condition is met. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#conditional-unpack_logfmt). 
* FEATURE: add [`fields_min`](https://docs.victoriametrics.com/victorialogs/logsql/#fields_min-stats) and [`fields_max`](https://docs.victoriametrics.com/victorialogs/logsql/#fields_max-stats) functions for [`stats` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe), which allow returning all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) for the log entry with the minimum / maximum value at the given field. * FEATURE: add `/select/logsql/streams` HTTP endpoint for returning [streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-streams) for details. -* FEATURE: add `/select/logsql/stream_label_names` HTTP endpoint for returning [stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label names from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-label-names) for details. -* FEATURE: add `/select/logsql/stream_label_values` HTTP endpoint for returning [stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label values for the given label from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-label-values) for details. +* FEATURE: add `/select/logsql/stream_field_names` HTTP endpoint for returning [stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) field names from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-field-names) for details. +* FEATURE: add `/select/logsql/stream_field_values` HTTP endpoint for returning [stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) field values for the given label from results of the given query. 
See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-field-values) for details. * FEATURE: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): change time range limitation from `_time` in the expression to `start` and `end` query args. * BUGFIX: fix `invalid memory address or nil pointer dereference` panic when using [`extract`](https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe), [`unpack_json`](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_json-pipe) or [`unpack_logfmt`](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_logfmt-pipe) pipes. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6306). diff --git a/docs/VictoriaLogs/keyConcepts.md b/docs/VictoriaLogs/keyConcepts.md index 4cdc76099..c2cacfd08 100644 --- a/docs/VictoriaLogs/keyConcepts.md +++ b/docs/VictoriaLogs/keyConcepts.md @@ -74,14 +74,14 @@ during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-inges } ``` -Both label name and label value may contain arbitrary chars. Such chars must be encoded +Both field name and field value may contain arbitrary chars. Such chars must be encoded during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/) according to [JSON string encoding](https://www.rfc-editor.org/rfc/rfc7159.html#section-7). Unicode chars must be encoded with [UTF-8](https://en.wikipedia.org/wiki/UTF-8) encoding: ```json { - "label with whitepsace": "value\nwith\nnewlines", + "field with whitepsace": "value\nwith\nnewlines", "Поле": "价值", } ``` @@ -89,13 +89,11 @@ Unicode chars must be encoded with [UTF-8](https://en.wikipedia.org/wiki/UTF-8) VictoriaLogs automatically indexes all the fields in all the [ingested](https://docs.victoriametrics.com/victorialogs/data-ingestion/) logs. This enables [full-text search](https://docs.victoriametrics.com/victorialogs/logsql/) across all the fields. 
-VictoriaLogs supports the following field types: +VictoriaLogs supports the following special fields additionally to arbitrary [other fields](#other-fields): * [`_msg` field](#message-field) * [`_time` field](#time-field) * [`_stream` fields](#stream-fields) -* [other fields](#other-fields) - ### Message field @@ -116,7 +114,9 @@ during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-inges ### Time field The ingested [log entries](#data-model) may contain `_time` field with the timestamp of the ingested log entry. -For example: +The timestamp must be in [RFC3339](https://www.rfc-editor.org/rfc/rfc3339) format. The most commonly used subset of [ISO8601](https://en.wikipedia.org/wiki/ISO_8601) +is also supported. It is allowed specifying seconds part of the timestamp with any precision up to nanoseconds. +For example, the following [log entry](#data-model) contains valid timestamp with millisecond precision in the `_time` field: ```json { @@ -132,29 +132,39 @@ during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-inges If `_time` field is missing, then the data ingestion time is used as log entry timestamp. -The log entry timestamp allows quickly narrowing down the search to a particular time range. -See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter) for details. +The `_time` field is used in [time filter](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter) for quickly narrowing down +the search to a particular time range. ### Stream fields Some [structured logging](#data-model) fields may uniquely identify the application instance, which generates log entries. -This may be either a single field such as `instance=host123:456` or a set of fields such as -`(datacenter=..., env=..., job=..., instance=...)` or -`(kubernetes.namespace=..., kubernetes.node.name=..., kubernetes.pod.name=..., kubernetes.container.name=...)`. 
+This may be either a single field such as `instance="host123:456"` or a set of fields such as +`{datacenter="...", env="...", job="...", instance="..."}` or +`{kubernetes.namespace="...", kubernetes.node.name="...", kubernetes.pod.name="...", kubernetes.container.name="..."}`. -Log entries received from a single application instance form a log stream in VictoriaLogs. -VictoriaLogs optimizes storing and querying of individual log streams. This provides the following benefits: +Log entries received from a single application instance form a **log stream** in VictoriaLogs. +VictoriaLogs optimizes storing and [querying](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter) of individual log streams. +This provides the following benefits: - Reduced disk space usage, since a log stream from a single application instance is usually compressed better than a mixed log stream from multiple distinct applications. - Increased query performance, since VictoriaLogs needs to scan lower amounts of data - when [searching by stream labels](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter). + when [searching by stream fields](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter). -VictoriaLogs cannot determine automatically, which fields uniquely identify every log stream, -so it stores all the received log entries in a single default stream - `{}`. -This may lead to not-so-optimal resource usage and query performance. +Every ingested log entry is associated with a log stream. The name of this stream is stored in `_stream` field. +This field has the format similar to [labels in Prometheus metrics](https://docs.victoriametrics.com/keyconcepts/#labels): +``` +{field1="value1", ..., fieldN="valueN"} +``` + +For example, if `host` and `app` fields are associated with the stream, then the `_stream` field will have `{host="host-123",app="my-app"}` value +for the log entry with `host="host-123"` and `app="my-app"` fields. 
The `_stream` field can be searched +with [stream filters](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter). + +By default the value of `_stream` field is `{}`, since VictoriaLogs cannot determine automatically, +which fields uniquely identify every log stream. This may lead to not-so-optimal resource usage and query performance. Therefore it is recommended specifying stream-level fields via `_stream_fields` query arg during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/). For example, if logs from Kubernetes containers have the following fields: @@ -175,20 +185,17 @@ per-container logs into distinct streams. #### How to determine which fields must be associated with log streams? -[Log streams](#stream-fields) can be associated with fields, which simultaneously meet the following conditions: +[Log streams](#stream-fields) must contain [fields](#data-model), which uniquely identify the application instance, which generates logs. +For example, `container`, `instance` and `host` are good candidates for stream fields. -- Fields, which remain constant across log entries received from a single application instance. -- Fields, which uniquely identify the application instance. For example, `instance`, `host`, `container`, etc. +Additional fields may be added to log streams if they **remain constant during application instance lifetime**. +For example, `namespace`, `node`, `pod` and `job` are good candidates for additional stream fields. Adding such fields to log streams +makes sense if you are going to use these fields during search and want speeding up it with [stream filters](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter). -Sometimes a single application instance may generate multiple log streams and store them into distinct log files. -In this case it is OK to associate the log stream with filepath fields such as `log.file.path` additionally to instance-specific fields. 
+There is **no need to add all the constant fields to log streams**, since this may increase resource usage during data ingestion and querying. -Structured logs may contain big number of fields, which do not change across log entries received from a single application instance. -There is no need in associating all these fields with log stream - it is enough to associate only those fields, which uniquely identify -the application instance across all the ingested logs. Additionally, some fields such as `datacenter`, `environment`, `namespace`, `job` or `app`, -can be associated with log stream in order to optimize searching by these fields with [stream filtering](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter). - -Never associate log streams with fields, which may change across log entries of the same application instance. See [these docs](#high-cardinality) for details. +**Never add non-constant fields to streams if these fields may change with every log entry of the same stream**. +For example, `ip`, `user_id` and `trace_id` **must never be associated with log streams**, since this may lead to [high cardinality issues](#high-cardinality). #### High cardinality @@ -196,8 +203,7 @@ Some fields in the [ingested logs](#data-model) may contain big number of unique For example, fields with names such as `ip`, `user_id` or `trace_id` tend to contain big number of unique values. VictoriaLogs works perfectly with such fields unless they are associated with [log streams](#stream-fields). 
-Never associate high-cardinality fields with [log streams](#stream-fields), since this may result -to the following issues: +**Never** associate high-cardinality fields with [log streams](#stream-fields), since this may lead to the following issues: - Performance degradation during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/) and [querying](https://docs.victoriametrics.com/victorialogs/querying/) @@ -214,9 +220,9 @@ This can help narrowing down and eliminating high-cardinality fields from [log s ### Other fields -The rest of [structured logging](#data-model) fields are optional. They can be used for simplifying and optimizing search queries. -For example, it is usually faster to search over a dedicated `trace_id` field instead of searching for the `trace_id` inside long log message. -E.g. the `trace_id:XXXX-YYYY-ZZZZ` query usually works faster than the `_msg:"trace_id=XXXX-YYYY-ZZZZ"` query. +Every ingested log entry may contain arbitrary number of [fields](#data-model) additionally to [`_msg`](#message-field) and [`_time`](#time-field). +For example, `level`, `ip`, `user_id`, `trace_id`, etc. Such fields can be used for simplifying and optimizing [search queries](https://docs.victoriametrics.com/victorialogs/logsql/). +It is usually faster to search over a dedicated `trace_id` field instead of searching for the `trace_id` inside long [log message](#message-field). +E.g. the `trace_id:="XXXX-YYYY-ZZZZ"` query usually works faster than the `_msg:"trace_id=XXXX-YYYY-ZZZZ"` query. See [LogsQL docs](https://docs.victoriametrics.com/victorialogs/logsql/) for more details. 
- diff --git a/docs/VictoriaLogs/querying/README.md b/docs/VictoriaLogs/querying/README.md index 4c4d6a2bd..bb6dc7adf 100644 --- a/docs/VictoriaLogs/querying/README.md +++ b/docs/VictoriaLogs/querying/README.md @@ -28,8 +28,8 @@ VictoriaLogs provides the following HTTP endpoints: - [`/select/logsql/query`](#querying-logs) for querying logs - [`/select/logsql/hits`](#querying-hits-stats) for querying log hits stats over the given time range - [`/select/logsql/streams`](#querying-streams) for querying [log streams](#https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) -- [`/select/logsql/stream_label_names`](#querying-stream-label-names) for querying [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label names -- [`/select/logsql/stream_label_values`](#querying-stream-label-values) for querying [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label values +- [`/select/logsql/stream_field_names`](#querying-stream-field-names) for querying [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) field names +- [`/select/logsql/stream_field_values`](#querying-stream-field-values) for querying [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) field values - [`/select/logsql/field_names`](#querying-field-names) for querying [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) names. - [`/select/logsql/field_values`](#querying-field-values) for querying [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) values. @@ -107,7 +107,7 @@ with `vl_http_requests_total{path="/select/logsql/query"}` metric. 
### Querying hits stats VictoriaMetrics provides `/select/logsql/hits?query=&start=&end=&step=` HTTP endpoint, which returns the number -of matching log entries for the given `` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[ ... ]` +of matching log entries for the given [``](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[ ... ]` time range grouped by `` buckets. The returned results are sorted by time. The `` and `` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats). @@ -210,7 +210,7 @@ See also: ### Querying streams VictoriaLogs provides `/select/logsql/streams?query=&start=&end=` HTTP endpoint, which returns [streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) -from results of the given `` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[ ... ]` time range. +from results of the given [``](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[ ... ]` time range. The response also contains the number of log results per every `stream`. The `` and `` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats). @@ -254,22 +254,22 @@ See also: - [Querying hits stats](#querying-hits-stats) - [HTTP API](#http-api) -### Querying stream label names +### Querying stream field names -VictoriaLogs provides `/select/logsql/stream_label_names?query=&start=&end=` HTTP endpoint, which returns -[log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label names from results -of the given `` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[ ... ]` time range. -The response also contains the number of log results per every label name. 
+VictoriaLogs provides `/select/logsql/stream_field_names?query=&start=&end=` HTTP endpoint, which returns +[log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) field names from results +of the given [``](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[ ... ]` time range. +The response also contains the number of log results per every field name. The `` and `` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats). If `` is missing, then it equals to the minimum timestamp across logs stored in VictoriaLogs. If `` is missing, then it equals to the maximum timestamp across logs stored in VictoriaLogs. -For example, the following command returns stream label names across logs with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word) +For example, the following command returns stream field names across logs with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word) for the last 5 minutes: ```sh -curl http://localhost:9428/select/logsql/stream_label_names -d 'query=error' -d 'start=5m' +curl http://localhost:9428/select/logsql/stream_field_names -d 'query=error' -d 'start=5m' ``` Below is an example JSON output returned from this endpoint: @@ -295,27 +295,27 @@ Below is an example JSON output returned from this endpoint: See also: -- [Querying stream label names](#querying-stream-label-names) +- [Querying stream field names](#querying-stream-field-names) - [Querying field values](#querying-field-values) - [Querying streams](#querying-streams) - [HTTP API](#http-api) -### Querying stream label values +### Querying stream field values -VictoriaLogs provides `/select/logsql/stream_label_values?query=&start=&&label=` HTTP endpoint, -which returns [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label values for the label with the given `` name -from results of the given `` [LogsQL 
query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[ ... ]` time range. -The response also contains the number of log results per every label value. +VictoriaLogs provides `/select/logsql/stream_field_values?query=&start=&&field=` HTTP endpoint, +which returns [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) field values for the field with the given `` name +from results of the given [``](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[ ... ]` time range. +The response also contains the number of log results per every field value. The `` and `` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats). If `` is missing, then it equals to the minimum timestamp across logs stored in VictoriaLogs. If `` is missing, then it equals to the maximum timestamp across logs stored in VictoriaLogs. -For example, the following command returns values for the stream label `host` across logs with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word) +For example, the following command returns values for the stream field `host` across logs with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word) for the last 5 minutes: ```sh -curl http://localhost:9428/select/logsql/stream_label_values -d 'query=error' -d 'start=5m' -d 'label=host' +curl http://localhost:9428/select/logsql/stream_field_values -d 'query=error' -d 'start=5m' -d 'field=host' ``` Below is an example JSON output returned from this endpoint: @@ -335,12 +335,12 @@ Below is an example JSON output returned from this endpoint: } ``` -The `/select/logsql/stream_label_names` endpoint supports optional `limit=N` query arg, which allows limiting the number of returned values to `N`. +The `/select/logsql/stream_field_values` endpoint supports optional `limit=N` query arg, which allows limiting the number of returned values to `N`. 
The endpoint returns arbitrary subset of values if their number exceeds `N`, so `limit=N` cannot be used for pagination over big number of field values. See also: -- [Querying stream label values](#querying-stream-label-values) +- [Querying stream field values](#querying-stream-field-values) - [Querying field names](#querying-field-names) - [Querying streams](#querying-streams) - [HTTP API](#http-api) @@ -348,7 +348,7 @@ See also: ### Querying field names VictoriaLogs provides `/select/logsql/field_names?query=&start=&end=` HTTP endpoint, which returns field names -from results of the given `` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[ ... ]` time range. +from results of the given [``](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[ ... ]` time range. The response also contains the number of log results per every field name. The `` and `` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats). @@ -385,7 +385,7 @@ Below is an example JSON output returned from this endpoint: See also: -- [Querying stream label names](#querying-stream-label-names) +- [Querying stream field names](#querying-stream-field-names) - [Querying field values](#querying-field-values) - [Querying streams](#querying-streams) - [HTTP API](#http-api) @@ -394,7 +394,7 @@ See also: VictoriaLogs provides `/select/logsql/field_values?query=&field=&start=&end=` HTTP endpoint, which returns unique values for the given `` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) -from results of the given `` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[ ... ]` time range. +from results of the given [``](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[ ... ]` time range. The response also contains the number of log results per every field value. 
The `` and `` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats). @@ -435,7 +435,7 @@ When the `limit` is reached, `hits` are zeroed, since they cannot be calculated See also: -- [Querying stream label values](#querying-stream-label-values) +- [Querying stream field values](#querying-stream-field-values) - [Querying field names](#querying-field-names) - [Querying streams](#querying-streams) - [HTTP API](#http-api) diff --git a/lib/logstorage/storage_search.go b/lib/logstorage/storage_search.go index 95ea35af6..ad2372376 100644 --- a/lib/logstorage/storage_search.go +++ b/lib/logstorage/storage_search.go @@ -288,18 +288,18 @@ func sortValuesWithHits(results []ValueWithHits) { }) } -// GetStreamLabelNames returns stream label names from q results for the given tenantIDs. -func (s *Storage) GetStreamLabelNames(ctx context.Context, tenantIDs []TenantID, q *Query) ([]ValueWithHits, error) { +// GetStreamFieldNames returns stream field names from q results for the given tenantIDs. +func (s *Storage) GetStreamFieldNames(ctx context.Context, tenantIDs []TenantID, q *Query) ([]ValueWithHits, error) { streams, err := s.GetStreams(ctx, tenantIDs, q, math.MaxUint64) if err != nil { return nil, err } m := make(map[string]*uint64) - forEachStreamLabel(streams, func(label Field, hits uint64) { - pHits, ok := m[label.Name] + forEachStreamField(streams, func(f Field, hits uint64) { + pHits, ok := m[f.Name] if !ok { - nameCopy := strings.Clone(label.Name) + nameCopy := strings.Clone(f.Name) hitsLocal := uint64(0) pHits = &hitsLocal m[nameCopy] = pHits @@ -310,23 +310,23 @@ func (s *Storage) GetStreamLabelNames(ctx context.Context, tenantIDs []TenantID, return names, nil } -// GetStreamLabelValues returns stream label values for the given labelName from q results for the given tenantIDs. +// GetStreamFieldValues returns stream field values for the given fieldName from q results for the given tenantIDs. 
// -// If limit > 9, then up to limit unique label values are returned. -func (s *Storage) GetStreamLabelValues(ctx context.Context, tenantIDs []TenantID, q *Query, labelName string, limit uint64) ([]ValueWithHits, error) { +// If limit > 0, then up to limit unique values are returned. +func (s *Storage) GetStreamFieldValues(ctx context.Context, tenantIDs []TenantID, q *Query, fieldName string, limit uint64) ([]ValueWithHits, error) { streams, err := s.GetStreams(ctx, tenantIDs, q, math.MaxUint64) if err != nil { return nil, err } m := make(map[string]*uint64) - forEachStreamLabel(streams, func(label Field, hits uint64) { - if label.Name != labelName { + forEachStreamField(streams, func(f Field, hits uint64) { + if f.Name != fieldName { return } - pHits, ok := m[label.Value] + pHits, ok := m[f.Value] if !ok { - valueCopy := strings.Clone(label.Value) + valueCopy := strings.Clone(f.Value) hitsLocal := uint64(0) pHits = &hitsLocal m[valueCopy] = pHits @@ -1099,22 +1099,22 @@ func getFilterTimeRange(f filter) (int64, int64) { return math.MinInt64, math.MaxInt64 } -func forEachStreamLabel(streams []ValueWithHits, f func(label Field, hits uint64)) { - var labels []Field +func forEachStreamField(streams []ValueWithHits, f func(f Field, hits uint64)) { + var fields []Field for i := range streams { var err error - labels, err = parseStreamLabels(labels[:0], streams[i].Value) + fields, err = parseStreamFields(fields[:0], streams[i].Value) if err != nil { continue } hits := streams[i].Hits - for j := range labels { - f(labels[j], hits) + for j := range fields { + f(fields[j], hits) } } } -func parseStreamLabels(dst []Field, s string) ([]Field, error) { +func parseStreamFields(dst []Field, s string) ([]Field, error) { if len(s) == 0 || s[0] != '{' { return dst, fmt.Errorf("missing '{' at the beginning of stream name") } @@ -1130,14 +1130,14 @@ func parseStreamLabels(dst []Field, s string) ([]Field, error) { for { n := strings.Index(s, `="`) if n < 0 { - return dst, 
fmt.Errorf("cannot find label value in double quotes at [%s]", s) + return dst, fmt.Errorf("cannot find field value in double quotes at [%s]", s) } name := s[:n] s = s[n+1:] value, nOffset := tryUnquoteString(s, "") if nOffset < 0 { - return dst, fmt.Errorf("cannot find parse label value in double quotes at [%s]", s) + return dst, fmt.Errorf("cannot find parse field value in double quotes at [%s]", s) } s = s[nOffset:] diff --git a/lib/logstorage/storage_search_test.go b/lib/logstorage/storage_search_test.go index e80409d08..9f9a43c49 100644 --- a/lib/logstorage/storage_search_test.go +++ b/lib/logstorage/storage_search_test.go @@ -650,11 +650,11 @@ func TestStorageSearch(t *testing.T) { fs.MustRemoveAll(path) } -func TestParseStreamLabelsSuccess(t *testing.T) { +func TestParseStreamFieldsSuccess(t *testing.T) { f := func(s, resultExpected string) { t.Helper() - labels, err := parseStreamLabels(nil, s) + labels, err := parseStreamFields(nil, s) if err != nil { t.Fatalf("unexpected error: %s", err) }