This commit is contained in:
Aliaksandr Valialkin 2024-05-25 11:59:47 +02:00
parent c2050495c4
commit e93a71d741
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
10 changed files with 129 additions and 123 deletions

View file

@ -6,8 +6,8 @@
{% stripspace %} {% stripspace %}
// LabelsForHits formats labels for /select/logsql/hits response // FieldsForHits formats labels for /select/logsql/hits response
{% func LabelsForHits(columns []logstorage.BlockColumn, rowIdx int) %} {% func FieldsForHits(columns []logstorage.BlockColumn, rowIdx int) %}
{ {
{% if len(columns) > 0 %} {% if len(columns) > 0 %}
{%q= columns[0].Name %}:{%q= columns[0].Values[rowIdx] %} {%q= columns[0].Name %}:{%q= columns[0].Values[rowIdx] %}

View file

@ -11,7 +11,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
) )
// LabelsForHits formats labels for /select/logsql/hits response // FieldsForHits formats labels for /select/logsql/hits response
//line app/vlselect/logsql/hits_response.qtpl:10 //line app/vlselect/logsql/hits_response.qtpl:10
import ( import (
@ -27,7 +27,7 @@ var (
) )
//line app/vlselect/logsql/hits_response.qtpl:10 //line app/vlselect/logsql/hits_response.qtpl:10
func StreamLabelsForHits(qw422016 *qt422016.Writer, columns []logstorage.BlockColumn, rowIdx int) { func StreamFieldsForHits(qw422016 *qt422016.Writer, columns []logstorage.BlockColumn, rowIdx int) {
//line app/vlselect/logsql/hits_response.qtpl:10 //line app/vlselect/logsql/hits_response.qtpl:10
qw422016.N().S(`{`) qw422016.N().S(`{`)
//line app/vlselect/logsql/hits_response.qtpl:12 //line app/vlselect/logsql/hits_response.qtpl:12
@ -58,22 +58,22 @@ func StreamLabelsForHits(qw422016 *qt422016.Writer, columns []logstorage.BlockCo
} }
//line app/vlselect/logsql/hits_response.qtpl:19 //line app/vlselect/logsql/hits_response.qtpl:19
func WriteLabelsForHits(qq422016 qtio422016.Writer, columns []logstorage.BlockColumn, rowIdx int) { func WriteFieldsForHits(qq422016 qtio422016.Writer, columns []logstorage.BlockColumn, rowIdx int) {
//line app/vlselect/logsql/hits_response.qtpl:19 //line app/vlselect/logsql/hits_response.qtpl:19
qw422016 := qt422016.AcquireWriter(qq422016) qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vlselect/logsql/hits_response.qtpl:19 //line app/vlselect/logsql/hits_response.qtpl:19
StreamLabelsForHits(qw422016, columns, rowIdx) StreamFieldsForHits(qw422016, columns, rowIdx)
//line app/vlselect/logsql/hits_response.qtpl:19 //line app/vlselect/logsql/hits_response.qtpl:19
qt422016.ReleaseWriter(qw422016) qt422016.ReleaseWriter(qw422016)
//line app/vlselect/logsql/hits_response.qtpl:19 //line app/vlselect/logsql/hits_response.qtpl:19
} }
//line app/vlselect/logsql/hits_response.qtpl:19 //line app/vlselect/logsql/hits_response.qtpl:19
func LabelsForHits(columns []logstorage.BlockColumn, rowIdx int) string { func FieldsForHits(columns []logstorage.BlockColumn, rowIdx int) string {
//line app/vlselect/logsql/hits_response.qtpl:19 //line app/vlselect/logsql/hits_response.qtpl:19
qb422016 := qt422016.AcquireByteBuffer() qb422016 := qt422016.AcquireByteBuffer()
//line app/vlselect/logsql/hits_response.qtpl:19 //line app/vlselect/logsql/hits_response.qtpl:19
WriteLabelsForHits(qb422016, columns, rowIdx) WriteFieldsForHits(qb422016, columns, rowIdx)
//line app/vlselect/logsql/hits_response.qtpl:19 //line app/vlselect/logsql/hits_response.qtpl:19
qs422016 := string(qb422016.B) qs422016 := string(qb422016.B)
//line app/vlselect/logsql/hits_response.qtpl:19 //line app/vlselect/logsql/hits_response.qtpl:19

View file

@ -77,7 +77,7 @@ func ProcessHitsRequest(ctx context.Context, w http.ResponseWriter, r *http.Requ
hitsStr := strings.Clone(hitsValues[i]) hitsStr := strings.Clone(hitsValues[i])
bb.Reset() bb.Reset()
WriteLabelsForHits(bb, columns, i) WriteFieldsForHits(bb, columns, i)
mLock.Lock() mLock.Lock()
hs, ok := m[string(bb.B)] hs, ok := m[string(bb.B)]
@ -189,21 +189,21 @@ func ProcessFieldValuesRequest(ctx context.Context, w http.ResponseWriter, r *ht
WriteValuesWithHitsJSON(w, values) WriteValuesWithHitsJSON(w, values)
} }
// ProcessStreamLabelNamesRequest processes /select/logsql/stream_label_names request. // ProcessStreamFieldNamesRequest processes /select/logsql/stream_field_names request.
// //
// See https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-label-names // See https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-field-names
func ProcessStreamLabelNamesRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) { func ProcessStreamFieldNamesRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) {
q, tenantIDs, err := parseCommonArgs(r) q, tenantIDs, err := parseCommonArgs(r)
if err != nil { if err != nil {
httpserver.Errorf(w, r, "%s", err) httpserver.Errorf(w, r, "%s", err)
return return
} }
// Obtain stream label names for the given query // Obtain stream field names for the given query
q.Optimize() q.Optimize()
names, err := vlstorage.GetStreamLabelNames(ctx, tenantIDs, q) names, err := vlstorage.GetStreamFieldNames(ctx, tenantIDs, q)
if err != nil { if err != nil {
httpserver.Errorf(w, r, "cannot obtain stream label names: %s", err) httpserver.Errorf(w, r, "cannot obtain stream field names: %s", err)
} }
// Write results // Write results
@ -211,20 +211,20 @@ func ProcessStreamLabelNamesRequest(ctx context.Context, w http.ResponseWriter,
WriteValuesWithHitsJSON(w, names) WriteValuesWithHitsJSON(w, names)
} }
// ProcessStreamLabelValuesRequest processes /select/logsql/stream_label_values request. // ProcessStreamFieldValuesRequest processes /select/logsql/stream_field_values request.
// //
// See https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-label-values // See https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-field-values
func ProcessStreamLabelValuesRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) { func ProcessStreamFieldValuesRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) {
q, tenantIDs, err := parseCommonArgs(r) q, tenantIDs, err := parseCommonArgs(r)
if err != nil { if err != nil {
httpserver.Errorf(w, r, "%s", err) httpserver.Errorf(w, r, "%s", err)
return return
} }
// Parse labelName query arg // Parse fieldName query arg
labelName := r.FormValue("label") fieldName := r.FormValue("field")
if labelName == "" { if fieldName == "" {
httpserver.Errorf(w, r, "missing 'label' query arg") httpserver.Errorf(w, r, "missing 'field' query arg")
return return
} }
@ -238,11 +238,11 @@ func ProcessStreamLabelValuesRequest(ctx context.Context, w http.ResponseWriter,
limit = 0 limit = 0
} }
// Obtain stream label names for the given query // Obtain stream field values for the given query and the given fieldName
q.Optimize() q.Optimize()
values, err := vlstorage.GetStreamLabelValues(ctx, tenantIDs, q, labelName, uint64(limit)) values, err := vlstorage.GetStreamFieldValues(ctx, tenantIDs, q, fieldName, uint64(limit))
if err != nil { if err != nil {
httpserver.Errorf(w, r, "cannot obtain stream label values: %s", err) httpserver.Errorf(w, r, "cannot obtain stream field values: %s", err)
} }
// Write results // Write results

View file

@ -157,13 +157,13 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
logsqlQueryRequests.Inc() logsqlQueryRequests.Inc()
logsql.ProcessQueryRequest(ctx, w, r) logsql.ProcessQueryRequest(ctx, w, r)
return true return true
case "/select/logsql/stream_label_names": case "/select/logsql/stream_field_names":
logsqlStreamLabelNamesRequests.Inc() logsqlStreamFieldNamesRequests.Inc()
logsql.ProcessStreamLabelNamesRequest(ctx, w, r) logsql.ProcessStreamFieldNamesRequest(ctx, w, r)
return true return true
case "/select/logsql/stream_label_values": case "/select/logsql/stream_field_values":
logsqlStreamLabelValuesRequests.Inc() logsqlStreamFieldValuesRequests.Inc()
logsql.ProcessStreamLabelValuesRequest(ctx, w, r) logsql.ProcessStreamFieldValuesRequest(ctx, w, r)
return true return true
case "/select/logsql/streams": case "/select/logsql/streams":
logsqlStreamsRequests.Inc() logsqlStreamsRequests.Inc()
@ -192,7 +192,7 @@ var (
logsqlFieldValuesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/field_values"}`) logsqlFieldValuesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/field_values"}`)
logsqlHitsRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/hits"}`) logsqlHitsRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/hits"}`)
logsqlQueryRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/query"}`) logsqlQueryRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/query"}`)
logsqlStreamLabelNamesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/stream_label_names"}`) logsqlStreamFieldNamesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/stream_field_names"}`)
logsqlStreamLabelValuesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/stream_label_values"}`) logsqlStreamFieldValuesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/stream_field_values"}`)
logsqlStreamsRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/streams"}`) logsqlStreamsRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/streams"}`)
) )

View file

@ -123,16 +123,16 @@ func GetFieldValues(ctx context.Context, tenantIDs []logstorage.TenantID, q *log
return strg.GetFieldValues(ctx, tenantIDs, q, fieldName, limit) return strg.GetFieldValues(ctx, tenantIDs, q, fieldName, limit)
} }
// GetStreamLabelNames executes q and returns stream labels names seen in results. // GetStreamFieldNames executes q and returns stream field names seen in results.
func GetStreamLabelNames(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query) ([]logstorage.ValueWithHits, error) { func GetStreamFieldNames(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query) ([]logstorage.ValueWithHits, error) {
return strg.GetStreamLabelNames(ctx, tenantIDs, q) return strg.GetStreamFieldNames(ctx, tenantIDs, q)
} }
// GetStreamLabelValues executes q and returns stream label values for the given labelName seen in results. // GetStreamFieldValues executes q and returns stream field values for the given fieldName seen in results.
// //
// If limit > 0, then up to limit unique stream label values are returned. // If limit > 0, then up to limit unique stream field values are returned.
func GetStreamLabelValues(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query, labelName string, limit uint64) ([]logstorage.ValueWithHits, error) { func GetStreamFieldValues(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query, fieldName string, limit uint64) ([]logstorage.ValueWithHits, error) {
return strg.GetStreamLabelValues(ctx, tenantIDs, q, labelName, limit) return strg.GetStreamFieldValues(ctx, tenantIDs, q, fieldName, limit)
} }
// GetStreams executes q and returns streams seen in query results. // GetStreams executes q and returns streams seen in query results.

View file

@ -63,8 +63,8 @@ Released at 2024-05-22
* FEATURE: add ability to unpack [logfmt](https://brandur.org/logfmt) fields with [`unpack_logfmt` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_logfmt-pipe) only if the given condition is met. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#conditional-unpack_logfmt). * FEATURE: add ability to unpack [logfmt](https://brandur.org/logfmt) fields with [`unpack_logfmt` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_logfmt-pipe) only if the given condition is met. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#conditional-unpack_logfmt).
* FEATURE: add [`fields_min`](https://docs.victoriametrics.com/victorialogs/logsql/#fields_min-stats) and [`fields_max`](https://docs.victoriametrics.com/victorialogs/logsql/#fields_max-stats) functions for [`stats` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe), which allow returning all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) for the log entry with the minimum / maximum value at the given field. * FEATURE: add [`fields_min`](https://docs.victoriametrics.com/victorialogs/logsql/#fields_min-stats) and [`fields_max`](https://docs.victoriametrics.com/victorialogs/logsql/#fields_max-stats) functions for [`stats` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe), which allow returning all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) for the log entry with the minimum / maximum value at the given field.
* FEATURE: add `/select/logsql/streams` HTTP endpoint for returning [streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-streams) for details. * FEATURE: add `/select/logsql/streams` HTTP endpoint for returning [streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-streams) for details.
* FEATURE: add `/select/logsql/stream_label_names` HTTP endpoint for returning [stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label names from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-label-names) for details. * FEATURE: add `/select/logsql/stream_field_names` HTTP endpoint for returning [stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) field names from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-field-names) for details.
* FEATURE: add `/select/logsql/stream_label_values` HTTP endpoint for returning [stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label values for the given label from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-label-values) for details. * FEATURE: add `/select/logsql/stream_field_values` HTTP endpoint for returning [stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) field values for the given field from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-stream-field-values) for details.
* FEATURE: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): change time range limitation from `_time` in the expression to `start` and `end` query args. * FEATURE: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): change time range limitation from `_time` in the expression to `start` and `end` query args.
* BUGFIX: fix `invalid memory address or nil pointer dereference` panic when using [`extract`](https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe), [`unpack_json`](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_json-pipe) or [`unpack_logfmt`](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_logfmt-pipe) pipes. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6306). * BUGFIX: fix `invalid memory address or nil pointer dereference` panic when using [`extract`](https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe), [`unpack_json`](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_json-pipe) or [`unpack_logfmt`](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_logfmt-pipe) pipes. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6306).

View file

@ -74,14 +74,14 @@ during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-inges
} }
``` ```
Both label name and label value may contain arbitrary chars. Such chars must be encoded Both field name and field value may contain arbitrary chars. Such chars must be encoded
during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/) during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/)
according to [JSON string encoding](https://www.rfc-editor.org/rfc/rfc7159.html#section-7). according to [JSON string encoding](https://www.rfc-editor.org/rfc/rfc7159.html#section-7).
Unicode chars must be encoded with [UTF-8](https://en.wikipedia.org/wiki/UTF-8) encoding: Unicode chars must be encoded with [UTF-8](https://en.wikipedia.org/wiki/UTF-8) encoding:
```json ```json
{ {
"label with whitespace": "value\nwith\nnewlines", "field with whitespace": "value\nwith\nnewlines",
"Поле": "价值" "Поле": "价值"
} }
``` ```
@ -89,13 +89,11 @@ Unicode chars must be encoded with [UTF-8](https://en.wikipedia.org/wiki/UTF-8)
VictoriaLogs automatically indexes all the fields in all the [ingested](https://docs.victoriametrics.com/victorialogs/data-ingestion/) logs. VictoriaLogs automatically indexes all the fields in all the [ingested](https://docs.victoriametrics.com/victorialogs/data-ingestion/) logs.
This enables [full-text search](https://docs.victoriametrics.com/victorialogs/logsql/) across all the fields. This enables [full-text search](https://docs.victoriametrics.com/victorialogs/logsql/) across all the fields.
VictoriaLogs supports the following field types: VictoriaLogs supports the following special fields additionally to arbitrary [other fields](#other-fields):
* [`_msg` field](#message-field) * [`_msg` field](#message-field)
* [`_time` field](#time-field) * [`_time` field](#time-field)
* [`_stream` fields](#stream-fields) * [`_stream` fields](#stream-fields)
* [other fields](#other-fields)
### Message field ### Message field
@ -116,7 +114,9 @@ during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-inges
### Time field ### Time field
The ingested [log entries](#data-model) may contain `_time` field with the timestamp of the ingested log entry. The ingested [log entries](#data-model) may contain `_time` field with the timestamp of the ingested log entry.
For example: The timestamp must be in [RFC3339](https://www.rfc-editor.org/rfc/rfc3339) format. The most commonly used subset of [ISO8601](https://en.wikipedia.org/wiki/ISO_8601)
is also supported. It is allowed specifying seconds part of the timestamp with any precision up to nanoseconds.
For example, the following [log entry](#data-model) contains valid timestamp with millisecond precision in the `_time` field:
```json ```json
{ {
@ -132,29 +132,39 @@ during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-inges
If `_time` field is missing, then the data ingestion time is used as log entry timestamp. If `_time` field is missing, then the data ingestion time is used as log entry timestamp.
The log entry timestamp allows quickly narrowing down the search to a particular time range. The `_time` field is used in [time filter](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter) for quickly narrowing down
See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter) for details. the search to a particular time range.
### Stream fields ### Stream fields
Some [structured logging](#data-model) fields may uniquely identify the application instance, which generates log entries. Some [structured logging](#data-model) fields may uniquely identify the application instance, which generates log entries.
This may be either a single field such as `instance=host123:456` or a set of fields such as This may be either a single field such as `instance="host123:456"` or a set of fields such as
`(datacenter=..., env=..., job=..., instance=...)` or `{datacenter="...", env="...", job="...", instance="..."}` or
`(kubernetes.namespace=..., kubernetes.node.name=..., kubernetes.pod.name=..., kubernetes.container.name=...)`. `{kubernetes.namespace="...", kubernetes.node.name="...", kubernetes.pod.name="...", kubernetes.container.name="..."}`.
Log entries received from a single application instance form a log stream in VictoriaLogs. Log entries received from a single application instance form a **log stream** in VictoriaLogs.
VictoriaLogs optimizes storing and querying of individual log streams. This provides the following benefits: VictoriaLogs optimizes storing and [querying](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter) of individual log streams.
This provides the following benefits:
- Reduced disk space usage, since a log stream from a single application instance is usually compressed better - Reduced disk space usage, since a log stream from a single application instance is usually compressed better
than a mixed log stream from multiple distinct applications. than a mixed log stream from multiple distinct applications.
- Increased query performance, since VictoriaLogs needs to scan lower amounts of data - Increased query performance, since VictoriaLogs needs to scan lower amounts of data
when [searching by stream labels](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter). when [searching by stream fields](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter).
VictoriaLogs cannot determine automatically, which fields uniquely identify every log stream, Every ingested log entry is associated with a log stream. The name of this stream is stored in `_stream` field.
so it stores all the received log entries in a single default stream - `{}`. This field has the format similar to [labels in Prometheus metrics](https://docs.victoriametrics.com/keyconcepts/#labels):
This may lead to not-so-optimal resource usage and query performance.
```
{field1="value1", ..., fieldN="valueN"}
```
For example, if `host` and `app` fields are associated with the stream, then the `_stream` field will have `{host="host-123",app="my-app"}` value
for the log entry with `host="host-123"` and `app="my-app"` fields. The `_stream` field can be searched
with [stream filters](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter).
By default the value of `_stream` field is `{}`, since VictoriaLogs cannot determine automatically,
which fields uniquely identify every log stream. This may lead to not-so-optimal resource usage and query performance.
Therefore it is recommended specifying stream-level fields via `_stream_fields` query arg Therefore it is recommended specifying stream-level fields via `_stream_fields` query arg
during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/). during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/).
For example, if logs from Kubernetes containers have the following fields: For example, if logs from Kubernetes containers have the following fields:
@ -175,20 +185,17 @@ per-container logs into distinct streams.
#### How to determine which fields must be associated with log streams? #### How to determine which fields must be associated with log streams?
[Log streams](#stream-fields) can be associated with fields, which simultaneously meet the following conditions: [Log streams](#stream-fields) must contain [fields](#data-model), which uniquely identify the application instance, which generates logs.
For example, `container`, `instance` and `host` are good candidates for stream fields.
- Fields, which remain constant across log entries received from a single application instance. Additional fields may be added to log streams if they **remain constant during application instance lifetime**.
- Fields, which uniquely identify the application instance. For example, `instance`, `host`, `container`, etc. For example, `namespace`, `node`, `pod` and `job` are good candidates for additional stream fields. Adding such fields to log streams
makes sense if you are going to use these fields during search and want speeding up it with [stream filters](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter).
Sometimes a single application instance may generate multiple log streams and store them into distinct log files. There is **no need to add all the constant fields to log streams**, since this may increase resource usage during data ingestion and querying.
In this case it is OK to associate the log stream with filepath fields such as `log.file.path` additionally to instance-specific fields.
Structured logs may contain big number of fields, which do not change across log entries received from a single application instance. **Never add non-constant fields to streams if these fields may change with every log entry of the same stream**.
There is no need in associating all these fields with log stream - it is enough to associate only those fields, which uniquely identify For example, `ip`, `user_id` and `trace_id` **must never be associated with log streams**, since this may lead to [high cardinality issues](#high-cardinality).
the application instance across all the ingested logs. Additionally, some fields such as `datacenter`, `environment`, `namespace`, `job` or `app`,
can be associated with log stream in order to optimize searching by these fields with [stream filtering](https://docs.victoriametrics.com/victorialogs/logsql/#stream-filter).
Never associate log streams with fields, which may change across log entries of the same application instance. See [these docs](#high-cardinality) for details.
#### High cardinality #### High cardinality
@ -196,8 +203,7 @@ Some fields in the [ingested logs](#data-model) may contain big number of unique
For example, fields with names such as `ip`, `user_id` or `trace_id` tend to contain big number of unique values. For example, fields with names such as `ip`, `user_id` or `trace_id` tend to contain big number of unique values.
VictoriaLogs works perfectly with such fields unless they are associated with [log streams](#stream-fields). VictoriaLogs works perfectly with such fields unless they are associated with [log streams](#stream-fields).
Never associate high-cardinality fields with [log streams](#stream-fields), since this may result **Never** associate high-cardinality fields with [log streams](#stream-fields), since this may lead to the following issues:
to the following issues:
- Performance degradation during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/) - Performance degradation during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/)
and [querying](https://docs.victoriametrics.com/victorialogs/querying/) and [querying](https://docs.victoriametrics.com/victorialogs/querying/)
@ -214,9 +220,9 @@ This can help narrowing down and eliminating high-cardinality fields from [log s
### Other fields ### Other fields
The rest of [structured logging](#data-model) fields are optional. They can be used for simplifying and optimizing search queries. Every ingested log entry may contain arbitrary number of [fields](#data-model) additionally to [`_msg`](#message-field) and [`_time`](#time-field).
For example, it is usually faster to search over a dedicated `trace_id` field instead of searching for the `trace_id` inside long log message. For example, `level`, `ip`, `user_id`, `trace_id`, etc. Such fields can be used for simplifying and optimizing [search queries](https://docs.victoriametrics.com/victorialogs/logsql/).
E.g. the `trace_id:XXXX-YYYY-ZZZZ` query usually works faster than the `_msg:"trace_id=XXXX-YYYY-ZZZZ"` query. It is usually faster to search over a dedicated `trace_id` field instead of searching for the `trace_id` inside long [log message](#message-field).
E.g. the `trace_id:="XXXX-YYYY-ZZZZ"` query usually works faster than the `_msg:"trace_id=XXXX-YYYY-ZZZZ"` query.
See [LogsQL docs](https://docs.victoriametrics.com/victorialogs/logsql/) for more details. See [LogsQL docs](https://docs.victoriametrics.com/victorialogs/logsql/) for more details.

View file

@ -28,8 +28,8 @@ VictoriaLogs provides the following HTTP endpoints:
- [`/select/logsql/query`](#querying-logs) for querying logs - [`/select/logsql/query`](#querying-logs) for querying logs
- [`/select/logsql/hits`](#querying-hits-stats) for querying log hits stats over the given time range - [`/select/logsql/hits`](#querying-hits-stats) for querying log hits stats over the given time range
- [`/select/logsql/streams`](#querying-streams) for querying [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) - [`/select/logsql/streams`](#querying-streams) for querying [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields)
- [`/select/logsql/stream_label_names`](#querying-stream-label-names) for querying [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label names - [`/select/logsql/stream_field_names`](#querying-stream-field-names) for querying [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) field names
- [`/select/logsql/stream_label_values`](#querying-stream-label-values) for querying [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label values - [`/select/logsql/stream_field_values`](#querying-stream-field-values) for querying [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) field values
- [`/select/logsql/field_names`](#querying-field-names) for querying [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) names. - [`/select/logsql/field_names`](#querying-field-names) for querying [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) names.
- [`/select/logsql/field_values`](#querying-field-values) for querying [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) values. - [`/select/logsql/field_values`](#querying-field-values) for querying [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) values.
@ -107,7 +107,7 @@ with `vl_http_requests_total{path="/select/logsql/query"}` metric.
### Querying hits stats ### Querying hits stats
VictoriaLogs provides `/select/logsql/hits?query=<query>&start=<start>&end=<end>&step=<step>` HTTP endpoint, which returns the number VictoriaLogs provides `/select/logsql/hits?query=<query>&start=<start>&end=<end>&step=<step>` HTTP endpoint, which returns the number
of matching log entries for the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` of matching log entries for the given [`<query>`](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]`
time range grouped by `<step>` buckets. The returned results are sorted by time. time range grouped by `<step>` buckets. The returned results are sorted by time.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats). The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
@ -210,7 +210,7 @@ See also:
### Querying streams ### Querying streams
VictoriaLogs provides `/select/logsql/streams?query=<query>&start=<start>&end=<end>` HTTP endpoint, which returns [streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) VictoriaLogs provides `/select/logsql/streams?query=<query>&start=<start>&end=<end>` HTTP endpoint, which returns [streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields)
from results of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range. from results of the given [`<query>`](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The response also contains the number of log results per every `stream`. The response also contains the number of log results per every `stream`.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats). The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
@ -254,22 +254,22 @@ See also:
- [Querying hits stats](#querying-hits-stats) - [Querying hits stats](#querying-hits-stats)
- [HTTP API](#http-api) - [HTTP API](#http-api)
### Querying stream label names ### Querying stream field names
VictoriaLogs provides `/select/logsql/stream_label_names?query=<query>&start=<start>&end=<end>` HTTP endpoint, which returns VictoriaLogs provides `/select/logsql/stream_field_names?query=<query>&start=<start>&end=<end>` HTTP endpoint, which returns
[log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label names from results [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) field names from results
of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range. of the given [`<query>`](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The response also contains the number of log results per every label name. The response also contains the number of log results per every field name.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats). The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
If `<start>` is missing, then it equals to the minimum timestamp across logs stored in VictoriaLogs. If `<start>` is missing, then it equals to the minimum timestamp across logs stored in VictoriaLogs.
If `<end>` is missing, then it equals to the maximum timestamp across logs stored in VictoriaLogs. If `<end>` is missing, then it equals to the maximum timestamp across logs stored in VictoriaLogs.
For example, the following command returns stream label names across logs with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word) For example, the following command returns stream field names across logs with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word)
for the last 5 minutes: for the last 5 minutes:
```sh ```sh
curl http://localhost:9428/select/logsql/stream_label_names -d 'query=error' -d 'start=5m' curl http://localhost:9428/select/logsql/stream_field_names -d 'query=error' -d 'start=5m'
``` ```
Below is an example JSON output returned from this endpoint: Below is an example JSON output returned from this endpoint:
@ -295,27 +295,27 @@ Below is an example JSON output returned from this endpoint:
See also: See also:
- [Querying stream label names](#querying-stream-label-names) - [Querying stream field names](#querying-stream-field-names)
- [Querying field values](#querying-field-values) - [Querying field values](#querying-field-values)
- [Querying streams](#querying-streams) - [Querying streams](#querying-streams)
- [HTTP API](#http-api) - [HTTP API](#http-api)
### Querying stream label values ### Querying stream field values
VictoriaLogs provides `/select/logsql/stream_label_values?query=<query>&start=<start>&end=<end>&label=<labelName>` HTTP endpoint, VictoriaLogs provides `/select/logsql/stream_field_values?query=<query>&start=<start>&end=<end>&field=<fieldName>` HTTP endpoint,
which returns [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) label values for the label with the given `<labelName>` name which returns [log stream](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) field values for the field with the given `<fieldName>` name
from results of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range. from results of the given [`<query>`](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The response also contains the number of log results per every label value. The response also contains the number of log results per every field value.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats). The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
If `<start>` is missing, then it equals to the minimum timestamp across logs stored in VictoriaLogs. If `<start>` is missing, then it equals to the minimum timestamp across logs stored in VictoriaLogs.
If `<end>` is missing, then it equals to the maximum timestamp across logs stored in VictoriaLogs. If `<end>` is missing, then it equals to the maximum timestamp across logs stored in VictoriaLogs.
For example, the following command returns values for the stream label `host` across logs with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word) For example, the following command returns values for the stream field `host` across logs with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word)
for the last 5 minutes: for the last 5 minutes:
```sh ```sh
curl http://localhost:9428/select/logsql/stream_label_values -d 'query=error' -d 'start=5m' -d 'label=host' curl http://localhost:9428/select/logsql/stream_field_values -d 'query=error' -d 'start=5m' -d 'field=host'
``` ```
Below is an example JSON output returned from this endpoint: Below is an example JSON output returned from this endpoint:
@ -335,12 +335,12 @@ Below is an example JSON output returned from this endpoint:
} }
``` ```
The `/select/logsql/stream_label_values` endpoint supports optional `limit=N` query arg, which allows limiting the number of returned values to `N`. The `/select/logsql/stream_field_values` endpoint supports optional `limit=N` query arg, which allows limiting the number of returned values to `N`.
The endpoint returns arbitrary subset of values if their number exceeds `N`, so `limit=N` cannot be used for pagination over big number of field values. The endpoint returns arbitrary subset of values if their number exceeds `N`, so `limit=N` cannot be used for pagination over big number of field values.
See also: See also:
- [Querying stream label values](#querying-stream-label-values) - [Querying stream field values](#querying-stream-field-values)
- [Querying field names](#querying-field-names) - [Querying field names](#querying-field-names)
- [Querying streams](#querying-streams) - [Querying streams](#querying-streams)
- [HTTP API](#http-api) - [HTTP API](#http-api)
@ -348,7 +348,7 @@ See also:
### Querying field names ### Querying field names
VictoriaLogs provides `/select/logsql/field_names?query=<query>&start=<start>&end=<end>` HTTP endpoint, which returns field names VictoriaLogs provides `/select/logsql/field_names?query=<query>&start=<start>&end=<end>` HTTP endpoint, which returns field names
from results of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range. from results of the given [`<query>`](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The response also contains the number of log results per every field name. The response also contains the number of log results per every field name.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats). The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
@ -385,7 +385,7 @@ Below is an example JSON output returned from this endpoint:
See also: See also:
- [Querying stream label names](#querying-stream-label-names) - [Querying stream field names](#querying-stream-field-names)
- [Querying field values](#querying-field-values) - [Querying field values](#querying-field-values)
- [Querying streams](#querying-streams) - [Querying streams](#querying-streams)
- [HTTP API](#http-api) - [HTTP API](#http-api)
@ -394,7 +394,7 @@ See also:
VictoriaLogs provides `/select/logsql/field_values?query=<query>&field=<fieldName>&start=<start>&end=<end>` HTTP endpoint, which returns VictoriaLogs provides `/select/logsql/field_values?query=<query>&field=<fieldName>&start=<start>&end=<end>` HTTP endpoint, which returns
unique values for the given `<fieldName>` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) unique values for the given `<fieldName>` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
from results of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range. from results of the given [`<query>`](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
The response also contains the number of log results per every field value. The response also contains the number of log results per every field value.
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats). The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
@ -435,7 +435,7 @@ When the `limit` is reached, `hits` are zeroed, since they cannot be calculated
See also: See also:
- [Querying stream label values](#querying-stream-label-values) - [Querying stream field values](#querying-stream-field-values)
- [Querying field names](#querying-field-names) - [Querying field names](#querying-field-names)
- [Querying streams](#querying-streams) - [Querying streams](#querying-streams)
- [HTTP API](#http-api) - [HTTP API](#http-api)

View file

@ -288,18 +288,18 @@ func sortValuesWithHits(results []ValueWithHits) {
}) })
} }
// GetStreamLabelNames returns stream label names from q results for the given tenantIDs. // GetStreamFieldNames returns stream field names from q results for the given tenantIDs.
func (s *Storage) GetStreamLabelNames(ctx context.Context, tenantIDs []TenantID, q *Query) ([]ValueWithHits, error) { func (s *Storage) GetStreamFieldNames(ctx context.Context, tenantIDs []TenantID, q *Query) ([]ValueWithHits, error) {
streams, err := s.GetStreams(ctx, tenantIDs, q, math.MaxUint64) streams, err := s.GetStreams(ctx, tenantIDs, q, math.MaxUint64)
if err != nil { if err != nil {
return nil, err return nil, err
} }
m := make(map[string]*uint64) m := make(map[string]*uint64)
forEachStreamLabel(streams, func(label Field, hits uint64) { forEachStreamField(streams, func(f Field, hits uint64) {
pHits, ok := m[label.Name] pHits, ok := m[f.Name]
if !ok { if !ok {
nameCopy := strings.Clone(label.Name) nameCopy := strings.Clone(f.Name)
hitsLocal := uint64(0) hitsLocal := uint64(0)
pHits = &hitsLocal pHits = &hitsLocal
m[nameCopy] = pHits m[nameCopy] = pHits
@ -310,23 +310,23 @@ func (s *Storage) GetStreamLabelNames(ctx context.Context, tenantIDs []TenantID,
return names, nil return names, nil
} }
// GetStreamLabelValues returns stream label values for the given labelName from q results for the given tenantIDs. // GetStreamFieldValues returns stream field values for the given fieldName from q results for the given tenantIDs.
// //
// If limit > 0, then up to limit unique label values are returned. // If limit > 0, then up to limit unique values are returned.
func (s *Storage) GetStreamLabelValues(ctx context.Context, tenantIDs []TenantID, q *Query, labelName string, limit uint64) ([]ValueWithHits, error) { func (s *Storage) GetStreamFieldValues(ctx context.Context, tenantIDs []TenantID, q *Query, fieldName string, limit uint64) ([]ValueWithHits, error) {
streams, err := s.GetStreams(ctx, tenantIDs, q, math.MaxUint64) streams, err := s.GetStreams(ctx, tenantIDs, q, math.MaxUint64)
if err != nil { if err != nil {
return nil, err return nil, err
} }
m := make(map[string]*uint64) m := make(map[string]*uint64)
forEachStreamLabel(streams, func(label Field, hits uint64) { forEachStreamField(streams, func(f Field, hits uint64) {
if label.Name != labelName { if f.Name != fieldName {
return return
} }
pHits, ok := m[label.Value] pHits, ok := m[f.Value]
if !ok { if !ok {
valueCopy := strings.Clone(label.Value) valueCopy := strings.Clone(f.Value)
hitsLocal := uint64(0) hitsLocal := uint64(0)
pHits = &hitsLocal pHits = &hitsLocal
m[valueCopy] = pHits m[valueCopy] = pHits
@ -1099,22 +1099,22 @@ func getFilterTimeRange(f filter) (int64, int64) {
return math.MinInt64, math.MaxInt64 return math.MinInt64, math.MaxInt64
} }
func forEachStreamLabel(streams []ValueWithHits, f func(label Field, hits uint64)) { func forEachStreamField(streams []ValueWithHits, f func(f Field, hits uint64)) {
var labels []Field var fields []Field
for i := range streams { for i := range streams {
var err error var err error
labels, err = parseStreamLabels(labels[:0], streams[i].Value) fields, err = parseStreamFields(fields[:0], streams[i].Value)
if err != nil { if err != nil {
continue continue
} }
hits := streams[i].Hits hits := streams[i].Hits
for j := range labels { for j := range fields {
f(labels[j], hits) f(fields[j], hits)
} }
} }
} }
func parseStreamLabels(dst []Field, s string) ([]Field, error) { func parseStreamFields(dst []Field, s string) ([]Field, error) {
if len(s) == 0 || s[0] != '{' { if len(s) == 0 || s[0] != '{' {
return dst, fmt.Errorf("missing '{' at the beginning of stream name") return dst, fmt.Errorf("missing '{' at the beginning of stream name")
} }
@ -1130,14 +1130,14 @@ func parseStreamLabels(dst []Field, s string) ([]Field, error) {
for { for {
n := strings.Index(s, `="`) n := strings.Index(s, `="`)
if n < 0 { if n < 0 {
return dst, fmt.Errorf("cannot find label value in double quotes at [%s]", s) return dst, fmt.Errorf("cannot find field value in double quotes at [%s]", s)
} }
name := s[:n] name := s[:n]
s = s[n+1:] s = s[n+1:]
value, nOffset := tryUnquoteString(s, "") value, nOffset := tryUnquoteString(s, "")
if nOffset < 0 { if nOffset < 0 {
return dst, fmt.Errorf("cannot find parse label value in double quotes at [%s]", s) return dst, fmt.Errorf("cannot find parse field value in double quotes at [%s]", s)
} }
s = s[nOffset:] s = s[nOffset:]

View file

@ -650,11 +650,11 @@ func TestStorageSearch(t *testing.T) {
fs.MustRemoveAll(path) fs.MustRemoveAll(path)
} }
func TestParseStreamLabelsSuccess(t *testing.T) { func TestParseStreamFieldsSuccess(t *testing.T) {
f := func(s, resultExpected string) { f := func(s, resultExpected string) {
t.Helper() t.Helper()
labels, err := parseStreamLabels(nil, s) labels, err := parseStreamFields(nil, s)
if err != nil { if err != nil {
t.Fatalf("unexpected error: %s", err) t.Fatalf("unexpected error: %s", err)
} }