This commit is contained in:
Aliaksandr Valialkin 2024-04-27 00:52:15 +02:00
parent 640b18cd66
commit 9f8dd1ef31
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
4 changed files with 61 additions and 15 deletions

View file

@ -19,8 +19,10 @@ according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/QuickSta
## tip ## tip
* FEATURE: return all the log fields by default in query results. Previously only [`_stream`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields), [`_time`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field) and [`_msg`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) fields were returned by default.
* FEATURE: add support for returning only the requested log [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#querying-specific-fields).
* FEATURE: add support for calculating the number of matching logs and the number of logs with non-empty [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). Grouping by arbitrary set of [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) is supported. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#stats) for details.
* FEATURE: optimize performance for [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/), which contains multiple filters for [words](https://docs.victoriametrics.com/victorialogs/logsql/#word-filter) or [phrases](https://docs.victoriametrics.com/victorialogs/logsql/#phrase-filter) delimited with [`AND` operator](https://docs.victoriametrics.com/victorialogs/logsql/#logical-filter). For example, `foo AND bar` query must find [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) with `foo` and `bar` words at faster speed. * FEATURE: optimize performance for [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/), which contains multiple filters for [words](https://docs.victoriametrics.com/victorialogs/logsql/#word-filter) or [phrases](https://docs.victoriametrics.com/victorialogs/logsql/#phrase-filter) delimited with [`AND` operator](https://docs.victoriametrics.com/victorialogs/logsql/#logical-filter). For example, `foo AND bar` query must find [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) with `foo` and `bar` words at faster speed.
* FEATURE: return all the log fields by default in query results. Previously only [`_stream`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields), [`_time`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field) and [`_msg`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) fields were returned by default. If only some fields must be returned, then they can be listed in `| fields ...` section as described in [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#querying-specific-fields).
* BUGFIX: prevent from additional CPU usage for up to a few seconds after canceling the query. * BUGFIX: prevent from additional CPU usage for up to a few seconds after canceling the query.
* BUGFIX: prevent from returning log entries with empty `_stream` field in the form `"_stream":""` in [search query results](https://docs.victoriametrics.com/victorialogs/querying/). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6042). * BUGFIX: prevent from returning log entries with empty `_stream` field in the form `"_stream":""` in [search query results](https://docs.victoriametrics.com/victorialogs/querying/). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6042).

View file

@ -1050,14 +1050,18 @@ See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) fo
## Stats ## Stats
It is possible to perform stats calculations on the [selected log entries](#filters) at client side with `sort`, `uniq`, etc. Unix commands LogsQL supports calculating the following stats:
according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line).
LogsQL will support calculating the following stats based on the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) - The number of matching log entries. Examples:
- `error | stats count() as errors_total` returns the number of log messages containing the `error` [word](#word).
- `error | stats by (_stream) count() as errors_by_stream` returns the number of log messages containing the `error` [word](#word)
grouped by [`_stream`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields).
- `error | stats by (datacenter, namespace) count(trace_id, user_id) as errors_with_trace_and_user` returns the number of log messages containing the `error` [word](#word),
which contain non-empty `trace_id` or `user_id` [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model), grouped by `datacenter` and `namespace` fields.
LogsQL will support calculating the following additional stats based on the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
and fields created by [transformations](#transformations): and fields created by [transformations](#transformations):
- The number of selected logs via `query | stats count() as total` syntax.
- The number of non-empty values for the given field.
- The number of unique values for the given field. - The number of unique values for the given field.
- The min, max, avg, and sum for the given field. - The min, max, avg, and sum for the given field.
- The median and [percentile](https://en.wikipedia.org/wiki/Percentile) for the given field. - The median and [percentile](https://en.wikipedia.org/wiki/Percentile) for the given field.
@ -1068,6 +1072,9 @@ For example, `sumIf(response_size, is_admin:true)` calculates the total response
It will be possible to group stats by the specified [fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) It will be possible to group stats by the specified [fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
and by the specified time buckets. and by the specified time buckets.
It is possible to perform stats calculations on the [selected log entries](#filters) at client side with `sort`, `uniq`, etc. Unix commands
according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line).
See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details. See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details.
## Sorting ## Sorting
@ -1097,7 +1104,7 @@ See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) fo
By default VictoriaLogs query response contains all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). By default VictoriaLogs query response contains all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
If you want to select some specific fields, then add `| fields field1, field2, ... fieldN` to the end of the query. If you want to select some specific fields, then add `| fields field1, field2, ... fieldN` to the query.
For example, the following query returns only [`_time`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field), For example, the following query returns only [`_time`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field),
[`_stream`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields), `host` and [`_msg`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) fields: [`_stream`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields), `host` and [`_msg`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) fields:

View file

@ -815,8 +815,8 @@ func TestParseQuerySuccess(t *testing.T) {
// stats count pipe // stats count pipe
f(`* | Stats count() AS foo`, `* | stats count() as foo`) f(`* | Stats count() AS foo`, `* | stats count() as foo`)
f(`* | STATS bY (foo, b.a/r, "b az") count(*) as XYz`, `* | stats by (foo, "b.a/r", "b az") count() as XYz`) f(`* | STATS bY (foo, b.a/r, "b az") count(*) as XYz`, `* | stats by (foo, "b.a/r", "b az") count(*) as XYz`)
f(`* | stats by() count(x, 'a).b,c|d') as qwert`, `* | stats count(x, "a).b,c|d") as qwert`) f(`* | stats by() COUNT(x, 'a).b,c|d') as qwert`, `* | stats count(x, "a).b,c|d") as qwert`)
} }
func TestParseQueryFailure(t *testing.T) { func TestParseQueryFailure(t *testing.T) {

View file

@ -474,8 +474,7 @@ type statsFuncCount struct {
} }
func (sfc *statsFuncCount) String() string { func (sfc *statsFuncCount) String() string {
fields := getFieldsIgnoreStar(sfc.fields) return "count(" + fieldNamesString(sfc.fields) + ") as " + quoteTokenIfNeeded(sfc.resultName)
return "count(" + fieldNamesString(fields) + ") as " + quoteTokenIfNeeded(sfc.resultName)
} }
func (sfc *statsFuncCount) newStatsFuncProcessor() statsFuncProcessor { func (sfc *statsFuncCount) newStatsFuncProcessor() statsFuncProcessor {
@ -493,12 +492,50 @@ type statsFuncCountProcessor struct {
rowsCount uint64 rowsCount uint64
} }
func (sfcp *statsFuncCountProcessor) updateStatsForAllRows(timestamps []int64, _ []BlockColumn) { func (sfcp *statsFuncCountProcessor) updateStatsForAllRows(timestamps []int64, columns []BlockColumn) {
fields := sfcp.sfc.fields
if len(fields) == 0 || slices.Contains(fields, "*") {
// Fast path - count all the columns.
sfcp.rowsCount += uint64(len(timestamps)) sfcp.rowsCount += uint64(len(timestamps))
return
}
// Slow path - count rows containing at least a single non-empty value for the fields enumerated inside count().
bm := getFilterBitmap(len(timestamps))
bm.setBits()
for _, f := range fields {
if idx := getBlockColumnIndex(columns, f); idx >= 0 {
values := columns[idx].Values
bm.forEachSetBit(func(i int) bool {
return values[i] == ""
})
}
}
emptyValues := 0
bm.forEachSetBit(func(i int) bool {
emptyValues++
return true
})
sfcp.rowsCount += uint64(len(timestamps) - emptyValues)
} }
func (sfcp *statsFuncCountProcessor) updateStatsForRow(_ []int64, _ []BlockColumn, _ int) { func (sfcp *statsFuncCountProcessor) updateStatsForRow(_ []int64, columns []BlockColumn, rowIdx int) {
fields := sfcp.sfc.fields
if len(fields) == 0 || slices.Contains(fields, "*") {
// Fast path - count the given column
sfcp.rowsCount++ sfcp.rowsCount++
return
}
// Slow path - count the row at rowIdx if at least a single field enumerated inside count() is non-empty
for _, f := range fields {
if idx := getBlockColumnIndex(columns, f); idx >= 0 && columns[idx].Values[rowIdx] != "" {
sfcp.rowsCount++
return
}
}
} }
func (sfcp *statsFuncCountProcessor) mergeState(sfp statsFuncProcessor) { func (sfcp *statsFuncCountProcessor) mergeState(sfp statsFuncProcessor) {