mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-31 15:06:26 +00:00
wip
This commit is contained in:
parent
640b18cd66
commit
9f8dd1ef31
4 changed files with 61 additions and 15 deletions
|
@ -19,8 +19,10 @@ according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/QuickSta
|
|||
|
||||
## tip
|
||||
|
||||
* FEATURE: return all the log fields by default in query results. Previously only [`_stream`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields), [`_time`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field) and [`_msg`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) fields were returned by default.
|
||||
* FEATURE: add support for returning only the requested log [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#querying-specific-fields).
|
||||
* FEATURE: add support for calculating the number of matching logs and the number of logs with non-empty [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). Grouping by arbitrary set of [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) is supported. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#stats) for details.
|
||||
* FEATURE: optimize performance for [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/), which contains multiple filters for [words](https://docs.victoriametrics.com/victorialogs/logsql/#word-filter) or [phrases](https://docs.victoriametrics.com/victorialogs/logsql/#phrase-filter) delimited with [`AND` operator](https://docs.victoriametrics.com/victorialogs/logsql/#logical-filter). For example, `foo AND bar` query must find [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) with `foo` and `bar` words at faster speed.
|
||||
* FEATURE: return all the log fields by default in query results. Previously only [`_stream`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields), [`_time`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field) and [`_msg`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) fields were returned by default. If only some fields must be returned, then they can be listed in `| fields ...` section as described in [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#querying-specific-fields).
|
||||
|
||||
* BUGFIX: prevent from additional CPU usage for up to a few seconds after canceling the query.
|
||||
* BUGFIX: prevent from returning log entries with emtpy `_stream` field in the form `"_stream":""` in [search query results](https://docs.victoriametrics.com/victorialogs/querying/). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6042).
|
||||
|
|
|
@ -1050,14 +1050,18 @@ See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) fo
|
|||
|
||||
## Stats
|
||||
|
||||
It is possible to perform stats calculations on the [selected log entries](#filters) at client side with `sort`, `uniq`, etc. Unix commands
|
||||
according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line).
|
||||
LogsQL supports calculating the following stats:
|
||||
|
||||
LogsQL will support calculating the following stats based on the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
|
||||
- The number of matching log entries. Examples:
|
||||
- `error | stats count() as errors_total` returns the number of log messages containing the `error` [word](#word).
|
||||
- `error | stats by (_stream) count() as errors_by_stream` returns the number of log messages containing the `error` [word](#word)
|
||||
grouped by [`_stream`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields).
|
||||
- `error | stats by (datacenter, namespace) count(trace_id, user_id) as errors_with_trace_and_user` returns the number of log messages containing the `error` [word](#word),
|
||||
which contain non-empty `trace_id` or `user_id` [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model), grouped by `datacenter` and `namespace` fields.
|
||||
|
||||
LogsQL will support calculating the following additional stats based on the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
|
||||
and fields created by [transformations](#transformations):
|
||||
|
||||
- The number of selected logs via `query | stats count() as total` syntax.
|
||||
- The number of non-empty values for the given field.
|
||||
- The number of unique values for the given field.
|
||||
- The min, max, avg, and sum for the given field.
|
||||
- The median and [percentile](https://en.wikipedia.org/wiki/Percentile) for the given field.
|
||||
|
@ -1068,6 +1072,9 @@ For example, `sumIf(response_size, is_admin:true)` calculates the total response
|
|||
It will be possible to group stats by the specified [fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
|
||||
and by the specified time buckets.
|
||||
|
||||
It is possible to perform stats calculations on the [selected log entries](#filters) at client side with `sort`, `uniq`, etc. Unix commands
|
||||
according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line).
|
||||
|
||||
See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details.
|
||||
|
||||
## Sorting
|
||||
|
@ -1097,7 +1104,7 @@ See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) fo
|
|||
|
||||
By default VictoriaLogs query response contains all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||
|
||||
If you want selecting some specific fields, then add `| fields field1, field2, ... fieldN` to the end of the query.
|
||||
If you want selecting some specific fields, then add `| fields field1, field2, ... fieldN` to the query.
|
||||
For example, the following query returns only [`_time`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field),
|
||||
[`_stream`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields), `host` and [`_msg`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) fields:
|
||||
|
||||
|
|
|
@ -815,8 +815,8 @@ func TestParseQuerySuccess(t *testing.T) {
|
|||
|
||||
// stats count pipe
|
||||
f(`* | Stats count() AS foo`, `* | stats count() as foo`)
|
||||
f(`* | STATS bY (foo, b.a/r, "b az") count(*) as XYz`, `* | stats by (foo, "b.a/r", "b az") count() as XYz`)
|
||||
f(`* | stats by() count(x, 'a).b,c|d') as qwert`, `* | stats count(x, "a).b,c|d") as qwert`)
|
||||
f(`* | STATS bY (foo, b.a/r, "b az") count(*) as XYz`, `* | stats by (foo, "b.a/r", "b az") count(*) as XYz`)
|
||||
f(`* | stats by() COUNT(x, 'a).b,c|d') as qwert`, `* | stats count(x, "a).b,c|d") as qwert`)
|
||||
}
|
||||
|
||||
func TestParseQueryFailure(t *testing.T) {
|
||||
|
|
|
@ -474,8 +474,7 @@ type statsFuncCount struct {
|
|||
}
|
||||
|
||||
func (sfc *statsFuncCount) String() string {
|
||||
fields := getFieldsIgnoreStar(sfc.fields)
|
||||
return "count(" + fieldNamesString(fields) + ") as " + quoteTokenIfNeeded(sfc.resultName)
|
||||
return "count(" + fieldNamesString(sfc.fields) + ") as " + quoteTokenIfNeeded(sfc.resultName)
|
||||
}
|
||||
|
||||
func (sfc *statsFuncCount) newStatsFuncProcessor() statsFuncProcessor {
|
||||
|
@ -493,12 +492,50 @@ type statsFuncCountProcessor struct {
|
|||
rowsCount uint64
|
||||
}
|
||||
|
||||
func (sfcp *statsFuncCountProcessor) updateStatsForAllRows(timestamps []int64, _ []BlockColumn) {
|
||||
func (sfcp *statsFuncCountProcessor) updateStatsForAllRows(timestamps []int64, columns []BlockColumn) {
|
||||
fields := sfcp.sfc.fields
|
||||
if len(fields) == 0 || slices.Contains(fields, "*") {
|
||||
// Fast path - count all the columns.
|
||||
sfcp.rowsCount += uint64(len(timestamps))
|
||||
return
|
||||
}
|
||||
|
||||
// Slow path - count rows containing at least a single non-empty value for the fields enumerated inside count().
|
||||
bm := getFilterBitmap(len(timestamps))
|
||||
bm.setBits()
|
||||
for _, f := range fields {
|
||||
if idx := getBlockColumnIndex(columns, f); idx >= 0 {
|
||||
values := columns[idx].Values
|
||||
bm.forEachSetBit(func(i int) bool {
|
||||
return values[i] == ""
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
emptyValues := 0
|
||||
bm.forEachSetBit(func(i int) bool {
|
||||
emptyValues++
|
||||
return true
|
||||
})
|
||||
|
||||
sfcp.rowsCount += uint64(len(timestamps) - emptyValues)
|
||||
}
|
||||
|
||||
func (sfcp *statsFuncCountProcessor) updateStatsForRow(_ []int64, _ []BlockColumn, _ int) {
|
||||
func (sfcp *statsFuncCountProcessor) updateStatsForRow(_ []int64, columns []BlockColumn, rowIdx int) {
|
||||
fields := sfcp.sfc.fields
|
||||
if len(fields) == 0 || slices.Contains(fields, "*") {
|
||||
// Fast path - count the given column
|
||||
sfcp.rowsCount++
|
||||
return
|
||||
}
|
||||
|
||||
// Slow path - count the row at rowIdx if at least a single field enumerated inside count() is non-empty
|
||||
for _, f := range fields {
|
||||
if idx := getBlockColumnIndex(columns, f); idx >= 0 && columns[idx].Values[rowIdx] != "" {
|
||||
sfcp.rowsCount++
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (sfcp *statsFuncCountProcessor) mergeState(sfp statsFuncProcessor) {
|
||||
|
|
Loading…
Reference in a new issue