mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-31 15:06:26 +00:00
wip
This commit is contained in:
parent
b21c39a871
commit
68b3bd370e
5 changed files with 343 additions and 0 deletions
|
@ -19,6 +19,8 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
|
||||||
|
|
||||||
## tip
|
## tip
|
||||||
|
|
||||||
|
* FEATURE: add [`drop_empty_fields` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#drop_empty_fields-pipe) for dropping [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) with empty values.
|
||||||
|
|
||||||
## [v0.15.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.15.0-victorialogs)
|
## [v0.15.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.15.0-victorialogs)
|
||||||
|
|
||||||
Released at 2024-05-30
|
Released at 2024-05-30
|
||||||
|
|
|
@ -1155,6 +1155,7 @@ LogsQL supports the following pipes:
|
||||||
|
|
||||||
- [`copy`](#copy-pipe) copies [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
- [`copy`](#copy-pipe) copies [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||||
- [`delete`](#delete-pipe) deletes [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
- [`delete`](#delete-pipe) deletes [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||||
|
- [`drop_empty_fields`](#drop_empty_fields-pipe) drops [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) with empty values.
|
||||||
- [`extract`](#extract-pipe) extracts the specified text into the given log fields.
|
- [`extract`](#extract-pipe) extracts the specified text into the given log fields.
|
||||||
- [`extract_regexp`](#extract_regexp-pipe) extracts the specified text into the given log fields via [RE2 regular expressions](https://github.com/google/re2/wiki/Syntax).
|
- [`extract_regexp`](#extract_regexp-pipe) extracts the specified text into the given log fields via [RE2 regular expressions](https://github.com/google/re2/wiki/Syntax).
|
||||||
- [`field_names`](#field_names-pipe) returns all the names of [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
- [`field_names`](#field_names-pipe) returns all the names of [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||||
|
@ -1219,6 +1220,22 @@ See also:
|
||||||
- [`rename` pipe](#rename-pipe)
|
- [`rename` pipe](#rename-pipe)
|
||||||
- [`fields` pipe](#fields-pipe)
|
- [`fields` pipe](#fields-pipe)
|
||||||
|
|
||||||
|
### drop_empty_fields pipe
|
||||||
|
|
||||||
|
`| drop_empty_fields` pipe drops [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) with empty values. It also skips log entries with zero non-empty fields.
|
||||||
|
|
||||||
|
For example, the following query drops the `email` field generated by the [`extract` pipe](#extract-pipe) when that field is empty, i.e. when the `foo` field doesn't contain an email:
|
||||||
|
|
||||||
|
```logsql
|
||||||
|
_time:5m | extract 'email: <email>,' from foo | drop_empty_fields
|
||||||
|
```
|
||||||
|
|
||||||
|
See also:
|
||||||
|
|
||||||
|
- [`filter` pipe](#filter-pipe)
|
||||||
|
- [`extract` pipe](#extract-pipe)
|
||||||
|
|
||||||
|
|
||||||
### extract pipe
|
### extract pipe
|
||||||
|
|
||||||
`| extract "pattern" from field_name` [pipe](#pipes) allows extracting arbitrary text into output fields according to the [`pattern`](#format-for-extract-pipe-pattern) from the given
|
`| extract "pattern" from field_name` [pipe](#pipes) allows extracting arbitrary text into output fields according to the [`pattern`](#format-for-extract-pipe-pattern) from the given
|
||||||
|
@ -1295,6 +1312,7 @@ the corresponding matching substring to.
|
||||||
Matching starts from the first occurrence of the `text1` in the input text. If the `pattern` starts with `<field1>` and doesn't contain `text1`,
|
Matching starts from the first occurrence of the `text1` in the input text. If the `pattern` starts with `<field1>` and doesn't contain `text1`,
|
||||||
then the matching starts from the beginning of the input text. Matching is performed sequentially according to the `pattern`. If some `textX` isn't found
|
then the matching starts from the beginning of the input text. Matching is performed sequentially according to the `pattern`. If some `textX` isn't found
|
||||||
in the remaining input text, then the remaining named placeholders receive empty string values and the matching finishes prematurely.
|
in the remaining input text, then the remaining named placeholders receive empty string values and the matching finishes prematurely.
|
||||||
|
The empty string values can be dropped with [`drop_empty_fields` pipe](#drop_empty_fields-pipe).
|
||||||
|
|
||||||
Matching finishes successfully when `textN+1` is found in the input text.
|
Matching finishes successfully when `textN+1` is found in the input text.
|
||||||
If the `pattern` ends with `<fieldN>` and doesn't contain `textN+1`, then the `<fieldN>` matches the remaining input text.
|
If the `pattern` ends with `<fieldN>` and doesn't contain `textN+1`, then the `<fieldN>` matches the remaining input text.
|
||||||
|
|
|
@ -106,6 +106,12 @@ func parsePipe(lex *lexer) (pipe, error) {
|
||||||
return nil, fmt.Errorf("cannot parse 'delete' pipe: %w", err)
|
return nil, fmt.Errorf("cannot parse 'delete' pipe: %w", err)
|
||||||
}
|
}
|
||||||
return pd, nil
|
return pd, nil
|
||||||
|
case lex.isKeyword("drop_empty_fields"):
|
||||||
|
pd, err := parsePipeDropEmptyFields(lex)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("cannot parse 'drop_empty_fields' pipe: %w", err)
|
||||||
|
}
|
||||||
|
return pd, nil
|
||||||
case lex.isKeyword("extract"):
|
case lex.isKeyword("extract"):
|
||||||
pe, err := parsePipeExtract(lex)
|
pe, err := parsePipeExtract(lex)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
223
lib/logstorage/pipe_drop_empty_fields.go
Normal file
223
lib/logstorage/pipe_drop_empty_fields.go
Normal file
|
@ -0,0 +1,223 @@
|
||||||
|
package logstorage
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
|
||||||
|
)
|
||||||
|
|
||||||
|
// pipeDropEmptyFields implements the '| drop_empty_fields' pipe.
//
// The struct has no fields, so all its values are interchangeable.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#drop_empty_fields-pipe
type pipeDropEmptyFields struct{}
|
||||||
|
|
||||||
|
func (pd *pipeDropEmptyFields) String() string {
|
||||||
|
return "drop_empty_fields"
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pd *pipeDropEmptyFields) optimize() {
|
||||||
|
// nothing to do
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pd *pipeDropEmptyFields) hasFilterInWithQuery() bool {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pd *pipeDropEmptyFields) initFilterInValues(_ map[string][]string, _ getFieldValuesFunc) (pipe, error) {
|
||||||
|
return pd, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pd *pipeDropEmptyFields) updateNeededFields(_, _ fieldsSet) {
|
||||||
|
// nothing to do
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pd *pipeDropEmptyFields) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
|
||||||
|
return &pipeDropEmptyFieldsProcessor{
|
||||||
|
ppNext: ppNext,
|
||||||
|
|
||||||
|
shards: make([]pipeDropEmptyFieldsProcessorShard, workersCount),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type pipeDropEmptyFieldsProcessor struct {
|
||||||
|
ppNext pipeProcessor
|
||||||
|
|
||||||
|
shards []pipeDropEmptyFieldsProcessorShard
|
||||||
|
}
|
||||||
|
|
||||||
|
type pipeDropEmptyFieldsProcessorShard struct {
|
||||||
|
pipeDropEmptyFieldsProcessorShardNopad
|
||||||
|
|
||||||
|
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
|
||||||
|
_ [128 - unsafe.Sizeof(pipeDropEmptyFieldsProcessorShardNopad{})%128]byte
|
||||||
|
}
|
||||||
|
|
||||||
|
type pipeDropEmptyFieldsProcessorShardNopad struct {
|
||||||
|
columnValues [][]string
|
||||||
|
fields []Field
|
||||||
|
|
||||||
|
wctx pipeDropEmptyFieldsWriteContext
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pdp *pipeDropEmptyFieldsProcessor) writeBlock(workerID uint, br *blockResult) {
|
||||||
|
if len(br.timestamps) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
shard := &pdp.shards[workerID]
|
||||||
|
|
||||||
|
cs := br.getColumns()
|
||||||
|
|
||||||
|
shard.columnValues = slicesutil.SetLength(shard.columnValues, len(cs))
|
||||||
|
columnValues := shard.columnValues
|
||||||
|
for i, c := range cs {
|
||||||
|
columnValues[i] = c.getValues(br)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !hasEmptyValues(columnValues) {
|
||||||
|
// Fast path - just write br to ppNext, since it has no empty values.
|
||||||
|
pdp.ppNext.writeBlock(workerID, br)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Slow path - drop fields with empty values
|
||||||
|
shard.wctx.init(workerID, pdp.ppNext)
|
||||||
|
|
||||||
|
fields := shard.fields
|
||||||
|
for rowIdx := range br.timestamps {
|
||||||
|
fields = fields[:0]
|
||||||
|
for i, values := range columnValues {
|
||||||
|
v := values[rowIdx]
|
||||||
|
if v == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
fields = append(fields, Field{
|
||||||
|
Name: cs[i].name,
|
||||||
|
Value: values[rowIdx],
|
||||||
|
})
|
||||||
|
}
|
||||||
|
shard.wctx.writeRow(fields)
|
||||||
|
}
|
||||||
|
shard.fields = fields
|
||||||
|
|
||||||
|
shard.wctx.flush()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pdp *pipeDropEmptyFieldsProcessor) flush() error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type pipeDropEmptyFieldsWriteContext struct {
|
||||||
|
workerID uint
|
||||||
|
ppNext pipeProcessor
|
||||||
|
|
||||||
|
rcs []resultColumn
|
||||||
|
br blockResult
|
||||||
|
|
||||||
|
// rowsCount is the number of rows in the current block
|
||||||
|
rowsCount int
|
||||||
|
|
||||||
|
// valuesLen is the total length of values in the current block
|
||||||
|
valuesLen int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (wctx *pipeDropEmptyFieldsWriteContext) reset() {
|
||||||
|
wctx.workerID = 0
|
||||||
|
wctx.ppNext = nil
|
||||||
|
|
||||||
|
rcs := wctx.rcs
|
||||||
|
for i := range rcs {
|
||||||
|
rcs[i].reset()
|
||||||
|
}
|
||||||
|
wctx.rcs = rcs[:0]
|
||||||
|
|
||||||
|
wctx.rowsCount = 0
|
||||||
|
wctx.valuesLen = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (wctx *pipeDropEmptyFieldsWriteContext) init(workerID uint, ppNext pipeProcessor) {
|
||||||
|
wctx.reset()
|
||||||
|
|
||||||
|
wctx.workerID = workerID
|
||||||
|
wctx.ppNext = ppNext
|
||||||
|
}
|
||||||
|
|
||||||
|
func (wctx *pipeDropEmptyFieldsWriteContext) writeRow(fields []Field) {
|
||||||
|
if len(fields) == 0 {
|
||||||
|
// skip rows without non-empty fields
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
rcs := wctx.rcs
|
||||||
|
|
||||||
|
areEqualColumns := len(rcs) == len(fields)
|
||||||
|
if areEqualColumns {
|
||||||
|
for i, f := range fields {
|
||||||
|
if rcs[i].name != f.Name {
|
||||||
|
areEqualColumns = false
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !areEqualColumns {
|
||||||
|
// send the current block to ppNext and construct a block with new set of columns
|
||||||
|
wctx.flush()
|
||||||
|
|
||||||
|
rcs = wctx.rcs[:0]
|
||||||
|
for _, f := range fields {
|
||||||
|
rcs = appendResultColumnWithName(rcs, f.Name)
|
||||||
|
}
|
||||||
|
wctx.rcs = rcs
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, f := range fields {
|
||||||
|
v := f.Value
|
||||||
|
rcs[i].addValue(v)
|
||||||
|
wctx.valuesLen += len(v)
|
||||||
|
}
|
||||||
|
|
||||||
|
wctx.rowsCount++
|
||||||
|
if wctx.valuesLen >= 1_000_000 {
|
||||||
|
wctx.flush()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (wctx *pipeDropEmptyFieldsWriteContext) flush() {
|
||||||
|
rcs := wctx.rcs
|
||||||
|
|
||||||
|
wctx.valuesLen = 0
|
||||||
|
|
||||||
|
// Flush rcs to ppNext
|
||||||
|
br := &wctx.br
|
||||||
|
br.setResultColumns(rcs, wctx.rowsCount)
|
||||||
|
wctx.rowsCount = 0
|
||||||
|
wctx.ppNext.writeBlock(wctx.workerID, br)
|
||||||
|
br.reset()
|
||||||
|
for i := range rcs {
|
||||||
|
rcs[i].resetValues()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func parsePipeDropEmptyFields(lex *lexer) (*pipeDropEmptyFields, error) {
|
||||||
|
if !lex.isKeyword("drop_empty_fields") {
|
||||||
|
return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "drop_empty_fields")
|
||||||
|
}
|
||||||
|
lex.nextToken()
|
||||||
|
|
||||||
|
pd := &pipeDropEmptyFields{}
|
||||||
|
|
||||||
|
return pd, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// hasEmptyValues reports whether at least one value in columnValues is an empty string.
//
// It returns false when columnValues contains no values at all.
func hasEmptyValues(columnValues [][]string) bool {
	for i := range columnValues {
		values := columnValues[i]
		for j := range values {
			if len(values[j]) == 0 {
				return true
			}
		}
	}
	return false
}
|
94
lib/logstorage/pipe_drop_empty_fields_test.go
Normal file
94
lib/logstorage/pipe_drop_empty_fields_test.go
Normal file
|
@ -0,0 +1,94 @@
|
||||||
|
package logstorage
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestParsePipeDropEmptyFieldsSuccess(t *testing.T) {
|
||||||
|
f := func(pipeStr string) {
|
||||||
|
t.Helper()
|
||||||
|
expectParsePipeSuccess(t, pipeStr)
|
||||||
|
}
|
||||||
|
|
||||||
|
f(`drop_empty_fields`)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParsePipeDropEmptyFieldsFailure(t *testing.T) {
|
||||||
|
f := func(pipeStr string) {
|
||||||
|
t.Helper()
|
||||||
|
expectParsePipeFailure(t, pipeStr)
|
||||||
|
}
|
||||||
|
|
||||||
|
f(`drop_empty_fields foo`)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPipeDropEmptyFields(t *testing.T) {
|
||||||
|
f := func(pipeStr string, rows, rowsExpected [][]Field) {
|
||||||
|
t.Helper()
|
||||||
|
expectPipeResults(t, pipeStr, rows, rowsExpected)
|
||||||
|
}
|
||||||
|
|
||||||
|
f(`drop_empty_fields`, [][]Field{
|
||||||
|
{
|
||||||
|
{"a", "foo"},
|
||||||
|
{"b", "bar"},
|
||||||
|
{"c", "baz"},
|
||||||
|
},
|
||||||
|
}, [][]Field{
|
||||||
|
{
|
||||||
|
{"a", "foo"},
|
||||||
|
{"b", "bar"},
|
||||||
|
{"c", "baz"},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
f(`drop_empty_fields`, [][]Field{
|
||||||
|
{
|
||||||
|
{"a", "foo"},
|
||||||
|
{"b", "bar"},
|
||||||
|
{"c", "baz"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
{"a", "foo1"},
|
||||||
|
{"b", ""},
|
||||||
|
{"c", "baz1"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
{"a", ""},
|
||||||
|
{"b", "bar2"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
{"a", ""},
|
||||||
|
{"b", ""},
|
||||||
|
{"c", ""},
|
||||||
|
},
|
||||||
|
}, [][]Field{
|
||||||
|
{
|
||||||
|
{"a", "foo"},
|
||||||
|
{"b", "bar"},
|
||||||
|
{"c", "baz"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
{"a", "foo1"},
|
||||||
|
{"c", "baz1"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
{"b", "bar2"},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPipeDropEmptyFieldsUpdateNeededFields(t *testing.T) {
|
||||||
|
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
|
||||||
|
t.Helper()
|
||||||
|
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
|
||||||
|
}
|
||||||
|
|
||||||
|
// all the needed fields
|
||||||
|
f(`drop_empty_fields`, "*", "", "*", "")
|
||||||
|
|
||||||
|
// non-empty unneeded fields
|
||||||
|
f(`drop_empty_fields`, "*", "f1,f2", "*", "f1,f2")
|
||||||
|
|
||||||
|
// non-empty needed fields
|
||||||
|
f(`drop_empty_fields`, "x,y", "", "x,y", "")
|
||||||
|
}
|
Loading…
Reference in a new issue