diff --git a/lib/logstorage/parser_test.go b/lib/logstorage/parser_test.go index a4a2a170b..a15545ede 100644 --- a/lib/logstorage/parser_test.go +++ b/lib/logstorage/parser_test.go @@ -1544,6 +1544,16 @@ func TestQueryGetNeededColumns(t *testing.T) { f(`* | fields x,y | field_names as bar | fields baz`, `x,y`, ``) f(`* | rm x,y | field_names as bar | fields baz`, `*`, `x,y`) + f(`* | extract from s1 "x"`, `*`, ``) + f(`* | extract from s1 "x" | fields foo`, `foo`, ``) + f(`* | extract from s1 "x" | fields foo,s1`, `foo,s1`, ``) + f(`* | extract from s1 "x" | fields foo,f1`, `foo,s1`, ``) + f(`* | extract from s1 "x" | fields foo,f1,f2`, `foo,s1`, ``) + f(`* | extract from s1 "x" | rm foo`, `*`, `foo`) + f(`* | extract from s1 "x" | rm foo,s1`, `*`, `foo`) + f(`* | extract from s1 "x" | rm foo,f1`, `*`, `foo`) + f(`* | extract from s1 "x" | rm foo,f1,f2`, `*`, `foo,s1`) + f(`* | rm f1, f2`, `*`, `f1,f2`) f(`* | rm f1, f2 | mv f2 f3`, `*`, `f1,f2,f3`) f(`* | rm f1, f2 | cp f2 f3`, `*`, `f1,f2,f3`) diff --git a/lib/logstorage/pipe_extract.go b/lib/logstorage/pipe_extract.go index 9501f9fcc..541688b6b 100644 --- a/lib/logstorage/pipe_extract.go +++ b/lib/logstorage/pipe_extract.go @@ -10,14 +10,14 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" ) -// pipeExtract processes '| extract (field, format)' pipe. +// pipeExtract processes '| extract from ' pipe. // // See https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe type pipeExtract struct { fromField string steps []extractFormatStep - format string + pattern string } func (pe *pipeExtract) String() string { @@ -25,16 +25,39 @@ func (pe *pipeExtract) String() string { if !isMsgFieldName(pe.fromField) { s += " from " + quoteTokenIfNeeded(pe.fromField) } - s += " " + quoteTokenIfNeeded(pe.format) + s += " " + quoteTokenIfNeeded(pe.pattern) return s } func (pe *pipeExtract) updateNeededFields(neededFields, unneededFields fieldsSet) { - neededFields.add(pe.fromField) - - for _, step := range pe.steps { - if step.field != "" { - unneededFields.remove(step.field) + if neededFields.contains("*") { + needFromField := false + for _, step := range pe.steps { + if step.field != "" { + if !unneededFields.contains(step.field) { + needFromField = true + } else { + unneededFields.remove(step.field) + } + } + } + if needFromField { + unneededFields.remove(pe.fromField) + } else { + unneededFields.add(pe.fromField) + } + } else { + needFromField := false + for _, step := range pe.steps { + if step.field != "" { + if neededFields.contains(step.field) { + needFromField = true + neededFields.remove(step.field) + } + } + } + if needFromField { + neededFields.add(pe.fromField) } } } @@ -146,19 +169,19 @@ func parsePipeExtract(lex *lexer) (*pipeExtract, error) { fromField = f } - format, err := getCompoundToken(lex) + pattern, err := getCompoundToken(lex) if err != nil { - return nil, fmt.Errorf("cannot read 'format': %w", err) + return nil, fmt.Errorf("cannot read 'pattern': %w", err) } - steps, err := parseExtractFormatSteps(format) + steps, err := parseExtractFormatSteps(pattern) if err != nil { - return nil, fmt.Errorf("cannot parse 'format' %q: %w", format, err) + return nil, fmt.Errorf("cannot parse 'pattern' %q: %w", pattern, err) } pe := &pipeExtract{ fromField: fromField, steps: steps, - format: format, + pattern: pattern, } return pe, nil } diff --git a/lib/logstorage/pipe_extract_test.go b/lib/logstorage/pipe_extract_test.go index 49255dee0..8d0d05d1a 100644 --- a/lib/logstorage/pipe_extract_test.go +++ b/lib/logstorage/pipe_extract_test.go @@ -6,10 +6,10 @@ import ( ) func TestExtractFormatApply(t *testing.T) { - f := func(format, s string, resultsExpected []string) { + f := func(pattern, s string, resultsExpected []string) { t.Helper() - steps, err := parseExtractFormatSteps(format) + steps, err := parseExtractFormatSteps(pattern) if err != nil { t.Fatalf("unexpected error: %s", err) } @@ -45,7 +45,7 @@ func TestExtractFormatApply(t *testing.T) { f("ip= <> path= ", "x=a, ip=1.2.3.4 method=GET host='abc' path=/foo/bar some tail here", []string{"1.2.3.4", "/foo/bar"}) - // escaped format + // escaped pattern f("ip=<>", "foo ip=<1.2.3.4> bar", []string{"1.2.3.4"}) f("ip=<>", "foo ip= bar", []string{"foo&bar"}) @@ -177,3 +177,48 @@ func TestParseExtractFormatStepFailure(t *testing.T) { f("'", "*", "", "*", "") + + // all the needed fields, unneeded fields do not intersect with fromField and output fields + f("extract from x ''", "*", "f1,f2", "*", "f1,f2") + + // all the needed fields, unneeded fields intersect with fromField + f("extract from x ''", "*", "f2,x", "*", "f2") + + // all the needed fields, unneeded fields intersect with output fields + f("extract from x 'x'", "*", "f2,foo", "*", "f2") + + // all the needed fields, unneeded fields intersect with all the output fields + f("extract from x 'x'", "*", "f2,foo,bar", "*", "f2,x") + + // needed fields do not intersect with fromField and output fields + f("extract from x 'x'", "f1,f2", "", "f1,f2", "") + + // needed fields intersect with fromField + f("extract from x 'x'", "f2,x", "", "f2,x", "") + + // needed fields intersect with output fields + f("extract from x 'x'", "f2,foo", "", "f2,x", "") + + // needed fields intersect with fromField and output fields + f("extract from x 'x'", "f2,foo,x,y", "", "f2,x,y", "") +} diff --git a/lib/logstorage/pipe_extract_timing_test.go b/lib/logstorage/pipe_extract_timing_test.go index ea2dea926..89e63f3bc 100644 --- a/lib/logstorage/pipe_extract_timing_test.go +++ b/lib/logstorage/pipe_extract_timing_test.go @@ -51,8 +51,8 @@ func BenchmarkExtractFormatApply(b *testing.B) { }) } -func benchmarkExtractFormatApply(b *testing.B, format string, a []string) { - steps, err := parseExtractFormatSteps(format) +func benchmarkExtractFormatApply(b *testing.B, pattern string, a []string) { + steps, err := parseExtractFormatSteps(pattern) if err != nil { b.Fatalf("unexpected error: %s", err) }