wip

2024-12-31 15:06:26 +00:00 · 2024-05-19 15:11:17 +02:00 · 2024-05-19 15:11:17 +02:00 · fde91c8f97
commit fde91c8f97
parent 38b9213a0c
4 changed files with 96 additions and 18 deletions
--- a/lib/logstorage/parser_test.go
+++ b/lib/logstorage/parser_test.go
@ -1544,6 +1544,16 @@ func TestQueryGetNeededColumns(t *testing.T) {
 	f(`* | fields x,y | field_names as bar | fields baz`, `x,y`, ``)
 	f(`* | rm x,y | field_names as bar | fields baz`, `*`, `x,y`)

+	f(`* | extract from s1 "<f1>x<f2>"`, `*`, ``)
+	f(`* | extract from s1 "<f1>x<f2>" | fields foo`, `foo`, ``)
+	f(`* | extract from s1 "<f1>x<f2>" | fields foo,s1`, `foo,s1`, ``)
+	f(`* | extract from s1 "<f1>x<f2>" | fields foo,f1`, `foo,s1`, ``)
+	f(`* | extract from s1 "<f1>x<f2>" | fields foo,f1,f2`, `foo,s1`, ``)
+	f(`* | extract from s1 "<f1>x<f2>" | rm foo`, `*`, `foo`)
+	f(`* | extract from s1 "<f1>x<f2>" | rm foo,s1`, `*`, `foo`)
+	f(`* | extract from s1 "<f1>x<f2>" | rm foo,f1`, `*`, `foo`)
+	f(`* | extract from s1 "<f1>x<f2>" | rm foo,f1,f2`, `*`, `foo,s1`)
+
 	f(`* | rm f1, f2`, `*`, `f1,f2`)
 	f(`* | rm f1, f2 | mv f2 f3`, `*`, `f1,f2,f3`)
 	f(`* | rm f1, f2 | cp f2 f3`, `*`, `f1,f2,f3`)
--- a/lib/logstorage/pipe_extract.go
+++ b/lib/logstorage/pipe_extract.go
@ -10,14 +10,14 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 )

-// pipeExtract processes '| extract (field, format)' pipe.
+// pipeExtract processes '| extract from <field> <pattern>' pipe.
 //
 // See https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe
 type pipeExtract struct {
 	fromField string
 	steps     []extractFormatStep

-	format string
+	pattern string
 }

 func (pe *pipeExtract) String() string {
@ -25,16 +25,39 @@ func (pe *pipeExtract) String() string {
 	if !isMsgFieldName(pe.fromField) {
 		s += " from " + quoteTokenIfNeeded(pe.fromField)
 	}
-	s += " " + quoteTokenIfNeeded(pe.format)
+	s += " " + quoteTokenIfNeeded(pe.pattern)
 	return s
 }

 func (pe *pipeExtract) updateNeededFields(neededFields, unneededFields fieldsSet) {
-	neededFields.add(pe.fromField)
-
-	for _, step := range pe.steps {
-		if step.field != "" {
-			unneededFields.remove(step.field)
+	if neededFields.contains("*") {
+		needFromField := false
+		for _, step := range pe.steps {
+			if step.field != "" {
+				if !unneededFields.contains(step.field) {
+					needFromField = true
+				} else {
+					unneededFields.remove(step.field)
+				}
+			}
+		}
+		if needFromField {
+			unneededFields.remove(pe.fromField)
+		} else {
+			unneededFields.add(pe.fromField)
+		}
+	} else {
+		needFromField := false
+		for _, step := range pe.steps {
+			if step.field != "" {
+				if neededFields.contains(step.field) {
+					needFromField = true
+					neededFields.remove(step.field)
+				}
+			}
+		}
+		if needFromField {
+			neededFields.add(pe.fromField)
 		}
 	}
 }
@ -146,19 +169,19 @@ func parsePipeExtract(lex *lexer) (*pipeExtract, error) {
 		fromField = f
 	}

-	format, err := getCompoundToken(lex)
+	pattern, err := getCompoundToken(lex)
 	if err != nil {
-		return nil, fmt.Errorf("cannot read 'format': %w", err)
+		return nil, fmt.Errorf("cannot read 'pattern': %w", err)
 	}
-	steps, err := parseExtractFormatSteps(format)
+	steps, err := parseExtractFormatSteps(pattern)
 	if err != nil {
-		return nil, fmt.Errorf("cannot parse 'format' %q: %w", format, err)
+		return nil, fmt.Errorf("cannot parse 'pattern' %q: %w", pattern, err)
 	}

 	pe := &pipeExtract{
 		fromField: fromField,
 		steps:     steps,
-		format:    format,
+		pattern:   pattern,
 	}
 	return pe, nil
 }
--- a/lib/logstorage/pipe_extract_test.go
+++ b/lib/logstorage/pipe_extract_test.go
@ -6,10 +6,10 @@ import (
 )

 func TestExtractFormatApply(t *testing.T) {
-	f := func(format, s string, resultsExpected []string) {
+	f := func(pattern, s string, resultsExpected []string) {
 		t.Helper()

-		steps, err := parseExtractFormatSteps(format)
+		steps, err := parseExtractFormatSteps(pattern)
 		if err != nil {
 			t.Fatalf("unexpected error: %s", err)
 		}
@ -45,7 +45,7 @@ func TestExtractFormatApply(t *testing.T) {

 	f("ip=<ip> <> path=<path> ", "x=a, ip=1.2.3.4 method=GET host='abc' path=/foo/bar some tail here", []string{"1.2.3.4", "/foo/bar"})

-	// escaped format
+	// escaped pattern
 	f("ip=&lt;<ip>&gt;", "foo ip=<1.2.3.4> bar", []string{"1.2.3.4"})
 	f("ip=&lt;<ip>&gt;", "foo ip=<foo&amp;bar> bar", []string{"foo&amp;bar"})

@ -177,3 +177,48 @@ func TestParseExtractFormatStepFailure(t *testing.T) {
 	f("<foo")
 	f("foo<bar")
 }
+
+func TestPipeExtractUpdateNeededFields(t *testing.T) {
+	f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
+		t.Helper()
+
+		nfs := newTestFieldsSet(neededFields)
+		unfs := newTestFieldsSet(unneededFields)
+
+		lex := newLexer(s)
+		p, err := parsePipeExtract(lex)
+		if err != nil {
+			t.Fatalf("cannot parse %s: %s", s, err)
+		}
+		p.updateNeededFields(nfs, unfs)
+
+		assertNeededFields(t, nfs, unfs, neededFieldsExpected, unneededFieldsExpected)
+	}
+
+	// all the needed fields
+	f("extract from x '<foo>'", "*", "", "*", "")
+
+	// all the needed fields, unneeded fields do not intersect with fromField and output fields
+	f("extract from x '<foo>'", "*", "f1,f2", "*", "f1,f2")
+
+	// all the needed fields, unneeded fields intersect with fromField
+	f("extract from x '<foo>'", "*", "f2,x", "*", "f2")
+
+	// all the needed fields, unneeded fields intersect with output fields
+	f("extract from x '<foo>x<bar>'", "*", "f2,foo", "*", "f2")
+
+	// all the needed fields, unneeded fields intersect with all the output fields
+	f("extract from x '<foo>x<bar>'", "*", "f2,foo,bar", "*", "f2,x")
+
+	// needed fields do not intersect with fromField and output fields
+	f("extract from x '<foo>x<bar>'", "f1,f2", "", "f1,f2", "")
+
+	// needed fields intersect with fromField
+	f("extract from x '<foo>x<bar>'", "f2,x", "", "f2,x", "")
+
+	// needed fields intersect with output fields
+	f("extract from x '<foo>x<bar>'", "f2,foo", "", "f2,x", "")
+
+	// needed fields intersect with fromField and output fields
+	f("extract from x '<foo>x<bar>'", "f2,foo,x,y", "", "f2,x,y", "")
+}
--- a/lib/logstorage/pipe_extract_timing_test.go
+++ b/lib/logstorage/pipe_extract_timing_test.go
@ -51,8 +51,8 @@ func BenchmarkExtractFormatApply(b *testing.B) {
 	})
 }

-func benchmarkExtractFormatApply(b *testing.B, format string, a []string) {
-	steps, err := parseExtractFormatSteps(format)
+func benchmarkExtractFormatApply(b *testing.B, pattern string, a []string) {
+	steps, err := parseExtractFormatSteps(pattern)
 	if err != nil {
 		b.Fatalf("unexpected error: %s", err)
 	}