From a686c7dd74e79d6bfdbe1d8d8432c01e34f529bc Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Thu, 23 May 2024 13:58:30 +0200 Subject: [PATCH] wip --- lib/logstorage/pattern.go | 85 +++++++++++++++++++--------------- lib/logstorage/pattern_test.go | 13 ++++-- lib/logstorage/pipe_format.go | 2 +- 3 files changed, 56 insertions(+), 44 deletions(-) diff --git a/lib/logstorage/pattern.go b/lib/logstorage/pattern.go index 416cb4c69..af538feaf 100644 --- a/lib/logstorage/pattern.go +++ b/lib/logstorage/pattern.go @@ -5,8 +5,6 @@ import ( "html" "strconv" "strings" - - "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" ) // pattern represents text pattern in the form 'some_textother_text...' @@ -28,18 +26,25 @@ type patternField struct { type patternStep struct { prefix string - field string - opt string + + field string + fieldOpt string } func (ptn *pattern) clone() *pattern { - steps := ptn.steps - fields, matches := newFieldsAndMatchesFromPatternSteps(steps) - if len(fields) == 0 { - logger.Panicf("BUG: fields cannot be empty for steps=%v", steps) + matches := make([]string, len(ptn.steps)) + var fields []patternField + for i, step := range ptn.steps { + if step.field != "" { + fields = append(fields, patternField{ + name: step.field, + value: &matches[i], + }) + } } + return &pattern{ - steps: steps, + steps: ptn.steps, matches: matches, fields: fields, } @@ -59,7 +64,18 @@ func parsePattern(s string) (*pattern, error) { } // Build pattern struct - fields, matches := newFieldsAndMatchesFromPatternSteps(steps) + + matches := make([]string, len(steps)) + + var fields []patternField + for i, step := range steps { + if step.field != "" { + fields = append(fields, patternField{ + name: step.field, + value: &matches[i], + }) + } + } if len(fields) == 0 { return nil, fmt.Errorf("pattern %q must contain at least a single named field in the form ", s) } @@ -72,35 +88,17 @@ func parsePattern(s string) (*pattern, error) { return ptn, nil } -func newFieldsAndMatchesFromPatternSteps(steps []patternStep) ([]patternField, []string) { - matches := make([]string, len(steps)) - - var fields []patternField - for i, step := range steps { - if step.field != "" { - fields = append(fields, patternField{ - name: step.field, - value: &matches[i], - }) - } - } - - return fields, matches -} - func (ptn *pattern) apply(s string) { clear(ptn.matches) steps := ptn.steps - if prefix := steps[0].prefix; prefix != "" { - n := strings.Index(s, prefix) - if n < 0 { - // Mismatch - return - } - s = s[n+len(prefix):] + n, prefixLen := prefixIndex(s, steps[0].prefix) + if n < 0 { + // Mismatch + return } + s = s[n+prefixLen:] matches := ptn.matches for i := range steps { @@ -109,7 +107,7 @@ func (ptn *pattern) apply(s string) { nextPrefix = steps[i+1].prefix } - us, nOffset := tryUnquoteString(s, steps[i].opt) + us, nOffset := tryUnquoteString(s, steps[i].fieldOpt) if nOffset >= 0 { // Matched quoted string matches[i] = us @@ -125,17 +123,28 @@ func (ptn *pattern) apply(s string) { matches[i] = s return } - n := strings.Index(s, nextPrefix) + n, prefixLen := prefixIndex(s, nextPrefix) if n < 0 { // Mismatch return } matches[i] = s[:n] - s = s[n+len(nextPrefix):] + s = s[n+prefixLen:] } } } +func prefixIndex(s, prefix string) (int, int) { + if len(prefix) == 0 { + return 0, 0 + } + n := strings.Index(s, prefix) + if n < 0 { + return -1, 0 + } + return n, len(prefix) +} + func tryUnquoteString(s, opt string) (string, int) { if opt == "plain" { return "", -1 @@ -163,7 +172,7 @@ func parsePatternSteps(s string) ([]patternStep, error) { return nil, err } - // Unescape prefixes + // unescape prefixes for i := range steps { step := &steps[i] step.prefix = html.UnescapeString(step.prefix) @@ -174,7 +183,7 @@ func parsePatternSteps(s string) ([]patternStep, error) { step := &steps[i] field := step.field if n := strings.IndexByte(field, ':'); n >= 0 { - step.opt = strings.TrimSpace(field[:n]) + step.fieldOpt = strings.TrimSpace(field[:n]) field = field[n+1:] } step.field = strings.TrimSpace(field) diff --git a/lib/logstorage/pattern_test.go b/lib/logstorage/pattern_test.go index 3dff47543..626dda743 100644 --- a/lib/logstorage/pattern_test.go +++ b/lib/logstorage/pattern_test.go @@ -63,6 +63,9 @@ func TestPatternApply(t *testing.T) { f(`foo= `, "foo=`bar baz,abc` def", []string{"bar baz,abc"}) f(``, `"foo,\"bar"`, []string{`foo,"bar`}) f(`,"bar`, `"foo,\"bar"`, []string{`foo,"bar`}) + + // disable automatic unquoting of quoted field + f(`[]`, `["foo","bar"]`, []string{`"foo","bar"`}) } func TestParsePatternFailure(t *testing.T) { @@ -207,13 +210,13 @@ func TestParsePatternStepsSuccess(t *testing.T) { }) f("< q : foo >barf<:foo:bar:baz>", []patternStep{ { - field: "foo", - opt: "q", + field: "foo", + fieldOpt: "q", }, { - prefix: "bar", - field: "baz:c:y", - opt: "plain", + prefix: "bar", + field: "baz:c:y", + fieldOpt: "plain", }, { prefix: "f", diff --git a/lib/logstorage/pipe_format.go b/lib/logstorage/pipe_format.go index 459400c4d..1fe18b5ba 100644 --- a/lib/logstorage/pipe_format.go +++ b/lib/logstorage/pipe_format.go @@ -137,7 +137,7 @@ func (shard *pipeFormatProcessorShard) formatRow(pf *pipeFormat, br *blockResult if step.field != "" { c := br.getColumnByName(step.field) v := c.getValueAtRow(br, rowIdx) - if step.opt == "q" { + if step.fieldOpt == "q" { b = strconv.AppendQuote(b, v) } else { b = append(b, v...)