This commit is contained in:
Aliaksandr Valialkin 2024-05-22 14:05:32 +02:00
parent fb251af08a
commit 6458b5c138
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
5 changed files with 323 additions and 77 deletions

View file

@ -31,11 +31,47 @@ type patternStep struct {
field string field string
} }
func newPattern(steps []patternStep) *pattern { func (ptn *pattern) clone() *pattern {
if len(steps) == 0 { steps := ptn.steps
logger.Panicf("BUG: steps cannot be empty") fields, matches := newFieldsAndMatchesFromPatternSteps(steps)
if len(fields) == 0 {
logger.Panicf("BUG: fields cannot be empty for steps=%v", steps)
}
return &pattern{
steps: steps,
matches: matches,
fields: fields,
}
}
func parsePattern(s string) (*pattern, error) {
steps, err := parsePatternSteps(s)
if err != nil {
return nil, err
} }
// Verify that prefixes are non-empty between fields. The first prefix may be empty.
for i := 1; i < len(steps); i++ {
if steps[i].prefix == "" {
return nil, fmt.Errorf("missing delimiter between <%s> and <%s>", steps[i-1].field, steps[i].field)
}
}
// Build pattern struct
fields, matches := newFieldsAndMatchesFromPatternSteps(steps)
if len(fields) == 0 {
return nil, fmt.Errorf("pattern %q must contain at least a single named field in the form <field_name>", s)
}
ptn := &pattern{
steps: steps,
matches: matches,
fields: fields,
}
return ptn, nil
}
func newFieldsAndMatchesFromPatternSteps(steps []patternStep) ([]patternField, []string) {
matches := make([]string, len(steps)) matches := make([]string, len(steps))
var fields []patternField var fields []patternField
@ -47,22 +83,14 @@ func newPattern(steps []patternStep) *pattern {
}) })
} }
} }
if len(fields) == 0 {
logger.Panicf("BUG: fields cannot be empty")
}
ef := &pattern{ return fields, matches
steps: steps,
matches: matches,
fields: fields,
}
return ef
} }
func (ef *pattern) apply(s string) { func (ptn *pattern) apply(s string) {
clear(ef.matches) clear(ptn.matches)
steps := ef.steps steps := ptn.steps
if prefix := steps[0].prefix; prefix != "" { if prefix := steps[0].prefix; prefix != "" {
n := strings.Index(s, prefix) n := strings.Index(s, prefix)
@ -73,7 +101,7 @@ func (ef *pattern) apply(s string) {
s = s[n+len(prefix):] s = s[n+len(prefix):]
} }
matches := ef.matches matches := ptn.matches
for i := range steps { for i := range steps {
nextPrefix := "" nextPrefix := ""
if i+1 < len(steps) { if i+1 < len(steps) {
@ -126,13 +154,18 @@ func tryUnquoteString(s string) (string, int) {
} }
func parsePatternSteps(s string) ([]patternStep, error) { func parsePatternSteps(s string) ([]patternStep, error) {
var steps []patternStep if len(s) == 0 {
return nil, nil
}
hasNamedField := false var steps []patternStep
n := strings.IndexByte(s, '<') n := strings.IndexByte(s, '<')
if n < 0 { if n < 0 {
return nil, fmt.Errorf("missing <...> fields") steps = append(steps, patternStep{
prefix: s,
})
return steps, nil
} }
prefix := s[:n] prefix := s[:n]
s = s[n+1:] s = s[n+1:]
@ -151,9 +184,6 @@ func parsePatternSteps(s string) ([]patternStep, error) {
prefix: prefix, prefix: prefix,
field: field, field: field,
}) })
if !hasNamedField && field != "" {
hasNamedField = true
}
if len(s) == 0 { if len(s) == 0 {
break break
} }
@ -165,17 +195,10 @@ func parsePatternSteps(s string) ([]patternStep, error) {
}) })
break break
} }
if n == 0 {
return nil, fmt.Errorf("missing delimiter after <%s>", field)
}
prefix = s[:n] prefix = s[:n]
s = s[n+1:] s = s[n+1:]
} }
if !hasNamedField {
return nil, fmt.Errorf("missing named fields like <name>")
}
for i := range steps { for i := range steps {
step := &steps[i] step := &steps[i]
step.prefix = html.UnescapeString(step.prefix) step.prefix = html.UnescapeString(step.prefix)

View file

@ -6,24 +6,32 @@ import (
) )
func TestPatternApply(t *testing.T) { func TestPatternApply(t *testing.T) {
f := func(pattern, s string, resultsExpected []string) { f := func(patternStr, s string, resultsExpected []string) {
t.Helper() t.Helper()
steps, err := parsePatternSteps(pattern) checkFields := func(ptn *pattern) {
if err != nil { t.Helper()
t.Fatalf("unexpected error: %s", err) if len(ptn.fields) != len(resultsExpected) {
} t.Fatalf("unexpected number of results; got %d; want %d", len(ptn.fields), len(resultsExpected))
ef := newPattern(steps) }
ef.apply(s) for i, f := range ptn.fields {
if v := *f.value; v != resultsExpected[i] {
if len(ef.fields) != len(resultsExpected) { t.Fatalf("unexpected value for field %q; got %q; want %q", f.name, v, resultsExpected[i])
t.Fatalf("unexpected number of results; got %d; want %d", len(ef.fields), len(resultsExpected)) }
}
for i, f := range ef.fields {
if v := *f.value; v != resultsExpected[i] {
t.Fatalf("unexpected value for field %q; got %q; want %q", f.name, v, resultsExpected[i])
} }
} }
ptn, err := parsePattern(patternStr)
if err != nil {
t.Fatalf("cannot parse %q: %s", patternStr, err)
}
ptn.apply(s)
checkFields(ptn)
// clone pattern and check fields again
ptnCopy := ptn.clone()
ptnCopy.apply(s)
checkFields(ptn)
} }
f("<foo>", "", []string{""}) f("<foo>", "", []string{""})
@ -57,6 +65,30 @@ func TestPatternApply(t *testing.T) {
f(`<foo>,"bar`, `"foo,\"bar"`, []string{`foo,"bar`}) f(`<foo>,"bar`, `"foo,\"bar"`, []string{`foo,"bar`})
} }
func TestParsePatternFailure(t *testing.T) {
f := func(patternStr string) {
t.Helper()
ptn, err := parsePattern(patternStr)
if err == nil {
t.Fatalf("expecting error when parsing %q; got %v", patternStr, ptn)
}
}
// Missing named fields
f("")
f("foobar")
f("<>")
f("<>foo<>bar")
// Missing delimiter between fields
f("<foo><bar>")
f("abc<foo><bar>def")
f("abc<foo><bar>")
f("abc<foo><_>")
f("abc<_><_>")
}
func TestParsePatternStepsSuccess(t *testing.T) { func TestParsePatternStepsSuccess(t *testing.T) {
f := func(s string, stepsExpected []patternStep) { f := func(s string, stepsExpected []patternStep) {
t.Helper() t.Helper()
@ -70,6 +102,33 @@ func TestParsePatternStepsSuccess(t *testing.T) {
} }
} }
f("", nil)
f("foobar", []patternStep{
{
prefix: "foobar",
},
})
f("<>", []patternStep{
{},
})
f("foo<>", []patternStep{
{
prefix: "foo",
},
})
f("<foo><bar>", []patternStep{
{
field: "foo",
},
{
field: "bar",
},
})
f("<foo>", []patternStep{ f("<foo>", []patternStep{
{ {
field: "foo", field: "foo",
@ -141,38 +200,19 @@ func TestParsePatternStepsSuccess(t *testing.T) {
prefix: "&gt;", prefix: "&gt;",
}, },
}) })
} }
func TestParsePatternStepsFailure(t *testing.T) { func TestParsePatternStepsFailure(t *testing.T) {
f := func(s string) { f := func(s string) {
t.Helper() t.Helper()
_, err := parsePatternSteps(s) steps, err := parsePatternSteps(s)
if err == nil { if err == nil {
t.Fatalf("expecting non-nil error when parsing %q", s) t.Fatalf("expecting non-nil error when parsing %q; got steps: %v", s, steps)
} }
} }
// empty string
f("")
// zero fields
f("foobar")
// Zero named fields
f("<>")
f("foo<>")
f("<>foo")
f("foo<_>bar<*>baz<>xxx")
// missing delimiter between fields
f("<foo><bar>")
f("<><bar>")
f("<foo><>")
f("bb<foo><><bar>aa")
f("aa<foo><bar>")
f("aa<foo><bar>bb")
// missing > // missing >
f("<foo") f("<foo")
f("foo<bar") f("foo<bar")

View file

@ -51,10 +51,10 @@ func BenchmarkPatternApply(b *testing.B) {
}) })
} }
func benchmarkPatternApply(b *testing.B, pattern string, a []string) { func benchmarkPatternApply(b *testing.B, patternStr string, a []string) {
steps, err := parsePatternSteps(pattern) ptnMain, err := parsePattern(patternStr)
if err != nil { if err != nil {
b.Fatalf("unexpected error: %s", err) b.Fatalf("cannot parse pattern %q: %s", patternStr, err)
} }
n := 0 n := 0
@ -65,12 +65,12 @@ func benchmarkPatternApply(b *testing.B, pattern string, a []string) {
b.ReportAllocs() b.ReportAllocs()
b.SetBytes(int64(n)) b.SetBytes(int64(n))
b.RunParallel(func(pb *testing.PB) { b.RunParallel(func(pb *testing.PB) {
ptn := ptnMain.clone()
sink := 0 sink := 0
ef := newPattern(steps)
for pb.Next() { for pb.Next() {
for _, s := range a { for _, s := range a {
ef.apply(s) ptn.apply(s)
for _, v := range ef.matches { for _, v := range ptn.matches {
sink += len(v) sink += len(v)
} }
} }

View file

@ -9,7 +9,7 @@ import (
// See https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe // See https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe
type pipeExtract struct { type pipeExtract struct {
fromField string fromField string
steps []patternStep ptn *pattern
patternStr string patternStr string
@ -33,7 +33,7 @@ func (pe *pipeExtract) updateNeededFields(neededFields, unneededFields fieldsSet
if neededFields.contains("*") { if neededFields.contains("*") {
unneededFieldsOrig := unneededFields.clone() unneededFieldsOrig := unneededFields.clone()
needFromField := false needFromField := false
for _, step := range pe.steps { for _, step := range pe.ptn.steps {
if step.field != "" { if step.field != "" {
if !unneededFieldsOrig.contains(step.field) { if !unneededFieldsOrig.contains(step.field) {
needFromField = true needFromField = true
@ -52,7 +52,7 @@ func (pe *pipeExtract) updateNeededFields(neededFields, unneededFields fieldsSet
} else { } else {
neededFieldsOrig := neededFields.clone() neededFieldsOrig := neededFields.clone()
needFromField := false needFromField := false
for _, step := range pe.steps { for _, step := range pe.ptn.steps {
if step.field != "" && neededFieldsOrig.contains(step.field) { if step.field != "" && neededFieldsOrig.contains(step.field) {
needFromField = true needFromField = true
neededFields.remove(step.field) neededFields.remove(step.field)
@ -70,7 +70,7 @@ func (pe *pipeExtract) updateNeededFields(neededFields, unneededFields fieldsSet
func (pe *pipeExtract) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor { func (pe *pipeExtract) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
patterns := make([]*pattern, workersCount) patterns := make([]*pattern, workersCount)
for i := range patterns { for i := range patterns {
patterns[i] = newPattern(pe.steps) patterns[i] = pe.ptn.clone()
} }
unpackFunc := func(uctx *fieldsUnpackerContext, s string) { unpackFunc := func(uctx *fieldsUnpackerContext, s string) {
@ -105,14 +105,14 @@ func parsePipeExtract(lex *lexer) (*pipeExtract, error) {
if err != nil { if err != nil {
return nil, fmt.Errorf("cannot read 'pattern': %w", err) return nil, fmt.Errorf("cannot read 'pattern': %w", err)
} }
steps, err := parsePatternSteps(patternStr) ptn, err := parsePattern(patternStr)
if err != nil { if err != nil {
return nil, fmt.Errorf("cannot parse 'pattern' %q: %w", patternStr, err) return nil, fmt.Errorf("cannot parse 'pattern' %q: %w", patternStr, err)
} }
pe := &pipeExtract{ pe := &pipeExtract{
fromField: fromField, fromField: fromField,
steps: steps, ptn: ptn,
patternStr: patternStr, patternStr: patternStr,
} }

View file

@ -0,0 +1,183 @@
package logstorage
import (
"fmt"
"unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
)
// pipeFormat processes '| format ...' pipe.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#format-pipe
type pipeFormat struct {
formatStr string
steps []patternStep
resultField string
// iff is an optional filter for skipping the format func
iff *ifFilter
}
func (pf *pipeFormat) String() string {
s := "format" + quoteTokenIfNeeded(pf.formatStr)
if pf.iff != nil {
s += " " + pf.iff.String()
}
s += " as " + quoteTokenIfNeeded(pf.resultField)
return s
}
func (pf *pipeFormat) updateNeededFields(neededFields, unneededFields fieldsSet) {
if neededFields.contains("*") {
if !unneededFields.contains(pf.resultField) {
unneededFields.add(pf.resultField)
for _, step := range pf.steps {
if step.field != "" {
unneededFields.remove(step.field)
}
}
}
} else {
if neededFields.contains(pf.resultField) {
neededFields.remove(pf.resultField)
for _, step := range pf.steps {
if step.field != "" {
neededFields.add(step.field)
}
}
}
}
}
func (pf *pipeFormat) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
return &pipeFormatProcessor{
pf: pf,
ppBase: ppBase,
shards: make([]pipeFormatProcessorShard, workersCount),
}
}
type pipeFormatProcessor struct {
pf *pipeFormat
ppBase pipeProcessor
shards []pipeFormatProcessorShard
}
type pipeFormatProcessorShard struct {
pipeFormatProcessorShardNopad
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
_ [128 - unsafe.Sizeof(pipeFormatProcessorShardNopad{})%128]byte
}
type pipeFormatProcessorShardNopad struct {
bm bitmap
uctx fieldsUnpackerContext
wctx pipeUnpackWriteContext
}
func (pfp *pipeFormatProcessor) writeBlock(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
shard := &pfp.shards[workerID]
shard.wctx.init(workerID, pfp.ppBase, br)
shard.uctx.init(workerID, "")
bm := &shard.bm
bm.init(len(br.timestamps))
bm.setBits()
if iff := pfp.pf.iff; iff != nil {
iff.f.applyToBlockResult(br, bm)
if bm.isZero() {
pfp.ppBase.writeBlock(workerID, br)
return
}
}
for rowIdx := range br.timestamps {
if bm.isSetBit(rowIdx) {
shard.formatRow(pfp.pf, br, rowIdx)
shard.wctx.writeRow(rowIdx, shard.uctx.fields)
} else {
shard.wctx.writeRow(rowIdx, nil)
}
}
shard.wctx.flush()
shard.wctx.reset()
shard.uctx.reset()
}
func (pfp *pipeFormatProcessor) flush() error {
return nil
}
func (shard *pipeFormatProcessorShard) formatRow(pf *pipeFormat, br *blockResult, rowIdx int) {
bb := bbPool.Get()
b := bb.B
for _, step := range pf.steps {
b = append(b, step.prefix...)
if step.field != "" {
c := br.getColumnByName(step.field)
v := c.getValueAtRow(br, rowIdx)
b = append(b, v...)
}
}
bb.B = b
s := bytesutil.ToUnsafeString(b)
shard.uctx.resetFields()
shard.uctx.addField(pf.resultField, s)
bbPool.Put(bb)
}
func parsePipeFormat(lex *lexer) (*pipeFormat, error) {
if !lex.isKeyword("format") {
return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "format")
}
lex.nextToken()
// parse format
formatStr, err := getCompoundToken(lex)
if err != nil {
return nil, fmt.Errorf("cannot read 'format': %w", err)
}
steps, err := parsePatternSteps(formatStr)
if err != nil {
return nil, fmt.Errorf("cannot parse 'pattern' %q: %w", formatStr, err)
}
if !lex.isKeyword("as") {
return nil, fmt.Errorf("missing 'as' keyword after 'format %q'", formatStr)
}
lex.nextToken()
resultField, err := parseFieldName(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse result field after 'format %q as': %w", formatStr, err)
}
pf := &pipeFormat{
formatStr: formatStr,
steps: steps,
resultField: resultField,
}
// parse optional if (...)
if lex.isKeyword("if") {
iff, err := parseIfFilter(lex)
if err != nil {
return nil, err
}
pf.iff = iff
}
return pf, nil
}