mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-31 15:06:26 +00:00
wip
This commit is contained in:
parent
fb251af08a
commit
6458b5c138
5 changed files with 323 additions and 77 deletions
|
@ -31,11 +31,47 @@ type patternStep struct {
|
|||
field string
|
||||
}
|
||||
|
||||
func newPattern(steps []patternStep) *pattern {
|
||||
if len(steps) == 0 {
|
||||
logger.Panicf("BUG: steps cannot be empty")
|
||||
func (ptn *pattern) clone() *pattern {
|
||||
steps := ptn.steps
|
||||
fields, matches := newFieldsAndMatchesFromPatternSteps(steps)
|
||||
if len(fields) == 0 {
|
||||
logger.Panicf("BUG: fields cannot be empty for steps=%v", steps)
|
||||
}
|
||||
return &pattern{
|
||||
steps: steps,
|
||||
matches: matches,
|
||||
fields: fields,
|
||||
}
|
||||
}
|
||||
|
||||
func parsePattern(s string) (*pattern, error) {
|
||||
steps, err := parsePatternSteps(s)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Verify that prefixes are non-empty between fields. The first prefix may be empty.
|
||||
for i := 1; i < len(steps); i++ {
|
||||
if steps[i].prefix == "" {
|
||||
return nil, fmt.Errorf("missing delimiter between <%s> and <%s>", steps[i-1].field, steps[i].field)
|
||||
}
|
||||
}
|
||||
|
||||
// Build pattern struct
|
||||
fields, matches := newFieldsAndMatchesFromPatternSteps(steps)
|
||||
if len(fields) == 0 {
|
||||
return nil, fmt.Errorf("pattern %q must contain at least a single named field in the form <field_name>", s)
|
||||
}
|
||||
|
||||
ptn := &pattern{
|
||||
steps: steps,
|
||||
matches: matches,
|
||||
fields: fields,
|
||||
}
|
||||
return ptn, nil
|
||||
}
|
||||
|
||||
func newFieldsAndMatchesFromPatternSteps(steps []patternStep) ([]patternField, []string) {
|
||||
matches := make([]string, len(steps))
|
||||
|
||||
var fields []patternField
|
||||
|
@ -47,22 +83,14 @@ func newPattern(steps []patternStep) *pattern {
|
|||
})
|
||||
}
|
||||
}
|
||||
if len(fields) == 0 {
|
||||
logger.Panicf("BUG: fields cannot be empty")
|
||||
|
||||
return fields, matches
|
||||
}
|
||||
|
||||
ef := &pattern{
|
||||
steps: steps,
|
||||
matches: matches,
|
||||
fields: fields,
|
||||
}
|
||||
return ef
|
||||
}
|
||||
func (ptn *pattern) apply(s string) {
|
||||
clear(ptn.matches)
|
||||
|
||||
func (ef *pattern) apply(s string) {
|
||||
clear(ef.matches)
|
||||
|
||||
steps := ef.steps
|
||||
steps := ptn.steps
|
||||
|
||||
if prefix := steps[0].prefix; prefix != "" {
|
||||
n := strings.Index(s, prefix)
|
||||
|
@ -73,7 +101,7 @@ func (ef *pattern) apply(s string) {
|
|||
s = s[n+len(prefix):]
|
||||
}
|
||||
|
||||
matches := ef.matches
|
||||
matches := ptn.matches
|
||||
for i := range steps {
|
||||
nextPrefix := ""
|
||||
if i+1 < len(steps) {
|
||||
|
@ -126,13 +154,18 @@ func tryUnquoteString(s string) (string, int) {
|
|||
}
|
||||
|
||||
func parsePatternSteps(s string) ([]patternStep, error) {
|
||||
var steps []patternStep
|
||||
if len(s) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
hasNamedField := false
|
||||
var steps []patternStep
|
||||
|
||||
n := strings.IndexByte(s, '<')
|
||||
if n < 0 {
|
||||
return nil, fmt.Errorf("missing <...> fields")
|
||||
steps = append(steps, patternStep{
|
||||
prefix: s,
|
||||
})
|
||||
return steps, nil
|
||||
}
|
||||
prefix := s[:n]
|
||||
s = s[n+1:]
|
||||
|
@ -151,9 +184,6 @@ func parsePatternSteps(s string) ([]patternStep, error) {
|
|||
prefix: prefix,
|
||||
field: field,
|
||||
})
|
||||
if !hasNamedField && field != "" {
|
||||
hasNamedField = true
|
||||
}
|
||||
if len(s) == 0 {
|
||||
break
|
||||
}
|
||||
|
@ -165,17 +195,10 @@ func parsePatternSteps(s string) ([]patternStep, error) {
|
|||
})
|
||||
break
|
||||
}
|
||||
if n == 0 {
|
||||
return nil, fmt.Errorf("missing delimiter after <%s>", field)
|
||||
}
|
||||
prefix = s[:n]
|
||||
s = s[n+1:]
|
||||
}
|
||||
|
||||
if !hasNamedField {
|
||||
return nil, fmt.Errorf("missing named fields like <name>")
|
||||
}
|
||||
|
||||
for i := range steps {
|
||||
step := &steps[i]
|
||||
step.prefix = html.UnescapeString(step.prefix)
|
||||
|
|
|
@ -6,26 +6,34 @@ import (
|
|||
)
|
||||
|
||||
func TestPatternApply(t *testing.T) {
|
||||
f := func(pattern, s string, resultsExpected []string) {
|
||||
f := func(patternStr, s string, resultsExpected []string) {
|
||||
t.Helper()
|
||||
|
||||
steps, err := parsePatternSteps(pattern)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
checkFields := func(ptn *pattern) {
|
||||
t.Helper()
|
||||
if len(ptn.fields) != len(resultsExpected) {
|
||||
t.Fatalf("unexpected number of results; got %d; want %d", len(ptn.fields), len(resultsExpected))
|
||||
}
|
||||
ef := newPattern(steps)
|
||||
ef.apply(s)
|
||||
|
||||
if len(ef.fields) != len(resultsExpected) {
|
||||
t.Fatalf("unexpected number of results; got %d; want %d", len(ef.fields), len(resultsExpected))
|
||||
}
|
||||
for i, f := range ef.fields {
|
||||
for i, f := range ptn.fields {
|
||||
if v := *f.value; v != resultsExpected[i] {
|
||||
t.Fatalf("unexpected value for field %q; got %q; want %q", f.name, v, resultsExpected[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ptn, err := parsePattern(patternStr)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot parse %q: %s", patternStr, err)
|
||||
}
|
||||
ptn.apply(s)
|
||||
checkFields(ptn)
|
||||
|
||||
// clone pattern and check fields again
|
||||
ptnCopy := ptn.clone()
|
||||
ptnCopy.apply(s)
|
||||
checkFields(ptn)
|
||||
}
|
||||
|
||||
f("<foo>", "", []string{""})
|
||||
f("<foo>", "abc", []string{"abc"})
|
||||
f("<foo>bar", "", []string{""})
|
||||
|
@ -57,6 +65,30 @@ func TestPatternApply(t *testing.T) {
|
|||
f(`<foo>,"bar`, `"foo,\"bar"`, []string{`foo,"bar`})
|
||||
}
|
||||
|
||||
func TestParsePatternFailure(t *testing.T) {
|
||||
f := func(patternStr string) {
|
||||
t.Helper()
|
||||
|
||||
ptn, err := parsePattern(patternStr)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting error when parsing %q; got %v", patternStr, ptn)
|
||||
}
|
||||
}
|
||||
|
||||
// Missing named fields
|
||||
f("")
|
||||
f("foobar")
|
||||
f("<>")
|
||||
f("<>foo<>bar")
|
||||
|
||||
// Missing delimiter between fields
|
||||
f("<foo><bar>")
|
||||
f("abc<foo><bar>def")
|
||||
f("abc<foo><bar>")
|
||||
f("abc<foo><_>")
|
||||
f("abc<_><_>")
|
||||
}
|
||||
|
||||
func TestParsePatternStepsSuccess(t *testing.T) {
|
||||
f := func(s string, stepsExpected []patternStep) {
|
||||
t.Helper()
|
||||
|
@ -70,6 +102,33 @@ func TestParsePatternStepsSuccess(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
f("", nil)
|
||||
|
||||
f("foobar", []patternStep{
|
||||
{
|
||||
prefix: "foobar",
|
||||
},
|
||||
})
|
||||
|
||||
f("<>", []patternStep{
|
||||
{},
|
||||
})
|
||||
|
||||
f("foo<>", []patternStep{
|
||||
{
|
||||
prefix: "foo",
|
||||
},
|
||||
})
|
||||
|
||||
f("<foo><bar>", []patternStep{
|
||||
{
|
||||
field: "foo",
|
||||
},
|
||||
{
|
||||
field: "bar",
|
||||
},
|
||||
})
|
||||
|
||||
f("<foo>", []patternStep{
|
||||
{
|
||||
field: "foo",
|
||||
|
@ -141,38 +200,19 @@ func TestParsePatternStepsSuccess(t *testing.T) {
|
|||
prefix: ">",
|
||||
},
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
func TestParsePatternStepsFailure(t *testing.T) {
|
||||
f := func(s string) {
|
||||
t.Helper()
|
||||
|
||||
_, err := parsePatternSteps(s)
|
||||
steps, err := parsePatternSteps(s)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error when parsing %q", s)
|
||||
t.Fatalf("expecting non-nil error when parsing %q; got steps: %v", s, steps)
|
||||
}
|
||||
}
|
||||
|
||||
// empty string
|
||||
f("")
|
||||
|
||||
// zero fields
|
||||
f("foobar")
|
||||
|
||||
// Zero named fields
|
||||
f("<>")
|
||||
f("foo<>")
|
||||
f("<>foo")
|
||||
f("foo<_>bar<*>baz<>xxx")
|
||||
|
||||
// missing delimiter between fields
|
||||
f("<foo><bar>")
|
||||
f("<><bar>")
|
||||
f("<foo><>")
|
||||
f("bb<foo><><bar>aa")
|
||||
f("aa<foo><bar>")
|
||||
f("aa<foo><bar>bb")
|
||||
|
||||
// missing >
|
||||
f("<foo")
|
||||
f("foo<bar")
|
||||
|
|
|
@ -51,10 +51,10 @@ func BenchmarkPatternApply(b *testing.B) {
|
|||
})
|
||||
}
|
||||
|
||||
func benchmarkPatternApply(b *testing.B, pattern string, a []string) {
|
||||
steps, err := parsePatternSteps(pattern)
|
||||
func benchmarkPatternApply(b *testing.B, patternStr string, a []string) {
|
||||
ptnMain, err := parsePattern(patternStr)
|
||||
if err != nil {
|
||||
b.Fatalf("unexpected error: %s", err)
|
||||
b.Fatalf("cannot parse pattern %q: %s", patternStr, err)
|
||||
}
|
||||
|
||||
n := 0
|
||||
|
@ -65,12 +65,12 @@ func benchmarkPatternApply(b *testing.B, pattern string, a []string) {
|
|||
b.ReportAllocs()
|
||||
b.SetBytes(int64(n))
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
ptn := ptnMain.clone()
|
||||
sink := 0
|
||||
ef := newPattern(steps)
|
||||
for pb.Next() {
|
||||
for _, s := range a {
|
||||
ef.apply(s)
|
||||
for _, v := range ef.matches {
|
||||
ptn.apply(s)
|
||||
for _, v := range ptn.matches {
|
||||
sink += len(v)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -9,7 +9,7 @@ import (
|
|||
// See https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe
|
||||
type pipeExtract struct {
|
||||
fromField string
|
||||
steps []patternStep
|
||||
ptn *pattern
|
||||
|
||||
patternStr string
|
||||
|
||||
|
@ -33,7 +33,7 @@ func (pe *pipeExtract) updateNeededFields(neededFields, unneededFields fieldsSet
|
|||
if neededFields.contains("*") {
|
||||
unneededFieldsOrig := unneededFields.clone()
|
||||
needFromField := false
|
||||
for _, step := range pe.steps {
|
||||
for _, step := range pe.ptn.steps {
|
||||
if step.field != "" {
|
||||
if !unneededFieldsOrig.contains(step.field) {
|
||||
needFromField = true
|
||||
|
@ -52,7 +52,7 @@ func (pe *pipeExtract) updateNeededFields(neededFields, unneededFields fieldsSet
|
|||
} else {
|
||||
neededFieldsOrig := neededFields.clone()
|
||||
needFromField := false
|
||||
for _, step := range pe.steps {
|
||||
for _, step := range pe.ptn.steps {
|
||||
if step.field != "" && neededFieldsOrig.contains(step.field) {
|
||||
needFromField = true
|
||||
neededFields.remove(step.field)
|
||||
|
@ -70,7 +70,7 @@ func (pe *pipeExtract) updateNeededFields(neededFields, unneededFields fieldsSet
|
|||
func (pe *pipeExtract) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
|
||||
patterns := make([]*pattern, workersCount)
|
||||
for i := range patterns {
|
||||
patterns[i] = newPattern(pe.steps)
|
||||
patterns[i] = pe.ptn.clone()
|
||||
}
|
||||
|
||||
unpackFunc := func(uctx *fieldsUnpackerContext, s string) {
|
||||
|
@ -105,14 +105,14 @@ func parsePipeExtract(lex *lexer) (*pipeExtract, error) {
|
|||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read 'pattern': %w", err)
|
||||
}
|
||||
steps, err := parsePatternSteps(patternStr)
|
||||
ptn, err := parsePattern(patternStr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'pattern' %q: %w", patternStr, err)
|
||||
}
|
||||
|
||||
pe := &pipeExtract{
|
||||
fromField: fromField,
|
||||
steps: steps,
|
||||
ptn: ptn,
|
||||
patternStr: patternStr,
|
||||
}
|
||||
|
||||
|
|
183
lib/logstorage/pipe_format.go
Normal file
183
lib/logstorage/pipe_format.go
Normal file
|
@ -0,0 +1,183 @@
|
|||
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unsafe"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
)
|
||||
|
||||
// pipeFormat processes '| format ...' pipe.
|
||||
//
|
||||
// See https://docs.victoriametrics.com/victorialogs/logsql/#format-pipe
|
||||
type pipeFormat struct {
|
||||
formatStr string
|
||||
steps []patternStep
|
||||
|
||||
resultField string
|
||||
|
||||
// iff is an optional filter for skipping the format func
|
||||
iff *ifFilter
|
||||
}
|
||||
|
||||
func (pf *pipeFormat) String() string {
|
||||
s := "format" + quoteTokenIfNeeded(pf.formatStr)
|
||||
if pf.iff != nil {
|
||||
s += " " + pf.iff.String()
|
||||
}
|
||||
s += " as " + quoteTokenIfNeeded(pf.resultField)
|
||||
return s
|
||||
}
|
||||
|
||||
func (pf *pipeFormat) updateNeededFields(neededFields, unneededFields fieldsSet) {
|
||||
if neededFields.contains("*") {
|
||||
if !unneededFields.contains(pf.resultField) {
|
||||
unneededFields.add(pf.resultField)
|
||||
for _, step := range pf.steps {
|
||||
if step.field != "" {
|
||||
unneededFields.remove(step.field)
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if neededFields.contains(pf.resultField) {
|
||||
neededFields.remove(pf.resultField)
|
||||
for _, step := range pf.steps {
|
||||
if step.field != "" {
|
||||
neededFields.add(step.field)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (pf *pipeFormat) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
|
||||
return &pipeFormatProcessor{
|
||||
pf: pf,
|
||||
ppBase: ppBase,
|
||||
|
||||
shards: make([]pipeFormatProcessorShard, workersCount),
|
||||
}
|
||||
}
|
||||
|
||||
type pipeFormatProcessor struct {
|
||||
pf *pipeFormat
|
||||
ppBase pipeProcessor
|
||||
|
||||
shards []pipeFormatProcessorShard
|
||||
}
|
||||
|
||||
type pipeFormatProcessorShard struct {
|
||||
pipeFormatProcessorShardNopad
|
||||
|
||||
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
|
||||
_ [128 - unsafe.Sizeof(pipeFormatProcessorShardNopad{})%128]byte
|
||||
}
|
||||
|
||||
type pipeFormatProcessorShardNopad struct {
|
||||
bm bitmap
|
||||
|
||||
uctx fieldsUnpackerContext
|
||||
wctx pipeUnpackWriteContext
|
||||
}
|
||||
|
||||
func (pfp *pipeFormatProcessor) writeBlock(workerID uint, br *blockResult) {
|
||||
if len(br.timestamps) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
shard := &pfp.shards[workerID]
|
||||
shard.wctx.init(workerID, pfp.ppBase, br)
|
||||
shard.uctx.init(workerID, "")
|
||||
|
||||
bm := &shard.bm
|
||||
bm.init(len(br.timestamps))
|
||||
bm.setBits()
|
||||
if iff := pfp.pf.iff; iff != nil {
|
||||
iff.f.applyToBlockResult(br, bm)
|
||||
if bm.isZero() {
|
||||
pfp.ppBase.writeBlock(workerID, br)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
for rowIdx := range br.timestamps {
|
||||
if bm.isSetBit(rowIdx) {
|
||||
shard.formatRow(pfp.pf, br, rowIdx)
|
||||
shard.wctx.writeRow(rowIdx, shard.uctx.fields)
|
||||
} else {
|
||||
shard.wctx.writeRow(rowIdx, nil)
|
||||
}
|
||||
}
|
||||
|
||||
shard.wctx.flush()
|
||||
shard.wctx.reset()
|
||||
shard.uctx.reset()
|
||||
}
|
||||
|
||||
func (pfp *pipeFormatProcessor) flush() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (shard *pipeFormatProcessorShard) formatRow(pf *pipeFormat, br *blockResult, rowIdx int) {
|
||||
bb := bbPool.Get()
|
||||
b := bb.B
|
||||
for _, step := range pf.steps {
|
||||
b = append(b, step.prefix...)
|
||||
if step.field != "" {
|
||||
c := br.getColumnByName(step.field)
|
||||
v := c.getValueAtRow(br, rowIdx)
|
||||
b = append(b, v...)
|
||||
}
|
||||
}
|
||||
bb.B = b
|
||||
|
||||
s := bytesutil.ToUnsafeString(b)
|
||||
shard.uctx.resetFields()
|
||||
shard.uctx.addField(pf.resultField, s)
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func parsePipeFormat(lex *lexer) (*pipeFormat, error) {
|
||||
if !lex.isKeyword("format") {
|
||||
return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "format")
|
||||
}
|
||||
lex.nextToken()
|
||||
|
||||
// parse format
|
||||
formatStr, err := getCompoundToken(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read 'format': %w", err)
|
||||
}
|
||||
steps, err := parsePatternSteps(formatStr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'pattern' %q: %w", formatStr, err)
|
||||
}
|
||||
|
||||
if !lex.isKeyword("as") {
|
||||
return nil, fmt.Errorf("missing 'as' keyword after 'format %q'", formatStr)
|
||||
}
|
||||
lex.nextToken()
|
||||
|
||||
resultField, err := parseFieldName(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse result field after 'format %q as': %w", formatStr, err)
|
||||
}
|
||||
|
||||
pf := &pipeFormat{
|
||||
formatStr: formatStr,
|
||||
steps: steps,
|
||||
resultField: resultField,
|
||||
}
|
||||
|
||||
// parse optional if (...)
|
||||
if lex.isKeyword("if") {
|
||||
iff, err := parseIfFilter(lex)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
pf.iff = iff
|
||||
}
|
||||
|
||||
return pf, nil
|
||||
}
|
Loading…
Reference in a new issue