This commit is contained in:
Aliaksandr Valialkin 2024-05-20 02:41:03 +02:00
parent aafd3c28b5
commit 2b59220864
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
7 changed files with 162 additions and 151 deletions

View file

@ -20,7 +20,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver" "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logjson"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common" "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter" "github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
@ -210,8 +209,8 @@ func readBulkLine(sc *bufio.Scanner, timeField, msgField string,
return false, fmt.Errorf(`missing log message after the "create" or "index" command`) return false, fmt.Errorf(`missing log message after the "create" or "index" command`)
} }
line = sc.Bytes() line = sc.Bytes()
p := logjson.GetParser() p := logstorage.GetJSONParser()
if err := p.ParseLogMessage(line); err != nil { if err := p.ParseLogMessage(line, ""); err != nil {
return false, fmt.Errorf("cannot parse json-encoded log entry: %w", err) return false, fmt.Errorf("cannot parse json-encoded log entry: %w", err)
} }
@ -224,7 +223,7 @@ func readBulkLine(sc *bufio.Scanner, timeField, msgField string,
} }
p.RenameField(msgField, "_msg") p.RenameField(msgField, "_msg")
processLogMessage(ts, p.Fields) processLogMessage(ts, p.Fields)
logjson.PutParser(p) logstorage.PutJSONParser(p)
return true, nil return true, nil
} }

View file

@ -12,7 +12,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver" "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logjson"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common" "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter" "github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
@ -105,8 +104,8 @@ func readLine(sc *bufio.Scanner, timeField, msgField string, processLogMessage f
line = sc.Bytes() line = sc.Bytes()
} }
p := logjson.GetParser() p := logstorage.GetJSONParser()
if err := p.ParseLogMessage(line); err != nil { if err := p.ParseLogMessage(line, ""); err != nil {
return false, fmt.Errorf("cannot parse json-encoded log entry: %w", err) return false, fmt.Errorf("cannot parse json-encoded log entry: %w", err)
} }
ts, err := extractTimestampFromFields(timeField, p.Fields) ts, err := extractTimestampFromFields(timeField, p.Fields)
@ -118,7 +117,7 @@ func readLine(sc *bufio.Scanner, timeField, msgField string, processLogMessage f
} }
p.RenameField(msgField, "_msg") p.RenameField(msgField, "_msg")
processLogMessage(ts, p.Fields) processLogMessage(ts, p.Fields)
logjson.PutParser(p) logstorage.PutJSONParser(p)
return true, nil return true, nil
} }

View file

@ -1,71 +0,0 @@
package logjson
import (
"reflect"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
)
func TestParserFailure(t *testing.T) {
f := func(data string) {
t.Helper()
p := GetParser()
err := p.ParseLogMessage([]byte(data))
if err == nil {
t.Fatalf("expecting non-nil error")
}
PutParser(p)
}
f("")
f("{foo")
f("[1,2,3]")
f(`{"foo",}`)
}
func TestParserSuccess(t *testing.T) {
f := func(data string, fieldsExpected []logstorage.Field) {
t.Helper()
p := GetParser()
err := p.ParseLogMessage([]byte(data))
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
if !reflect.DeepEqual(p.Fields, fieldsExpected) {
t.Fatalf("unexpected fields;\ngot\n%s\nwant\n%s", p.Fields, fieldsExpected)
}
PutParser(p)
}
f("{}", nil)
f(`{"foo":"bar"}`, []logstorage.Field{
{
Name: "foo",
Value: "bar",
},
})
f(`{"foo":{"bar":"baz"},"a":1,"b":true,"c":[1,2],"d":false}`, []logstorage.Field{
{
Name: "foo.bar",
Value: "baz",
},
{
Name: "a",
Value: "1",
},
{
Name: "b",
Value: "true",
},
{
Name: "c",
Value: "[1,2]",
},
{
Name: "d",
Value: "false",
},
})
}

View file

@ -1,4 +1,4 @@
package logjson package logstorage
import ( import (
"fmt" "fmt"
@ -6,21 +6,20 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
"github.com/valyala/fastjson" "github.com/valyala/fastjson"
) )
// Parser parses a single JSON log message into Fields. // JSONParser parses a single JSON log message into Fields.
// //
// See https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model // See https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model
// //
// Use GetParser() for obtaining the parser. // Use GetParser() for obtaining the parser.
type Parser struct { type JSONParser struct {
// Fields contains the parsed JSON line after Parse() call // Fields contains the parsed JSON line after Parse() call
// //
// The Fields are valid until the next call to ParseLogMessage() // The Fields are valid until the next call to ParseLogMessage()
// or until the parser is returned to the pool with PutParser() call. // or until the parser is returned to the pool with PutParser() call.
Fields []logstorage.Field Fields []Field
// p is used for fast JSON parsing // p is used for fast JSON parsing
p fastjson.Parser p fastjson.Parser
@ -33,59 +32,72 @@ type Parser struct {
prefixBuf []byte prefixBuf []byte
} }
func (p *Parser) reset() { func (p *JSONParser) reset() {
fields := p.Fields clear(p.Fields)
for i := range fields { p.Fields = p.Fields[:0]
lf := &fields[i]
lf.Name = ""
lf.Value = ""
}
p.Fields = fields[:0]
p.buf = p.buf[:0] p.buf = p.buf[:0]
p.prefixBuf = p.prefixBuf[:0] p.prefixBuf = p.prefixBuf[:0]
} }
// GetParser returns Parser ready to parse JSON lines. // GetJSONParser returns JSONParser ready to parse JSON lines.
// //
// Return the parser to the pool when it is no longer needed by calling PutParser(). // Return the parser to the pool when it is no longer needed by calling PutJSONParser().
func GetParser() *Parser { func GetJSONParser() *JSONParser {
v := parserPool.Get() v := parserPool.Get()
if v == nil { if v == nil {
return &Parser{} return &JSONParser{}
} }
return v.(*Parser) return v.(*JSONParser)
} }
// PutParser returns the parser to the pool. // PutJSONParser returns the parser to the pool.
// //
// The parser cannot be used after returning to the pool. // The parser cannot be used after returning to the pool.
func PutParser(p *Parser) { func PutJSONParser(p *JSONParser) {
p.reset() p.reset()
parserPool.Put(p) parserPool.Put(p)
} }
var parserPool sync.Pool var parserPool sync.Pool
// ParseLogMessageNoResetBuf parses the given JSON log message msg into p.Fields.
//
// It adds the given prefix to all the parsed field names.
//
// The p.Fields remains valid until the next call to PutJSONParser().
func (p *JSONParser) ParseLogMessageNoResetBuf(msg, prefix string) error {
return p.parseLogMessage(msg, prefix, false)
}
// ParseLogMessage parses the given JSON log message msg into p.Fields. // ParseLogMessage parses the given JSON log message msg into p.Fields.
// //
// The p.Fields remains valid until the next call to ParseLogMessage() or PutParser(). // It adds the given prefix to all the parsed field names.
func (p *Parser) ParseLogMessage(msg []byte) error { //
s := bytesutil.ToUnsafeString(msg) // The p.Fields remains valid until the next call to ParseLogMessage() or PutJSONParser().
v, err := p.p.Parse(s) func (p *JSONParser) ParseLogMessage(msg []byte, prefix string) error {
msgStr := bytesutil.ToUnsafeString(msg)
return p.parseLogMessage(msgStr, prefix, true)
}
func (p *JSONParser) parseLogMessage(msg, prefix string, resetBuf bool) error {
v, err := p.p.Parse(msg)
if err != nil { if err != nil {
return fmt.Errorf("cannot parse json: %w", err) return fmt.Errorf("cannot parse json: %w", err)
} }
if t := v.Type(); t != fastjson.TypeObject { if t := v.Type(); t != fastjson.TypeObject {
return fmt.Errorf("expecting json dictionary; got %s", t) return fmt.Errorf("expecting json dictionary; got %s", t)
} }
if resetBuf {
p.reset() p.reset()
}
p.prefixBuf = append(p.prefixBuf[:0], prefix...)
p.Fields, p.buf, p.prefixBuf = appendLogFields(p.Fields, p.buf, p.prefixBuf, v) p.Fields, p.buf, p.prefixBuf = appendLogFields(p.Fields, p.buf, p.prefixBuf, v)
return nil return nil
} }
// RenameField renames field with the oldName to newName in p.Fields // RenameField renames field with the oldName to newName in p.Fields
func (p *Parser) RenameField(oldName, newName string) { func (p *JSONParser) RenameField(oldName, newName string) {
if oldName == "" { if oldName == "" {
return return
} }
@ -99,7 +111,7 @@ func (p *Parser) RenameField(oldName, newName string) {
} }
} }
func appendLogFields(dst []logstorage.Field, dstBuf, prefixBuf []byte, v *fastjson.Value) ([]logstorage.Field, []byte, []byte) { func appendLogFields(dst []Field, dstBuf, prefixBuf []byte, v *fastjson.Value) ([]Field, []byte, []byte) {
o := v.GetObject() o := v.GetObject()
o.Visit(func(k []byte, v *fastjson.Value) { o.Visit(func(k []byte, v *fastjson.Value) {
t := v.Type() t := v.Type()
@ -133,13 +145,13 @@ func appendLogFields(dst []logstorage.Field, dstBuf, prefixBuf []byte, v *fastjs
return dst, dstBuf, prefixBuf return dst, dstBuf, prefixBuf
} }
func appendLogField(dst []logstorage.Field, dstBuf, prefixBuf, k, value []byte) ([]logstorage.Field, []byte) { func appendLogField(dst []Field, dstBuf, prefixBuf, k, value []byte) ([]Field, []byte) {
dstBufLen := len(dstBuf) dstBufLen := len(dstBuf)
dstBuf = append(dstBuf, prefixBuf...) dstBuf = append(dstBuf, prefixBuf...)
dstBuf = append(dstBuf, k...) dstBuf = append(dstBuf, k...)
name := dstBuf[dstBufLen:] name := dstBuf[dstBufLen:]
dst = append(dst, logstorage.Field{ dst = append(dst, Field{
Name: bytesutil.ToUnsafeString(name), Name: bytesutil.ToUnsafeString(name),
Value: bytesutil.ToUnsafeString(value), Value: bytesutil.ToUnsafeString(value),
}) })

View file

@ -0,0 +1,97 @@
package logstorage
import (
"reflect"
"testing"
)
func TestJSONParserFailure(t *testing.T) {
f := func(data string) {
t.Helper()
p := GetJSONParser()
err := p.ParseLogMessage([]byte(data), "")
if err == nil {
t.Fatalf("expecting non-nil error")
}
PutJSONParser(p)
}
f("")
f("{foo")
f("[1,2,3]")
f(`{"foo",}`)
}
func TestJSONParserSuccess(t *testing.T) {
f := func(data, prefix string, fieldsExpected []Field) {
t.Helper()
p := GetJSONParser()
err := p.ParseLogMessage([]byte(data), prefix)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
if !reflect.DeepEqual(p.Fields, fieldsExpected) {
t.Fatalf("unexpected fields;\ngot\n%s\nwant\n%s", p.Fields, fieldsExpected)
}
PutJSONParser(p)
}
f("{}", "", nil)
f(`{"foo":"bar"}`, "", []Field{
{
Name: "foo",
Value: "bar",
},
})
f(`{"foo":"bar"}`, "prefix_", []Field{
{
Name: "prefix_foo",
Value: "bar",
},
})
f(`{"foo":{"bar":"baz"},"a":1,"b":true,"c":[1,2],"d":false}`, "", []Field{
{
Name: "foo.bar",
Value: "baz",
},
{
Name: "a",
Value: "1",
},
{
Name: "b",
Value: "true",
},
{
Name: "c",
Value: "[1,2]",
},
{
Name: "d",
Value: "false",
},
})
f(`{"foo":{"bar":"baz"},"a":1,"b":true,"c":[1,2],"d":false}`, "prefix_", []Field{
{
Name: "prefix_foo.bar",
Value: "baz",
},
{
Name: "prefix_a",
Value: "1",
},
{
Name: "prefix_b",
Value: "true",
},
{
Name: "prefix_c",
Value: "[1,2]",
},
{
Name: "prefix_d",
Value: "false",
},
})
}

View file

@ -3,10 +3,6 @@ package logstorage
import ( import (
"fmt" "fmt"
"unsafe" "unsafe"
"github.com/valyala/fastjson"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
) )
// pipeUnpackJSON processes '| unpack_json ...' pipe. // pipeUnpackJSON processes '| unpack_json ...' pipe.
@ -64,9 +60,7 @@ type pipeUnpackJSONProcessorShard struct {
} }
type pipeUnpackJSONProcessorShardNopad struct { type pipeUnpackJSONProcessorShardNopad struct {
jsonParser fastjson.Parser jsonParser JSONParser
jsonFields []Field
jsonValuesBuf []byte
rcs []resultColumn rcs []resultColumn
br blockResult br blockResult
@ -134,38 +128,16 @@ func (shard *pipeUnpackJSONProcessorShard) flush(ppBase pipeProcessor) {
} }
} }
func (shard *pipeUnpackJSONProcessorShard) parseJSONFields(resultPrefix, v string) { func (shard *pipeUnpackJSONProcessorShard) parseJSON(v, resultPrefix string) []Field {
clear(shard.jsonFields) if len(v) == 0 || v[0] != '{' {
shard.jsonFields = shard.jsonFields[:0] // This isn't a JSON object
shard.jsonValuesBuf = shard.jsonValuesBuf[:0] return nil
jsv, err := shard.jsonParser.Parse(v)
if err != nil {
return
} }
jso := jsv.GetObject() if err := shard.jsonParser.ParseLogMessageNoResetBuf(v, resultPrefix); err != nil {
buf := shard.jsonValuesBuf // Cannot parse v
jso.Visit(func(k []byte, v *fastjson.Value) { return nil
var bv []byte
if v.Type() == fastjson.TypeString {
bv = v.GetStringBytes()
} else {
bufLen := len(buf)
buf = v.MarshalTo(buf)
bv = buf[bufLen:]
} }
if resultPrefix != "" { return shard.jsonParser.Fields
bufLen := len(buf)
buf = append(buf, resultPrefix...)
buf = append(buf, k...)
k = buf[bufLen:]
}
shard.jsonFields = append(shard.jsonFields, Field{
Name: bytesutil.ToUnsafeString(k),
Value: bytesutil.ToUnsafeString(bv),
})
})
shard.jsonValuesBuf = buf
} }
func (pup *pipeUnpackJSONProcessor) writeBlock(workerID uint, br *blockResult) { func (pup *pipeUnpackJSONProcessor) writeBlock(workerID uint, br *blockResult) {
@ -180,21 +152,23 @@ func (pup *pipeUnpackJSONProcessor) writeBlock(workerID uint, br *blockResult) {
c := br.getColumnByName(pup.pu.fromField) c := br.getColumnByName(pup.pu.fromField)
if c.isConst { if c.isConst {
v := c.valuesEncoded[0] v := c.valuesEncoded[0]
shard.parseJSONFields(resultPrefix, v) extraFields := shard.parseJSON(v, resultPrefix)
for rowIdx := range br.timestamps { for rowIdx := range br.timestamps {
shard.writeRow(pup.ppBase, br, cs, rowIdx, shard.jsonFields) shard.writeRow(pup.ppBase, br, cs, rowIdx, extraFields)
} }
} else { } else {
values := c.getValues(br) values := c.getValues(br)
var extraFields []Field
for i, v := range values { for i, v := range values {
if i == 0 || values[i-1] != v { if i == 0 || values[i-1] != v {
shard.parseJSONFields(resultPrefix, v) extraFields = shard.parseJSON(v, resultPrefix)
} }
shard.writeRow(pup.ppBase, br, cs, i, shard.jsonFields) shard.writeRow(pup.ppBase, br, cs, i, extraFields)
} }
} }
shard.flush(pup.ppBase) shard.flush(pup.ppBase)
shard.jsonParser.reset()
} }
func (pup *pipeUnpackJSONProcessor) flush() error { func (pup *pipeUnpackJSONProcessor) flush() error {

View file

@ -91,7 +91,7 @@ func TestPipeUnpackJSON(t *testing.T) {
// single row, unpack from named field // single row, unpack from named field
f("unpack_json from x", [][]Field{ f("unpack_json from x", [][]Field{
{ {
{"x", `{"foo":"bar","baz":"xyz","a":123,"b":["foo","bar"],"x":NaN}`}, {"x", `{"foo":"bar","baz":"xyz","a":123,"b":["foo","bar"],"x":NaN,"y":{"z":{"a":"b"}}}`},
}, },
}, [][]Field{ }, [][]Field{
{ {
@ -100,6 +100,7 @@ func TestPipeUnpackJSON(t *testing.T) {
{"baz", "xyz"}, {"baz", "xyz"},
{"a", "123"}, {"a", "123"},
{"b", `["foo","bar"]`}, {"b", `["foo","bar"]`},
{"y.z.a", "b"},
}, },
}) })