VictoriaMetrics/lib/logstorage/pipe_format.go

282 lines
6.1 KiB
Go
Raw Normal View History

2024-05-22 12:05:32 +00:00
package logstorage
import (
"fmt"
2024-06-04 14:20:02 +00:00
"math"
2024-05-22 15:41:45 +00:00
"strconv"
2024-05-22 12:05:32 +00:00
"unsafe"
2024-05-26 22:58:41 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
2024-05-22 12:05:32 +00:00
)
// pipeFormat processes '| format ...' pipe.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#format-pipe
type pipeFormat struct {
formatStr string
steps []patternStep
resultField string
2024-05-24 16:31:49 +00:00
keepOriginalFields bool
2024-05-24 20:17:21 +00:00
skipEmptyResults bool
2024-05-24 16:31:49 +00:00
2024-05-22 12:05:32 +00:00
// iff is an optional filter for skipping the format func
iff *ifFilter
}
func (pf *pipeFormat) String() string {
2024-05-22 15:17:59 +00:00
s := "format"
2024-05-22 12:05:32 +00:00
if pf.iff != nil {
s += " " + pf.iff.String()
}
2024-05-22 15:17:59 +00:00
s += " " + quoteTokenIfNeeded(pf.formatStr)
if !isMsgFieldName(pf.resultField) {
s += " as " + quoteTokenIfNeeded(pf.resultField)
}
2024-05-24 16:31:49 +00:00
if pf.keepOriginalFields {
s += " keep_original_fields"
}
2024-05-24 20:17:21 +00:00
if pf.skipEmptyResults {
s += " skip_empty_results"
}
2024-05-22 12:05:32 +00:00
return s
}
func (pf *pipeFormat) updateNeededFields(neededFields, unneededFields fieldsSet) {
2024-05-30 09:44:29 +00:00
if neededFields.isEmpty() {
if pf.iff != nil {
neededFields.addFields(pf.iff.neededFields)
}
return
}
2024-05-22 12:05:32 +00:00
if neededFields.contains("*") {
if !unneededFields.contains(pf.resultField) {
2024-05-24 20:17:21 +00:00
if !pf.keepOriginalFields && !pf.skipEmptyResults {
2024-05-24 16:31:49 +00:00
unneededFields.add(pf.resultField)
}
2024-05-22 13:29:18 +00:00
if pf.iff != nil {
unneededFields.removeFields(pf.iff.neededFields)
}
2024-05-22 12:05:32 +00:00
for _, step := range pf.steps {
if step.field != "" {
unneededFields.remove(step.field)
}
}
}
} else {
if neededFields.contains(pf.resultField) {
2024-05-24 20:17:21 +00:00
if !pf.keepOriginalFields && !pf.skipEmptyResults {
2024-05-24 16:31:49 +00:00
neededFields.remove(pf.resultField)
}
2024-05-22 13:29:18 +00:00
if pf.iff != nil {
neededFields.addFields(pf.iff.neededFields)
}
2024-05-22 12:05:32 +00:00
for _, step := range pf.steps {
if step.field != "" {
neededFields.add(step.field)
}
}
}
}
}
2024-05-25 12:37:26 +00:00
// optimize delegates to optimizeFilterIn on the optional 'if (...)' filter.
//
// NOTE(review): pf.iff is nil when the query has no 'if (...)' part, so this
// relies on ifFilter.optimizeFilterIn accepting a nil receiver - confirm.
func (pf *pipeFormat) optimize() {
	pf.iff.optimizeFilterIn()
}
// hasFilterInWithQuery returns the result of hasFilterInWithQuery
// for the optional 'if (...)' filter.
//
// NOTE(review): pf.iff is nil when the query has no 'if (...)' part, so this
// relies on ifFilter.hasFilterInWithQuery accepting a nil receiver - confirm.
func (pf *pipeFormat) hasFilterInWithQuery() bool {
	return pf.iff.hasFilterInWithQuery()
}
// initFilterInValues returns a shallow copy of pf whose 'if (...)' filter is
// replaced with the result of iff.initFilterInValues(cache, getFieldValuesFunc).
//
// pf itself is left unmodified. The error returned by the filter is passed
// to the caller as is.
func (pf *pipeFormat) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
	iff, err := pf.iff.initFilterInValues(cache, getFieldValuesFunc)
	if err != nil {
		return nil, err
	}

	clone := new(pipeFormat)
	*clone = *pf
	clone.iff = iff
	return clone, nil
}
2024-05-25 18:13:01 +00:00
func (pf *pipeFormat) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
2024-05-22 12:05:32 +00:00
return &pipeFormatProcessor{
pf: pf,
2024-05-25 18:13:01 +00:00
ppNext: ppNext,
2024-05-22 12:05:32 +00:00
shards: make([]pipeFormatProcessorShard, workersCount),
}
}
type pipeFormatProcessor struct {
pf *pipeFormat
2024-05-25 18:13:01 +00:00
ppNext pipeProcessor
2024-05-22 12:05:32 +00:00
shards []pipeFormatProcessorShard
}
// pipeFormatProcessorShard is the per-worker state of pipeFormatProcessor
// padded to a multiple of 128 bytes.
type pipeFormatProcessorShard struct {
	pipeFormatProcessorShardNopad

	// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
	//
	// It rounds the struct size up to the next multiple of 128 bytes.
	// A full 128 bytes are added when the unpadded size is already such a multiple.
	_ [128 - unsafe.Sizeof(pipeFormatProcessorShardNopad{})%128]byte
}
type pipeFormatProcessorShardNopad struct {
bm bitmap
2024-05-25 14:09:59 +00:00
a arena
rc resultColumn
2024-05-22 12:05:32 +00:00
}
func (pfp *pipeFormatProcessor) writeBlock(workerID uint, br *blockResult) {
if len(br.timestamps) == 0 {
return
}
shard := &pfp.shards[workerID]
2024-05-25 14:09:59 +00:00
pf := pfp.pf
2024-05-22 12:05:32 +00:00
bm := &shard.bm
bm.init(len(br.timestamps))
bm.setBits()
2024-05-25 14:09:59 +00:00
if iff := pf.iff; iff != nil {
2024-05-22 12:05:32 +00:00
iff.f.applyToBlockResult(br, bm)
if bm.isZero() {
2024-05-25 18:13:01 +00:00
pfp.ppNext.writeBlock(workerID, br)
2024-05-22 12:05:32 +00:00
return
}
}
2024-05-25 14:09:59 +00:00
shard.rc.name = pf.resultField
resultColumn := br.getColumnByName(pf.resultField)
2024-05-22 12:05:32 +00:00
for rowIdx := range br.timestamps {
2024-05-25 14:09:59 +00:00
v := ""
2024-05-22 12:05:32 +00:00
if bm.isSetBit(rowIdx) {
2024-05-25 14:09:59 +00:00
v = shard.formatRow(pf, br, rowIdx)
if v == "" && pf.skipEmptyResults || pf.keepOriginalFields {
if vOrig := resultColumn.getValueAtRow(br, rowIdx); vOrig != "" {
v = vOrig
}
}
2024-05-22 12:05:32 +00:00
} else {
2024-05-25 14:09:59 +00:00
v = resultColumn.getValueAtRow(br, rowIdx)
2024-05-22 12:05:32 +00:00
}
2024-05-25 14:09:59 +00:00
shard.rc.addValue(v)
2024-05-22 12:05:32 +00:00
}
2024-05-25 14:09:59 +00:00
br.addResultColumn(&shard.rc)
2024-05-25 18:13:01 +00:00
pfp.ppNext.writeBlock(workerID, br)
2024-05-25 14:09:59 +00:00
shard.a.reset()
shard.rc.reset()
2024-05-22 12:05:32 +00:00
}
// flush always returns nil.
//
// There is nothing to flush, since writeBlock passes every block to ppNext
// before returning and resets the per-shard scratch state afterwards.
func (pfp *pipeFormatProcessor) flush() error {
	return nil
}
2024-05-25 14:09:59 +00:00
func (shard *pipeFormatProcessorShard) formatRow(pf *pipeFormat, br *blockResult, rowIdx int) string {
2024-05-26 22:58:41 +00:00
b := shard.a.b
bLen := len(b)
2024-05-22 12:05:32 +00:00
for _, step := range pf.steps {
b = append(b, step.prefix...)
if step.field != "" {
c := br.getColumnByName(step.field)
v := c.getValueAtRow(br, rowIdx)
2024-06-04 14:20:02 +00:00
switch step.fieldOpt {
case "q":
2024-05-22 15:41:45 +00:00
b = strconv.AppendQuote(b, v)
2024-06-04 14:20:02 +00:00
case "time":
nsecs, ok := tryParseInt64(v)
if !ok {
b = append(b, v...)
continue
}
b = marshalTimestampRFC3339NanoString(b, nsecs)
case "duration":
nsecs, ok := tryParseInt64(v)
if !ok {
b = append(b, v...)
continue
}
b = marshalDurationString(b, nsecs)
case "ipv4":
ipNum, ok := tryParseUint64(v)
if !ok || ipNum > math.MaxUint32 {
b = append(b, v...)
continue
}
b = marshalIPv4String(b, uint32(ipNum))
default:
2024-05-22 15:41:45 +00:00
b = append(b, v...)
}
2024-05-22 12:05:32 +00:00
}
}
2024-05-26 22:58:41 +00:00
shard.a.b = b
2024-05-22 12:05:32 +00:00
2024-05-26 22:58:41 +00:00
return bytesutil.ToUnsafeString(b[bLen:])
2024-05-22 12:05:32 +00:00
}
func parsePipeFormat(lex *lexer) (*pipeFormat, error) {
if !lex.isKeyword("format") {
return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "format")
}
lex.nextToken()
2024-05-22 13:29:18 +00:00
// parse optional if (...)
var iff *ifFilter
if lex.isKeyword("if") {
f, err := parseIfFilter(lex)
if err != nil {
return nil, err
}
iff = f
}
2024-05-22 15:17:59 +00:00
// parse format
formatStr, err := getCompoundToken(lex)
if err != nil {
return nil, fmt.Errorf("cannot read 'format': %w", err)
2024-05-22 12:05:32 +00:00
}
2024-05-22 15:17:59 +00:00
steps, err := parsePatternSteps(formatStr)
2024-05-22 12:05:32 +00:00
if err != nil {
2024-05-22 15:17:59 +00:00
return nil, fmt.Errorf("cannot parse 'pattern' %q: %w", formatStr, err)
}
// parse optional 'as ...` part
resultField := "_msg"
if lex.isKeyword("as") {
lex.nextToken()
field, err := parseFieldName(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse result field after 'format %q as': %w", formatStr, err)
}
resultField = field
2024-05-22 12:05:32 +00:00
}
2024-05-24 16:31:49 +00:00
keepOriginalFields := false
2024-05-24 20:17:21 +00:00
skipEmptyResults := false
switch {
case lex.isKeyword("keep_original_fields"):
2024-05-24 16:31:49 +00:00
lex.nextToken()
keepOriginalFields = true
2024-05-24 20:17:21 +00:00
case lex.isKeyword("skip_empty_results"):
lex.nextToken()
skipEmptyResults = true
2024-05-24 16:31:49 +00:00
}
2024-05-22 12:05:32 +00:00
pf := &pipeFormat{
2024-05-24 16:31:49 +00:00
formatStr: formatStr,
steps: steps,
resultField: resultField,
keepOriginalFields: keepOriginalFields,
2024-05-24 20:17:21 +00:00
skipEmptyResults: skipEmptyResults,
2024-05-24 16:31:49 +00:00
iff: iff,
2024-05-22 12:05:32 +00:00
}
return pf, nil
}