2024-05-12 14:33:29 +00:00
|
|
|
package logstorage
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
)
|
|
|
|
|
|
|
|
type pipe interface {
	// String returns string representation of the pipe.
	String() string

	// canLiveTail must return true if the given pipe can be used in live tailing.
	//
	// See https://docs.victoriametrics.com/victorialogs/querying/#live-tailing
	canLiveTail() bool

	// updateNeededFields must update neededFields and unneededFields with fields it needs and not needs at the input.
	updateNeededFields(neededFields, unneededFields fieldsSet)

	// newPipeProcessor must return new pipeProcessor, which writes data to the given ppNext.
	//
	// workersCount is the number of goroutine workers, which will call writeBlock() method.
	//
	// If stopCh is closed, the returned pipeProcessor must stop performing CPU-intensive tasks which take more than a few milliseconds.
	// It is OK to continue processing pipeProcessor calls if they take less than a few milliseconds.
	//
	// The returned pipeProcessor may call cancel() at any time in order to notify the caller to stop sending new data to it.
	newPipeProcessor(workersCount int, stopCh <-chan struct{}, cancel func(), ppNext pipeProcessor) pipeProcessor

	// optimize must optimize the pipe.
	optimize()

	// hasFilterInWithQuery must return true if the pipe contains 'in(subquery)' filter (recursively).
	hasFilterInWithQuery() bool

	// initFilterInValues must return new pipe with the initialized values for 'in(subquery)' filters (recursively).
	//
	// It is OK to return the pipe itself if it doesn't contain 'in(subquery)' filters.
	initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (pipe, error)
}
|
|
|
|
|
|
|
|
// pipeProcessor must process a single pipe.
type pipeProcessor interface {
	// writeBlock must write the given block of data to the given pipeProcessor.
	//
	// writeBlock is called concurrently from worker goroutines.
	// The workerID is the id of the worker goroutine, which calls the writeBlock.
	// It is in the range 0 ... workersCount-1 .
	//
	// It is OK to modify br contents inside writeBlock. The caller mustn't rely on br contents after writeBlock call.
	// It is forbidden to hold references to br after returning from writeBlock, since the caller may re-use it.
	//
	// If any error occurs at writeBlock, then cancel() must be called by pipeProcessor in order to notify worker goroutines
	// to stop sending new data. The occurred error must be returned from flush().
	//
	// cancel() may be called also when the pipeProcessor decides to stop accepting new data, even if there is no error.
	writeBlock(workerID uint, br *blockResult)

	// flush must flush all the data accumulated in the pipeProcessor to the next pipeProcessor.
	//
	// flush is called after all the worker goroutines are stopped.
	//
	// It is guaranteed that flush() is called for every pipeProcessor returned from pipe.newPipeProcessor().
	flush() error
}
|
|
|
|
|
|
|
|
type defaultPipeProcessor func(workerID uint, br *blockResult)
|
|
|
|
|
|
|
|
func newDefaultPipeProcessor(writeBlock func(workerID uint, br *blockResult)) pipeProcessor {
|
|
|
|
return defaultPipeProcessor(writeBlock)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dpp defaultPipeProcessor) writeBlock(workerID uint, br *blockResult) {
|
|
|
|
dpp(workerID, br)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dpp defaultPipeProcessor) flush() error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func parsePipes(lex *lexer) ([]pipe, error) {
|
|
|
|
var pipes []pipe
|
2024-05-22 19:01:20 +00:00
|
|
|
for {
|
2024-05-20 02:08:30 +00:00
|
|
|
p, err := parsePipe(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2024-05-12 14:33:29 +00:00
|
|
|
}
|
2024-05-20 02:08:30 +00:00
|
|
|
pipes = append(pipes, p)
|
2024-05-22 19:01:20 +00:00
|
|
|
|
|
|
|
switch {
|
|
|
|
case lex.isKeyword("|"):
|
|
|
|
lex.nextToken()
|
|
|
|
case lex.isKeyword(")", ""):
|
|
|
|
return pipes, nil
|
2024-05-28 17:29:41 +00:00
|
|
|
default:
|
|
|
|
return nil, fmt.Errorf("unexpected token after [%s]: %q; expecting '|' or ')'", pipes[len(pipes)-1], lex.token)
|
2024-05-22 19:01:20 +00:00
|
|
|
}
|
2024-05-12 14:33:29 +00:00
|
|
|
}
|
|
|
|
}
|
2024-05-20 02:08:30 +00:00
|
|
|
|
|
|
|
func parsePipe(lex *lexer) (pipe, error) {
|
|
|
|
switch {
|
|
|
|
case lex.isKeyword("copy", "cp"):
|
|
|
|
pc, err := parsePipeCopy(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'copy' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return pc, nil
|
2024-05-28 17:29:41 +00:00
|
|
|
case lex.isKeyword("delete", "del", "rm", "drop"):
|
2024-05-20 02:08:30 +00:00
|
|
|
pd, err := parsePipeDelete(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'delete' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return pd, nil
|
2024-06-03 22:59:25 +00:00
|
|
|
case lex.isKeyword("drop_empty_fields"):
|
|
|
|
pd, err := parsePipeDropEmptyFields(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'drop_empty_fields' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return pd, nil
|
2024-05-20 02:08:30 +00:00
|
|
|
case lex.isKeyword("extract"):
|
|
|
|
pe, err := parsePipeExtract(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'extract' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return pe, nil
|
2024-05-28 17:29:41 +00:00
|
|
|
case lex.isKeyword("extract_regexp"):
|
|
|
|
pe, err := parsePipeExtractRegexp(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'extract_regexp' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return pe, nil
|
2024-05-20 02:08:30 +00:00
|
|
|
case lex.isKeyword("field_names"):
|
|
|
|
pf, err := parsePipeFieldNames(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'field_names' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return pf, nil
|
2024-05-28 17:29:41 +00:00
|
|
|
case lex.isKeyword("field_values"):
|
|
|
|
pf, err := parsePipeFieldValues(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot pase 'field_values' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return pf, nil
|
2024-05-22 19:01:20 +00:00
|
|
|
case lex.isKeyword("fields", "keep"):
|
2024-05-20 02:08:30 +00:00
|
|
|
pf, err := parsePipeFields(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'fields' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return pf, nil
|
2024-06-10 16:42:19 +00:00
|
|
|
case lex.isKeyword("filter", "where"):
|
2024-05-28 17:29:41 +00:00
|
|
|
pf, err := parsePipeFilter(lex, true)
|
2024-05-20 02:08:30 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'filter' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return pf, nil
|
2024-05-22 19:01:20 +00:00
|
|
|
case lex.isKeyword("format"):
|
|
|
|
pf, err := parsePipeFormat(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'format' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return pf, nil
|
2024-05-20 02:08:30 +00:00
|
|
|
case lex.isKeyword("limit", "head"):
|
|
|
|
pl, err := parsePipeLimit(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'limit' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return pl, nil
|
2024-06-05 08:07:49 +00:00
|
|
|
case lex.isKeyword("math", "eval"):
|
2024-05-28 17:29:41 +00:00
|
|
|
pm, err := parsePipeMath(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'math' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return pm, nil
|
2024-05-20 02:08:30 +00:00
|
|
|
case lex.isKeyword("offset", "skip"):
|
|
|
|
ps, err := parsePipeOffset(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'offset' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return ps, nil
|
2024-05-25 19:36:16 +00:00
|
|
|
case lex.isKeyword("pack_json"):
|
|
|
|
pp, err := parsePackJSON(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'pack_json' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return pp, nil
|
2024-06-05 01:18:12 +00:00
|
|
|
case lex.isKeyword("pack_logfmt"):
|
|
|
|
pp, err := parsePackLogfmt(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'pack_logfmt' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return pp, nil
|
2024-05-20 02:08:30 +00:00
|
|
|
case lex.isKeyword("rename", "mv"):
|
|
|
|
pr, err := parsePipeRename(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'rename' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return pr, nil
|
2024-05-24 22:30:58 +00:00
|
|
|
case lex.isKeyword("replace"):
|
|
|
|
pr, err := parsePipeReplace(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'replace' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return pr, nil
|
2024-05-25 19:36:16 +00:00
|
|
|
case lex.isKeyword("replace_regexp"):
|
|
|
|
pr, err := parsePipeReplaceRegexp(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'replace_regexp' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return pr, nil
|
2024-05-20 02:08:30 +00:00
|
|
|
case lex.isKeyword("sort"):
|
|
|
|
ps, err := parsePipeSort(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'sort' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return ps, nil
|
|
|
|
case lex.isKeyword("stats"):
|
2024-05-28 17:29:41 +00:00
|
|
|
ps, err := parsePipeStats(lex, true)
|
2024-05-20 02:08:30 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'stats' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return ps, nil
|
2024-06-17 10:13:18 +00:00
|
|
|
case lex.isKeyword("top"):
|
|
|
|
pt, err := parsePipeTop(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'top' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return pt, nil
|
2024-05-20 02:08:30 +00:00
|
|
|
case lex.isKeyword("uniq"):
|
|
|
|
pu, err := parsePipeUniq(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'uniq' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return pu, nil
|
|
|
|
case lex.isKeyword("unpack_json"):
|
|
|
|
pu, err := parsePipeUnpackJSON(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'unpack_json' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return pu, nil
|
|
|
|
case lex.isKeyword("unpack_logfmt"):
|
|
|
|
pu, err := parsePipeUnpackLogfmt(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'unpack_logfmt' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return pu, nil
|
2024-06-03 22:59:25 +00:00
|
|
|
case lex.isKeyword("unpack_syslog"):
|
|
|
|
pu, err := parsePipeUnpackSyslog(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'unpack_syslog' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return pu, nil
|
2024-05-25 19:36:16 +00:00
|
|
|
case lex.isKeyword("unroll"):
|
|
|
|
pu, err := parsePipeUnroll(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'unroll' pipe: %w", err)
|
|
|
|
}
|
|
|
|
return pu, nil
|
2024-05-20 02:08:30 +00:00
|
|
|
default:
|
2024-05-28 17:29:41 +00:00
|
|
|
lexState := lex.backupState()
|
|
|
|
|
|
|
|
// Try parsing stats pipe without 'stats' keyword
|
|
|
|
ps, err := parsePipeStats(lex, false)
|
|
|
|
if err == nil {
|
|
|
|
return ps, nil
|
|
|
|
}
|
|
|
|
lex.restoreState(lexState)
|
|
|
|
|
|
|
|
// Try parsing filter pipe without 'filter' keyword
|
|
|
|
pf, err := parsePipeFilter(lex, false)
|
|
|
|
if err == nil {
|
|
|
|
return pf, nil
|
|
|
|
}
|
|
|
|
lex.restoreState(lexState)
|
|
|
|
|
2024-05-20 02:08:30 +00:00
|
|
|
return nil, fmt.Errorf("unexpected pipe %q", lex.token)
|
|
|
|
}
|
|
|
|
}
|
2024-06-11 15:50:32 +00:00
|
|
|
|
|
|
|
var pipeNames = func() map[string]struct{} {
|
|
|
|
a := []string{
|
|
|
|
"copy", "cp",
|
|
|
|
"delete", "del", "rm", "drop",
|
|
|
|
"drop_empty_fields",
|
|
|
|
"extract",
|
|
|
|
"extract_regexp",
|
|
|
|
"field_names",
|
|
|
|
"field_values",
|
|
|
|
"fields", "keep",
|
|
|
|
"filter", "where",
|
|
|
|
"format",
|
|
|
|
"limit", "head",
|
|
|
|
"math", "eval",
|
|
|
|
"offset", "skip",
|
|
|
|
"pack_json",
|
|
|
|
"pack_logmft",
|
|
|
|
"rename", "mv",
|
|
|
|
"replace",
|
|
|
|
"replace_regexp",
|
|
|
|
"sort",
|
|
|
|
"stats",
|
2024-06-17 10:13:18 +00:00
|
|
|
"top",
|
2024-06-11 15:50:32 +00:00
|
|
|
"uniq",
|
|
|
|
"unpack_json",
|
|
|
|
"unpack_logfmt",
|
|
|
|
"unpack_syslog",
|
|
|
|
"unroll",
|
|
|
|
}
|
|
|
|
|
|
|
|
m := make(map[string]struct{}, len(a))
|
|
|
|
for _, s := range a {
|
|
|
|
m[s] = struct{}{}
|
|
|
|
}
|
|
|
|
|
|
|
|
// add stats names here, since they can be used without the initial `stats` keyword
|
|
|
|
for _, s := range statsNames {
|
|
|
|
m[s] = struct{}{}
|
|
|
|
}
|
|
|
|
return m
|
|
|
|
}()
|