2024-05-22 19:01:20 +00:00
|
|
|
package logstorage
|
|
|
|
|
|
|
|
import (
|
2025-01-13 05:47:33 +00:00
|
|
|
"encoding/base64"
|
2024-05-22 19:01:20 +00:00
|
|
|
"fmt"
|
2024-06-05 01:18:12 +00:00
|
|
|
"math"
|
2025-01-13 05:47:33 +00:00
|
|
|
"strings"
|
2024-12-04 13:35:31 +00:00
|
|
|
"unicode"
|
|
|
|
"unicode/utf8"
|
2024-05-22 19:01:20 +00:00
|
|
|
"unsafe"
|
2024-05-28 17:29:41 +00:00
|
|
|
|
2024-07-04 23:17:03 +00:00
|
|
|
"github.com/valyala/quicktemplate"
|
|
|
|
|
2024-05-28 17:29:41 +00:00
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
2024-05-22 19:01:20 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// pipeFormat processes '| format ...' pipe.
|
|
|
|
//
|
|
|
|
// See https://docs.victoriametrics.com/victorialogs/logsql/#format-pipe
|
|
|
|
type pipeFormat struct {
|
|
|
|
formatStr string
|
|
|
|
steps []patternStep
|
|
|
|
|
|
|
|
resultField string
|
|
|
|
|
2024-05-24 22:30:58 +00:00
|
|
|
keepOriginalFields bool
|
|
|
|
skipEmptyResults bool
|
|
|
|
|
2024-05-22 19:01:20 +00:00
|
|
|
// iff is an optional filter for skipping the format func
|
|
|
|
iff *ifFilter
|
|
|
|
}
|
|
|
|
|
|
|
|
func (pf *pipeFormat) String() string {
|
|
|
|
s := "format"
|
|
|
|
if pf.iff != nil {
|
|
|
|
s += " " + pf.iff.String()
|
|
|
|
}
|
|
|
|
s += " " + quoteTokenIfNeeded(pf.formatStr)
|
|
|
|
if !isMsgFieldName(pf.resultField) {
|
|
|
|
s += " as " + quoteTokenIfNeeded(pf.resultField)
|
|
|
|
}
|
2024-05-24 22:30:58 +00:00
|
|
|
if pf.keepOriginalFields {
|
|
|
|
s += " keep_original_fields"
|
|
|
|
}
|
|
|
|
if pf.skipEmptyResults {
|
|
|
|
s += " skip_empty_results"
|
|
|
|
}
|
2024-05-22 19:01:20 +00:00
|
|
|
return s
|
|
|
|
}
|
|
|
|
|
2024-06-27 12:18:42 +00:00
|
|
|
func (pf *pipeFormat) canLiveTail() bool {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
2024-05-22 19:01:20 +00:00
|
|
|
func (pf *pipeFormat) updateNeededFields(neededFields, unneededFields fieldsSet) {
|
2024-05-30 14:19:23 +00:00
|
|
|
if neededFields.isEmpty() {
|
|
|
|
if pf.iff != nil {
|
|
|
|
neededFields.addFields(pf.iff.neededFields)
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2024-05-22 19:01:20 +00:00
|
|
|
if neededFields.contains("*") {
|
|
|
|
if !unneededFields.contains(pf.resultField) {
|
2024-05-24 22:30:58 +00:00
|
|
|
if !pf.keepOriginalFields && !pf.skipEmptyResults {
|
|
|
|
unneededFields.add(pf.resultField)
|
|
|
|
}
|
2024-05-22 19:01:20 +00:00
|
|
|
if pf.iff != nil {
|
|
|
|
unneededFields.removeFields(pf.iff.neededFields)
|
|
|
|
}
|
|
|
|
for _, step := range pf.steps {
|
|
|
|
if step.field != "" {
|
|
|
|
unneededFields.remove(step.field)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if neededFields.contains(pf.resultField) {
|
2024-05-24 22:30:58 +00:00
|
|
|
if !pf.keepOriginalFields && !pf.skipEmptyResults {
|
|
|
|
neededFields.remove(pf.resultField)
|
|
|
|
}
|
2024-05-22 19:01:20 +00:00
|
|
|
if pf.iff != nil {
|
|
|
|
neededFields.addFields(pf.iff.neededFields)
|
|
|
|
}
|
|
|
|
for _, step := range pf.steps {
|
|
|
|
if step.field != "" {
|
|
|
|
neededFields.add(step.field)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-05-25 19:36:16 +00:00
|
|
|
func (pf *pipeFormat) hasFilterInWithQuery() bool {
|
|
|
|
return pf.iff.hasFilterInWithQuery()
|
|
|
|
}
|
|
|
|
|
2024-12-22 12:09:42 +00:00
|
|
|
func (pf *pipeFormat) initFilterInValues(cache *inValuesCache, getFieldValuesFunc getFieldValuesFunc) (pipe, error) {
|
2024-05-25 19:36:16 +00:00
|
|
|
iffNew, err := pf.iff.initFilterInValues(cache, getFieldValuesFunc)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
pfNew := *pf
|
|
|
|
pfNew.iff = iffNew
|
|
|
|
return &pfNew, nil
|
|
|
|
}
|
|
|
|
|
2025-01-24 17:49:20 +00:00
|
|
|
func (pf *pipeFormat) visitSubqueries(visitFunc func(q *Query)) {
|
|
|
|
pf.iff.visitSubqueries(visitFunc)
|
|
|
|
}
|
|
|
|
|
2024-05-25 19:36:16 +00:00
|
|
|
func (pf *pipeFormat) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
|
2024-05-22 19:01:20 +00:00
|
|
|
return &pipeFormatProcessor{
|
|
|
|
pf: pf,
|
2024-05-25 19:36:16 +00:00
|
|
|
ppNext: ppNext,
|
2024-05-22 19:01:20 +00:00
|
|
|
|
|
|
|
shards: make([]pipeFormatProcessorShard, workersCount),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
type pipeFormatProcessor struct {
|
|
|
|
pf *pipeFormat
|
2024-05-25 19:36:16 +00:00
|
|
|
ppNext pipeProcessor
|
2024-05-22 19:01:20 +00:00
|
|
|
|
|
|
|
shards []pipeFormatProcessorShard
|
|
|
|
}
|
|
|
|
|
|
|
|
type pipeFormatProcessorShard struct {
|
|
|
|
pipeFormatProcessorShardNopad
|
|
|
|
|
|
|
|
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
|
|
|
|
_ [128 - unsafe.Sizeof(pipeFormatProcessorShardNopad{})%128]byte
|
|
|
|
}
|
|
|
|
|
|
|
|
type pipeFormatProcessorShardNopad struct {
|
|
|
|
bm bitmap
|
|
|
|
|
2024-05-25 19:36:16 +00:00
|
|
|
a arena
|
|
|
|
rc resultColumn
|
2024-05-22 19:01:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (pfp *pipeFormatProcessor) writeBlock(workerID uint, br *blockResult) {
|
2024-09-25 14:16:53 +00:00
|
|
|
if br.rowsLen == 0 {
|
2024-05-22 19:01:20 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
shard := &pfp.shards[workerID]
|
2024-05-25 19:36:16 +00:00
|
|
|
pf := pfp.pf
|
2024-05-22 19:01:20 +00:00
|
|
|
|
|
|
|
bm := &shard.bm
|
2024-05-25 19:36:16 +00:00
|
|
|
if iff := pf.iff; iff != nil {
|
2024-12-22 01:09:36 +00:00
|
|
|
bm.init(br.rowsLen)
|
|
|
|
bm.setBits()
|
2024-05-22 19:01:20 +00:00
|
|
|
iff.f.applyToBlockResult(br, bm)
|
|
|
|
if bm.isZero() {
|
2024-05-25 19:36:16 +00:00
|
|
|
pfp.ppNext.writeBlock(workerID, br)
|
2024-05-22 19:01:20 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-05-25 19:36:16 +00:00
|
|
|
shard.rc.name = pf.resultField
|
|
|
|
|
|
|
|
resultColumn := br.getColumnByName(pf.resultField)
|
2024-09-25 14:16:53 +00:00
|
|
|
for rowIdx := 0; rowIdx < br.rowsLen; rowIdx++ {
|
2024-05-25 19:36:16 +00:00
|
|
|
v := ""
|
2024-12-22 01:09:36 +00:00
|
|
|
if pf.iff == nil || bm.isSetBit(rowIdx) {
|
2024-05-25 19:36:16 +00:00
|
|
|
v = shard.formatRow(pf, br, rowIdx)
|
|
|
|
if v == "" && pf.skipEmptyResults || pf.keepOriginalFields {
|
|
|
|
if vOrig := resultColumn.getValueAtRow(br, rowIdx); vOrig != "" {
|
|
|
|
v = vOrig
|
|
|
|
}
|
|
|
|
}
|
2024-05-22 19:01:20 +00:00
|
|
|
} else {
|
2024-05-25 19:36:16 +00:00
|
|
|
v = resultColumn.getValueAtRow(br, rowIdx)
|
2024-05-22 19:01:20 +00:00
|
|
|
}
|
2024-05-25 19:36:16 +00:00
|
|
|
shard.rc.addValue(v)
|
2024-05-22 19:01:20 +00:00
|
|
|
}
|
|
|
|
|
2024-05-25 19:36:16 +00:00
|
|
|
br.addResultColumn(&shard.rc)
|
|
|
|
pfp.ppNext.writeBlock(workerID, br)
|
|
|
|
|
|
|
|
shard.a.reset()
|
|
|
|
shard.rc.reset()
|
2024-05-22 19:01:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (pfp *pipeFormatProcessor) flush() error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2024-05-25 19:36:16 +00:00
|
|
|
func (shard *pipeFormatProcessorShard) formatRow(pf *pipeFormat, br *blockResult, rowIdx int) string {
|
2024-05-28 17:29:41 +00:00
|
|
|
b := shard.a.b
|
|
|
|
bLen := len(b)
|
2024-05-22 19:01:20 +00:00
|
|
|
for _, step := range pf.steps {
|
|
|
|
b = append(b, step.prefix...)
|
|
|
|
if step.field != "" {
|
|
|
|
c := br.getColumnByName(step.field)
|
|
|
|
v := c.getValueAtRow(br, rowIdx)
|
2024-06-05 01:18:12 +00:00
|
|
|
switch step.fieldOpt {
|
2025-01-13 05:47:33 +00:00
|
|
|
case "base64decode":
|
|
|
|
result, ok := appendBase64Decode(b, v)
|
|
|
|
if !ok {
|
|
|
|
b = append(b, v...)
|
|
|
|
} else {
|
|
|
|
b = result
|
|
|
|
}
|
|
|
|
case "base64encode":
|
|
|
|
b = appendBase64Encode(b, v)
|
2024-06-05 01:18:12 +00:00
|
|
|
case "duration":
|
|
|
|
nsecs, ok := tryParseInt64(v)
|
|
|
|
if !ok {
|
|
|
|
b = append(b, v...)
|
2025-01-13 05:47:33 +00:00
|
|
|
} else {
|
|
|
|
b = marshalDurationString(b, nsecs)
|
|
|
|
}
|
|
|
|
case "hexdecode":
|
|
|
|
b = appendHexDecode(b, v)
|
|
|
|
case "hexencode":
|
|
|
|
b = appendHexEncode(b, v)
|
|
|
|
case "hexnumdecode":
|
|
|
|
b = appendHexUint64Decode(b, v)
|
|
|
|
case "hexnumencode":
|
|
|
|
n, ok := tryParseUint64(v)
|
|
|
|
if !ok {
|
|
|
|
b = append(b, v...)
|
|
|
|
} else {
|
|
|
|
b = appendHexUint64Encode(b, n)
|
2024-06-05 01:18:12 +00:00
|
|
|
}
|
|
|
|
case "ipv4":
|
|
|
|
ipNum, ok := tryParseUint64(v)
|
|
|
|
if !ok || ipNum > math.MaxUint32 {
|
|
|
|
b = append(b, v...)
|
2025-01-13 05:47:33 +00:00
|
|
|
} else {
|
|
|
|
b = marshalIPv4String(b, uint32(ipNum))
|
2024-06-05 01:18:12 +00:00
|
|
|
}
|
2024-12-04 13:35:31 +00:00
|
|
|
case "lc":
|
|
|
|
b = appendLowercase(b, v)
|
|
|
|
case "time":
|
|
|
|
nsecs, ok := tryParseInt64(v)
|
|
|
|
if !ok {
|
|
|
|
b = append(b, v...)
|
2025-01-13 05:47:33 +00:00
|
|
|
} else {
|
|
|
|
b = marshalTimestampRFC3339NanoString(b, nsecs)
|
2024-12-04 13:35:31 +00:00
|
|
|
}
|
|
|
|
case "q":
|
|
|
|
b = quicktemplate.AppendJSONString(b, v, true)
|
|
|
|
case "uc":
|
|
|
|
b = appendUppercase(b, v)
|
2025-01-13 05:47:33 +00:00
|
|
|
case "urldecode":
|
|
|
|
b = appendURLDecode(b, v)
|
|
|
|
case "urlencode":
|
|
|
|
b = appendURLEncode(b, v)
|
2024-06-05 01:18:12 +00:00
|
|
|
default:
|
2024-05-22 19:01:20 +00:00
|
|
|
b = append(b, v...)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2024-05-28 17:29:41 +00:00
|
|
|
shard.a.b = b
|
2024-05-22 19:01:20 +00:00
|
|
|
|
2024-05-28 17:29:41 +00:00
|
|
|
return bytesutil.ToUnsafeString(b[bLen:])
|
2024-05-22 19:01:20 +00:00
|
|
|
}
|
|
|
|
|
2024-12-06 00:23:11 +00:00
|
|
|
func parsePipeFormat(lex *lexer) (pipe, error) {
|
2024-05-22 19:01:20 +00:00
|
|
|
if !lex.isKeyword("format") {
|
|
|
|
return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "format")
|
|
|
|
}
|
|
|
|
lex.nextToken()
|
|
|
|
|
|
|
|
// parse optional if (...)
|
|
|
|
var iff *ifFilter
|
|
|
|
if lex.isKeyword("if") {
|
|
|
|
f, err := parseIfFilter(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
iff = f
|
|
|
|
}
|
|
|
|
|
|
|
|
// parse format
|
|
|
|
formatStr, err := getCompoundToken(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot read 'format': %w", err)
|
|
|
|
}
|
|
|
|
steps, err := parsePatternSteps(formatStr)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'pattern' %q: %w", formatStr, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// parse optional 'as ...` part
|
|
|
|
resultField := "_msg"
|
|
|
|
if lex.isKeyword("as") {
|
|
|
|
lex.nextToken()
|
|
|
|
field, err := parseFieldName(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse result field after 'format %q as': %w", formatStr, err)
|
|
|
|
}
|
|
|
|
resultField = field
|
|
|
|
}
|
|
|
|
|
2024-05-24 22:30:58 +00:00
|
|
|
keepOriginalFields := false
|
|
|
|
skipEmptyResults := false
|
|
|
|
switch {
|
|
|
|
case lex.isKeyword("keep_original_fields"):
|
|
|
|
lex.nextToken()
|
|
|
|
keepOriginalFields = true
|
|
|
|
case lex.isKeyword("skip_empty_results"):
|
|
|
|
lex.nextToken()
|
|
|
|
skipEmptyResults = true
|
|
|
|
}
|
|
|
|
|
2024-05-22 19:01:20 +00:00
|
|
|
pf := &pipeFormat{
|
2024-05-24 22:30:58 +00:00
|
|
|
formatStr: formatStr,
|
|
|
|
steps: steps,
|
|
|
|
resultField: resultField,
|
|
|
|
keepOriginalFields: keepOriginalFields,
|
|
|
|
skipEmptyResults: skipEmptyResults,
|
|
|
|
iff: iff,
|
2024-05-22 19:01:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return pf, nil
|
|
|
|
}
|
2024-12-04 13:35:31 +00:00
|
|
|
|
|
|
|
// appendUppercase appends s converted to upper case to dst and returns the result.
//
// The conversion is performed per Unicode code point via unicode.ToUpper.
// Bytes, which do not form a valid UTF-8 sequence, are copied to dst verbatim
// instead of being replaced with U+FFFD, so non-UTF-8 data isn't corrupted.
func appendUppercase(dst []byte, s string) []byte {
	for i := 0; i < len(s); {
		r, size := utf8.DecodeRuneInString(s[i:])
		if r == utf8.RuneError && size == 1 {
			// Invalid UTF-8 byte. A properly encoded U+FFFD has size 3
			// and is handled by the regular path below.
			dst = append(dst, s[i])
		} else {
			dst = utf8.AppendRune(dst, unicode.ToUpper(r))
		}
		i += size
	}
	return dst
}
|
|
|
|
|
|
|
|
// appendLowercase appends s converted to lower case to dst and returns the result.
//
// The conversion is performed per Unicode code point via unicode.ToLower.
// Bytes, which do not form a valid UTF-8 sequence, are copied to dst verbatim
// instead of being replaced with U+FFFD, so non-UTF-8 data isn't corrupted.
func appendLowercase(dst []byte, s string) []byte {
	for i := 0; i < len(s); {
		r, size := utf8.DecodeRuneInString(s[i:])
		if r == utf8.RuneError && size == 1 {
			// Invalid UTF-8 byte. A properly encoded U+FFFD has size 3
			// and is handled by the regular path below.
			dst = append(dst, s[i])
		} else {
			dst = utf8.AppendRune(dst, unicode.ToLower(r))
		}
		i += size
	}
	return dst
}
|
2025-01-13 05:47:33 +00:00
|
|
|
|
|
|
|
func appendURLDecode(dst []byte, s string) []byte {
|
|
|
|
for len(s) > 0 {
|
|
|
|
n := strings.IndexAny(s, "%+")
|
|
|
|
if n < 0 {
|
|
|
|
return append(dst, s...)
|
|
|
|
}
|
|
|
|
dst = append(dst, s[:n]...)
|
|
|
|
ch := s[n]
|
|
|
|
s = s[n+1:]
|
|
|
|
if ch == '+' {
|
|
|
|
dst = append(dst, ' ')
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if len(s) < 2 {
|
|
|
|
dst = append(dst, '%')
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
hi, ok1 := unhexChar(s[0])
|
|
|
|
lo, ok2 := unhexChar(s[1])
|
|
|
|
if !ok1 || !ok2 {
|
|
|
|
dst = append(dst, '%')
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
ch = (hi << 4) | lo
|
|
|
|
dst = append(dst, ch)
|
|
|
|
s = s[2:]
|
|
|
|
}
|
|
|
|
return dst
|
|
|
|
}
|
|
|
|
|
|
|
|
func appendURLEncode(dst []byte, s string) []byte {
|
|
|
|
n := len(s)
|
|
|
|
for i := 0; i < n; i++ {
|
|
|
|
c := s[i]
|
|
|
|
|
|
|
|
// See http://www.w3.org/TR/html5/forms.html#form-submission-algorithm
|
|
|
|
if c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' ||
|
|
|
|
c == '-' || c == '.' || c == '_' {
|
|
|
|
dst = append(dst, c)
|
|
|
|
} else {
|
|
|
|
if c == ' ' {
|
|
|
|
dst = append(dst, '+')
|
|
|
|
} else {
|
|
|
|
dst = append(dst, '%', hexCharUpper(c>>4), hexCharUpper(c&15))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return dst
|
|
|
|
}
|
|
|
|
|
|
|
|
// hexCharUpper returns the upper-case hex digit for c.
//
// The callers in this file pass values in the range [0..15].
func hexCharUpper(c byte) byte {
	if c >= 10 {
		return 'A' + (c - 10)
	}
	return '0' + c
}
|
|
|
|
|
|
|
|
// unhexChar returns the numeric value of the hex digit c.
//
// Both lower-case and upper-case digits are accepted.
// (0, false) is returned if c isn't a hex digit.
func unhexChar(c byte) (byte, bool) {
	switch {
	case '0' <= c && c <= '9':
		return c - '0', true
	case 'A' <= c && c <= 'F':
		return 10 + (c - 'A'), true
	case 'a' <= c && c <= 'f':
		return 10 + (c - 'a'), true
	default:
		return 0, false
	}
}
|
|
|
|
|
|
|
|
func appendHexUint64Encode(dst []byte, n uint64) []byte {
|
|
|
|
for shift := 60; shift >= 0; shift -= 4 {
|
|
|
|
dst = append(dst, hexCharUpper(byte(n>>shift)&15))
|
|
|
|
}
|
|
|
|
return dst
|
|
|
|
}
|
|
|
|
|
|
|
|
func appendHexUint64Decode(dst []byte, s string) []byte {
|
|
|
|
if len(s) > 16 {
|
|
|
|
return append(dst, s...)
|
|
|
|
}
|
|
|
|
sOrig := s
|
|
|
|
n := uint64(0)
|
|
|
|
for len(s) > 0 {
|
|
|
|
x, ok := unhexChar(s[0])
|
|
|
|
if !ok {
|
|
|
|
return append(dst, sOrig...)
|
|
|
|
}
|
|
|
|
n = (n << 4) | uint64(x)
|
|
|
|
s = s[1:]
|
|
|
|
}
|
|
|
|
return marshalUint64String(dst, n)
|
|
|
|
}
|
|
|
|
|
|
|
|
func appendHexEncode(dst []byte, s string) []byte {
|
|
|
|
for i := 0; i < len(s); i++ {
|
|
|
|
c := s[i]
|
|
|
|
hi := hexCharUpper(c >> 4)
|
|
|
|
lo := hexCharUpper(c & 15)
|
|
|
|
dst = append(dst, hi, lo)
|
|
|
|
}
|
|
|
|
return dst
|
|
|
|
}
|
|
|
|
|
|
|
|
func appendHexDecode(dst []byte, s string) []byte {
|
|
|
|
for len(s) >= 2 {
|
|
|
|
hi, ok1 := unhexChar(s[0])
|
|
|
|
lo, ok2 := unhexChar(s[1])
|
|
|
|
if !ok1 || !ok2 {
|
|
|
|
dst = append(dst, s[0], s[1])
|
|
|
|
} else {
|
|
|
|
ch := (hi << 4) | lo
|
|
|
|
dst = append(dst, ch)
|
|
|
|
}
|
|
|
|
s = s[2:]
|
|
|
|
}
|
|
|
|
return append(dst, s...)
|
|
|
|
}
|
|
|
|
|
|
|
|
func appendBase64Encode(dst []byte, s string) []byte {
|
|
|
|
return base64.StdEncoding.AppendEncode(dst, bytesutil.ToUnsafeBytes(s))
|
|
|
|
}
|
|
|
|
|
|
|
|
func appendBase64Decode(dst []byte, s string) ([]byte, bool) {
|
|
|
|
result, err := base64.StdEncoding.AppendDecode(dst, bytesutil.ToUnsafeBytes(s))
|
|
|
|
if err != nil {
|
|
|
|
return dst, false
|
|
|
|
}
|
|
|
|
return result, true
|
|
|
|
}
|