This commit is contained in:
Aliaksandr Valialkin 2024-05-25 20:05:30 +02:00
parent 38646a0491
commit 41547740f6
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
2 changed files with 69 additions and 21 deletions

View file

@ -23,6 +23,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
* FEATURE: add [`unroll` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#unroll-pipe), which can be used for unrolling JSON arrays stored in [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). * FEATURE: add [`unroll` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#unroll-pipe), which can be used for unrolling JSON arrays stored in [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
* FEATURE: add [`replace_regexp` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#replace_regexp-pipe), which allows updating [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) with regular expressions. * FEATURE: add [`replace_regexp` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#replace_regexp-pipe), which allows updating [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) with regular expressions.
* FEATURE: improve performance for [`format`](https://docs.victoriametrics.com/victorialogs/logsql/#format-pipe) and [`extract`](https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe) pipes. * FEATURE: improve performance for [`format`](https://docs.victoriametrics.com/victorialogs/logsql/#format-pipe) and [`extract`](https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe) pipes.
* FEATURE: improve performance for [`/select/logsql/field_names` HTTP API](https://docs.victoriametrics.com/victorialogs/querying/#querying-field-names).
* BUGFIX: do not return referenced fields if they weren't present in the original logs. For example, `_time:5m | format if (non_existing_field:"") "abc"` could return empty `non_existing_field`, while it shouldn't be returned because it is missing in the original logs. * BUGFIX: do not return referenced fields if they weren't present in the original logs. For example, `_time:5m | format if (non_existing_field:"") "abc"` could return empty `non_existing_field`, while it shouldn't be returned because it is missing in the original logs.
* BUGFIX: properly initialize values for [`in(...)` filter](https://docs.victoriametrics.com/victorialogs/logsql/#exact-filter) inside [`filter` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#filter-pipe) if the `in(...)` contains other [filters](https://docs.victoriametrics.com/victorialogs/logsql/#filters). For example, `_time:5m | filter ip:in(user_type:admin | fields ip)` now works correctly. * BUGFIX: properly initialize values for [`in(...)` filter](https://docs.victoriametrics.com/victorialogs/logsql/#exact-filter) inside [`filter` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#filter-pipe) if the `in(...)` contains other [filters](https://docs.victoriametrics.com/victorialogs/logsql/#filters). For example, `_time:5m | filter ip:in(user_type:admin | fields ip)` now works correctly.

View file

@ -39,6 +39,9 @@ type blockResult struct {
// csInitialized is set to true if cs is properly initialized and can be returned from getColumns(). // csInitialized is set to true if cs is properly initialized and can be returned from getColumns().
csInitialized bool csInitialized bool
fvecs []filteredValuesEncodedCreator
svecs []searchValuesEncodedCreator
} }
func (br *blockResult) reset() { func (br *blockResult) reset() {
@ -59,6 +62,12 @@ func (br *blockResult) reset() {
br.cs = br.cs[:0] br.cs = br.cs[:0]
br.csInitialized = false br.csInitialized = false
clear(br.fvecs)
br.fvecs = br.fvecs[:0]
clear(br.svecs)
br.svecs = br.svecs[:0]
} }
// clone returns a clone of br, which owns its own data. // clone returns a clone of br, which owns its own data.
@ -96,6 +105,8 @@ func (br *blockResult) clone() *blockResult {
// do not clone br.csEmpty - it will be populated by the caller via getColumnByName(). // do not clone br.csEmpty - it will be populated by the caller via getColumnByName().
// do not clone br.fvecs and br.svecs, since they may point to external data.
return brNew return brNew
} }
@ -136,6 +147,9 @@ func (br *blockResult) initFromFilterNeededColumns(brSrc *blockResult, bm *bitma
} }
} }
// appendFilteredColumn adds cSrc with the given bm filter to br.
//
// the br is valid until brSrc, cSrc or bm is updated.
func (br *blockResult) appendFilteredColumn(brSrc *blockResult, cSrc *blockResultColumn, bm *bitmap) { func (br *blockResult) appendFilteredColumn(brSrc *blockResult, cSrc *blockResultColumn, bm *bitmap) {
if len(br.timestamps) == 0 { if len(br.timestamps) == 0 {
return return
@ -154,22 +168,35 @@ func (br *blockResult) appendFilteredColumn(brSrc *blockResult, cSrc *blockResul
cDst.minValue = cSrc.minValue cDst.minValue = cSrc.minValue
cDst.maxValue = cSrc.maxValue cDst.maxValue = cSrc.maxValue
cDst.dictValues = cSrc.dictValues cDst.dictValues = cSrc.dictValues
cDst.newValuesEncodedFunc = func(br *blockResult) []string { br.fvecs = append(br.fvecs, filteredValuesEncodedCreator{
valuesEncodedSrc := cSrc.getValuesEncoded(brSrc) br: brSrc,
c: cSrc,
bm: bm,
})
cDst.valuesEncodedCreator = &br.fvecs[len(br.fvecs)-1]
}
br.csBuf = append(br.csBuf, cDst)
br.csInitialized = false
}
// filteredValuesEncodedCreator creates encoded values for a column on demand
// by picking the values of the source column c at the rows selected by bm.
//
// It stores pointers to the source data instead of copies of it.
type filteredValuesEncodedCreator struct {
// br is the source blockResult; it is passed to c.getValuesEncoded().
br *blockResult
// c is the source column the encoded values are read from.
c *blockResultColumn
// bm is the bitmap whose set bits select the values of c to return.
bm *bitmap
}
func (fvec *filteredValuesEncodedCreator) newValuesEncoded(br *blockResult) []string {
valuesEncodedSrc := fvec.c.getValuesEncoded(fvec.br)
valuesBuf := br.valuesBuf valuesBuf := br.valuesBuf
valuesBufLen := len(valuesBuf) valuesBufLen := len(valuesBuf)
bm.forEachSetBitReadonly(func(idx int) { fvec.bm.forEachSetBitReadonly(func(idx int) {
valuesBuf = append(valuesBuf, valuesEncodedSrc[idx]) valuesBuf = append(valuesBuf, valuesEncodedSrc[idx])
}) })
br.valuesBuf = valuesBuf br.valuesBuf = valuesBuf
return valuesBuf[valuesBufLen:] return valuesBuf[valuesBufLen:]
}
}
br.csBuf = append(br.csBuf, cDst)
br.csInitialized = false
} }
// cloneValues clones the given values into br and returns the cloned values. // cloneValues clones the given values into br and returns the cloned values.
@ -445,13 +472,28 @@ func (br *blockResult) addColumn(bs *blockSearch, bm *bitmap, ch *columnHeader)
minValue: ch.minValue, minValue: ch.minValue,
maxValue: ch.maxValue, maxValue: ch.maxValue,
dictValues: ch.valuesDict.values, dictValues: ch.valuesDict.values,
newValuesEncodedFunc: func(br *blockResult) []string {
return br.newValuesEncodedFromColumnHeader(bs, bm, ch)
},
}) })
c := &br.csBuf[len(br.csBuf)-1]
br.svecs = append(br.svecs, searchValuesEncodedCreator{
bs: bs,
bm: bm,
ch: ch,
})
c.valuesEncodedCreator = &br.svecs[len(br.svecs)-1]
br.csInitialized = false br.csInitialized = false
} }
// searchValuesEncodedCreator creates encoded values for a column on demand
// from the block search state captured in its fields.
//
// The fields are passed as-is to blockResult.newValuesEncodedFromColumnHeader().
type searchValuesEncodedCreator struct {
// bs is the block search the column belongs to.
bs *blockSearch
// bm is the bitmap passed along with bs and ch when creating the values.
bm *bitmap
// ch is the header of the column to create the encoded values for.
ch *columnHeader
}
// newValuesEncoded returns encoded values for the column described by svec.
//
// It delegates to br.newValuesEncodedFromColumnHeader() with the bs, bm and ch stored in svec.
func (svec *searchValuesEncodedCreator) newValuesEncoded(br *blockResult) []string {
return br.newValuesEncodedFromColumnHeader(svec.bs, svec.bm, svec.ch)
}
func (br *blockResult) addTimeColumn() { func (br *blockResult) addTimeColumn() {
br.csBuf = append(br.csBuf, blockResultColumn{ br.csBuf = append(br.csBuf, blockResultColumn{
name: "_time", name: "_time",
@ -1481,10 +1523,10 @@ type blockResultColumn struct {
// valuesBucketed contains values after getValuesBucketed() call // valuesBucketed contains values after getValuesBucketed() call
valuesBucketed []string valuesBucketed []string
// newValuesEncodedFunc must return valuesEncoded. // valuesEncodedCreator must return valuesEncoded.
// //
// This func must be set for non-const and non-time columns if valuesEncoded field isn't set. // This interface must be set for non-const and non-time columns if valuesEncoded field isn't set.
newValuesEncodedFunc func(br *blockResult) []string valuesEncodedCreator columnValuesEncodedCreator
// bucketSizeStr contains bucketSizeStr for valuesBucketed // bucketSizeStr contains bucketSizeStr for valuesBucketed
bucketSizeStr string bucketSizeStr string
@ -1493,6 +1535,11 @@ type blockResultColumn struct {
bucketOffsetStr string bucketOffsetStr string
} }
// columnValuesEncodedCreator must return encoded values for the current column.
//
// blockResultColumn.getValuesEncoded() calls newValuesEncoded() when the valuesEncoded field
// of the column isn't set yet, and stores the returned slice in that field.
//
// It is implemented by filteredValuesEncodedCreator and searchValuesEncodedCreator.
type columnValuesEncodedCreator interface {
// newValuesEncoded returns encoded values for the column; br is the blockResult
// passed to getValuesEncoded().
newValuesEncoded(br *blockResult) []string
}
// clone returns a clone of c backed by data from br. // clone returns a clone of c backed by data from br.
// //
// It is expected that c.valuesEncoded is already initialized for non-time column. // It is expected that c.valuesEncoded is already initialized for non-time column.
@ -1521,8 +1568,8 @@ func (c *blockResultColumn) clone(br *blockResult) blockResultColumn {
} }
cNew.valuesBucketed = br.cloneValues(c.valuesBucketed) cNew.valuesBucketed = br.cloneValues(c.valuesBucketed)
// Do not copy c.newValuesEncodedFunc, since it may refer to data, which may change over time. // Do not copy c.valuesEncodedCreator, since it may refer to data, which may change over time.
// We already copied c.valuesEncoded, so cNew.newValuesEncodedFunc must be nil. // We already copied c.valuesEncoded, so cNew.valuesEncodedCreator must be nil.
cNew.bucketSizeStr = c.bucketSizeStr cNew.bucketSizeStr = c.bucketSizeStr
cNew.bucketOffsetStr = c.bucketOffsetStr cNew.bucketOffsetStr = c.bucketOffsetStr
@ -1616,7 +1663,7 @@ func (c *blockResultColumn) getValuesEncoded(br *blockResult) []string {
} }
if c.valuesEncoded == nil { if c.valuesEncoded == nil {
c.valuesEncoded = c.newValuesEncodedFunc(br) c.valuesEncoded = c.valuesEncodedCreator.newValuesEncoded(br)
} }
return c.valuesEncoded return c.valuesEncoded
} }