diff --git a/docs/VictoriaLogs/CHANGELOG.md b/docs/VictoriaLogs/CHANGELOG.md index 4e645be87..1ff5f611d 100644 --- a/docs/VictoriaLogs/CHANGELOG.md +++ b/docs/VictoriaLogs/CHANGELOG.md @@ -23,6 +23,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta * FEATURE: add [`unroll` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#unroll-pipe), which can be used for unrolling JSON arrays stored in [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). * FEATURE: add [`replace_regexp` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#replace_regexp-pipe), which allows updating [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) with regular expressions. * FEATURE: improve performance for [`format`](https://docs.victoriametrics.com/victorialogs/logsql/#format-pipe) and [`extract`](https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe) pipes. +* FEATURE: improve performance for [`/select/logsql/field_names` HTTP API](https://docs.victoriametrics.com/victorialogs/querying/#querying-field-names). * BUGFIX: do not return referenced fields if they weren't present in the original logs. For example, `_time:5m | format if (non_existing_field:"") "abc"` could return empty `non_existing_field`, while it shouldn't be returned because it is missing in the original logs. * BUGFIX: properly initialize values for [`in(...)` filter](https://docs.victoriametrics.com/victorialogs/logsql/#exact-filter) inside [`filter` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#filter-pipe) if the `in(...)` contains other [filters](https://docs.victoriametrics.com/victorialogs/logsql/#filters). For example, `_time:5m | filter ip:in(user_type:admin | fields ip)` now works correctly. 
diff --git a/lib/logstorage/block_result.go b/lib/logstorage/block_result.go index aa2ac5168..0e8c7bf6a 100644 --- a/lib/logstorage/block_result.go +++ b/lib/logstorage/block_result.go @@ -39,6 +39,9 @@ type blockResult struct { // csInitialized is set to true if cs is properly initialized and can be returned from getColumns(). csInitialized bool + + fvecs []filteredValuesEncodedCreator + svecs []searchValuesEncodedCreator } func (br *blockResult) reset() { @@ -59,6 +62,12 @@ func (br *blockResult) reset() { br.cs = br.cs[:0] br.csInitialized = false + + clear(br.fvecs) + br.fvecs = br.fvecs[:0] + + clear(br.svecs) + br.svecs = br.svecs[:0] } // clone returns a clone of br, which owns its own data. @@ -96,6 +105,8 @@ func (br *blockResult) clone() *blockResult { // do not clone br.csEmpty - it will be populated by the caller via getColumnByName(). + // do not clone br.fvecs and br.svecs, since they may point to external data. + return brNew } @@ -136,6 +147,9 @@ func (br *blockResult) initFromFilterNeededColumns(brSrc *blockResult, bm *bitma } } +// appendFilteredColumn adds cSrc with the given bm filter to br. +// +// the br is valid until brSrc, cSrc or bm is updated. 
func (br *blockResult) appendFilteredColumn(brSrc *blockResult, cSrc *blockResultColumn, bm *bitmap) { if len(br.timestamps) == 0 { return @@ -154,24 +168,37 @@ func (br *blockResult) appendFilteredColumn(brSrc *blockResult, cSrc *blockResul cDst.minValue = cSrc.minValue cDst.maxValue = cSrc.maxValue cDst.dictValues = cSrc.dictValues - cDst.newValuesEncodedFunc = func(br *blockResult) []string { - valuesEncodedSrc := cSrc.getValuesEncoded(brSrc) - - valuesBuf := br.valuesBuf - valuesBufLen := len(valuesBuf) - bm.forEachSetBitReadonly(func(idx int) { - valuesBuf = append(valuesBuf, valuesEncodedSrc[idx]) - }) - br.valuesBuf = valuesBuf - - return valuesBuf[valuesBufLen:] - } + br.fvecs = append(br.fvecs, filteredValuesEncodedCreator{ + br: brSrc, + c: cSrc, + bm: bm, + }) + cDst.valuesEncodedCreator = &br.fvecs[len(br.fvecs)-1] } br.csBuf = append(br.csBuf, cDst) br.csInitialized = false } +type filteredValuesEncodedCreator struct { + br *blockResult + c *blockResultColumn + bm *bitmap +} + +func (fvec *filteredValuesEncodedCreator) newValuesEncoded(br *blockResult) []string { + valuesEncodedSrc := fvec.c.getValuesEncoded(fvec.br) + + valuesBuf := br.valuesBuf + valuesBufLen := len(valuesBuf) + fvec.bm.forEachSetBitReadonly(func(idx int) { + valuesBuf = append(valuesBuf, valuesEncodedSrc[idx]) + }) + br.valuesBuf = valuesBuf + + return valuesBuf[valuesBufLen:] +} + // cloneValues clones the given values into br and returns the cloned values. 
func (br *blockResult) cloneValues(values []string) []string { if values == nil { @@ -445,13 +472,28 @@ func (br *blockResult) addColumn(bs *blockSearch, bm *bitmap, ch *columnHeader) minValue: ch.minValue, maxValue: ch.maxValue, dictValues: ch.valuesDict.values, - newValuesEncodedFunc: func(br *blockResult) []string { - return br.newValuesEncodedFromColumnHeader(bs, bm, ch) - }, }) + c := &br.csBuf[len(br.csBuf)-1] + + br.svecs = append(br.svecs, searchValuesEncodedCreator{ + bs: bs, + bm: bm, + ch: ch, + }) + c.valuesEncodedCreator = &br.svecs[len(br.svecs)-1] br.csInitialized = false } +type searchValuesEncodedCreator struct { + bs *blockSearch + bm *bitmap + ch *columnHeader +} + +func (svec *searchValuesEncodedCreator) newValuesEncoded(br *blockResult) []string { + return br.newValuesEncodedFromColumnHeader(svec.bs, svec.bm, svec.ch) +} + func (br *blockResult) addTimeColumn() { br.csBuf = append(br.csBuf, blockResultColumn{ name: "_time", @@ -1481,10 +1523,10 @@ type blockResultColumn struct { // valuesBucketed contains values after getValuesBucketed() call valuesBucketed []string - // newValuesEncodedFunc must return valuesEncoded. + // valuesEncodedCreator must return valuesEncoded. // - // This func must be set for non-const and non-time columns if valuesEncoded field isn't set. - newValuesEncodedFunc func(br *blockResult) []string + // This interface must be set for non-const and non-time columns if valuesEncoded field isn't set. + valuesEncodedCreator columnValuesEncodedCreator // bucketSizeStr contains bucketSizeStr for valuesBucketed bucketSizeStr string @@ -1493,6 +1535,11 @@ type blockResultColumn struct { bucketOffsetStr string } +// columnValuesEncodedCreator must return encoded values for the current column. +type columnValuesEncodedCreator interface { + newValuesEncoded(br *blockResult) []string +} + // clone returns a clone of c backed by data from br. // // It is expected that c.valuesEncoded is already initialized for non-time column. 
@@ -1521,8 +1568,8 @@ func (c *blockResultColumn) clone(br *blockResult) blockResultColumn { } cNew.valuesBucketed = br.cloneValues(c.valuesBucketed) - // Do not copy c.newValuesEncodedFunc, since it may refer to data, which may change over time. - // We already copied c.valuesEncoded, so cNew.newValuesEncodedFunc must be nil. + // Do not copy c.valuesEncodedCreator, since it may refer to data, which may change over time. + // We already copied c.valuesEncoded, so cNew.valuesEncodedCreator must be nil. cNew.bucketSizeStr = c.bucketSizeStr cNew.bucketOffsetStr = c.bucketOffsetStr @@ -1616,7 +1663,7 @@ func (c *blockResultColumn) getValuesEncoded(br *blockResult) []string { } if c.valuesEncoded == nil { - c.valuesEncoded = c.newValuesEncodedFunc(br) + c.valuesEncoded = c.valuesEncodedCreator.newValuesEncoded(br) } return c.valuesEncoded }