From 65b93b17b1a87c970184f026e34ee735613e10b5 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Wed, 25 Sep 2024 16:59:58 +0200 Subject: [PATCH] lib/logstorage: lazily read column headers metadata during queries This improves performance for analytical queries, which do not need column headers metadata. For example, the following query doesn't need column headers metadata, since _stream and min(_time) are stored in the block header, which is read separately from column headers metadata: _time:1w | stats by (_stream) min(_time) min_time This commit significantly improves the performance for this query. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7070 --- lib/logstorage/block_result.go | 15 ++++++++------ lib/logstorage/block_search.go | 25 ++++++++++++++++++------ lib/logstorage/filter_and.go | 5 +++-- lib/logstorage/filter_any_case_phrase.go | 5 +++-- lib/logstorage/filter_any_case_prefix.go | 5 +++-- lib/logstorage/filter_exact.go | 5 +++-- lib/logstorage/filter_exact_prefix.go | 5 +++-- lib/logstorage/filter_in.go | 5 +++-- lib/logstorage/filter_ipv4_range.go | 5 +++-- lib/logstorage/filter_len_range.go | 5 +++-- lib/logstorage/filter_or.go | 5 +++-- lib/logstorage/filter_phrase.go | 5 +++-- lib/logstorage/filter_prefix.go | 5 +++-- lib/logstorage/filter_range.go | 5 +++-- lib/logstorage/filter_regexp.go | 5 +++-- lib/logstorage/filter_sequence.go | 5 +++-- lib/logstorage/filter_string_range.go | 5 +++-- 17 files changed, 73 insertions(+), 42 deletions(-) diff --git a/lib/logstorage/block_result.go b/lib/logstorage/block_result.go index a4e14f6950..1d55f94118 100644 --- a/lib/logstorage/block_result.go +++ b/lib/logstorage/block_result.go @@ -305,10 +305,11 @@ func (br *blockResult) initAllColumns() { if !slices.Contains(unneededColumnNames, "_msg") { // Add _msg column - v := br.bs.csh.getConstColumnValue("_msg") + csh := br.bs.getColumnsHeader() + v := csh.getConstColumnValue("_msg") if v != "" { br.addConstColumn("_msg", v) - } else if 
ch := br.bs.csh.getColumnHeader("_msg"); ch != nil { + } else if ch := csh.getColumnHeader("_msg"); ch != nil { br.addColumn(ch) } else { br.addConstColumn("_msg", "") @@ -316,7 +317,8 @@ func (br *blockResult) initAllColumns() { } // Add other const columns - for _, cc := range br.bs.csh.constColumns { + csh := br.bs.getColumnsHeader() + for _, cc := range csh.constColumns { if isMsgFieldName(cc.Name) { continue } @@ -326,7 +328,7 @@ func (br *blockResult) initAllColumns() { } // Add other non-const columns - chs := br.bs.csh.columnHeaders + chs := csh.columnHeaders for i := range chs { ch := &chs[i] if isMsgFieldName(ch.name) { @@ -355,10 +357,11 @@ func (br *blockResult) initRequestedColumns() { case "_time": br.addTimeColumn() default: - v := br.bs.csh.getConstColumnValue(columnName) + csh := br.bs.getColumnsHeader() + v := csh.getConstColumnValue(columnName) if v != "" { br.addConstColumn(columnName, v) - } else if ch := br.bs.csh.getColumnHeader(columnName); ch != nil { + } else if ch := csh.getColumnHeader(columnName); ch != nil { br.addColumn(ch) } else { br.addConstColumn(columnName, "") diff --git a/lib/logstorage/block_search.go b/lib/logstorage/block_search.go index d260ba2642..80b708a4ce 100644 --- a/lib/logstorage/block_search.go +++ b/lib/logstorage/block_search.go @@ -113,10 +113,15 @@ type blockSearch struct { // sbu is used for unmarshaling local columns sbu stringsBlockUnmarshaler - // csh is the columnsHeader associated with the given block - csh columnsHeader + // cshCached is the columnsHeader associated with the given block + // + // it is initialized lazily by calling getColumnsHeader(). + cshCached columnsHeader - // a is used for storing unmarshaled data in csh + // cshInitialized is set to true if cshCached is initialized. + cshInitialized bool + + // a is used for storing unmarshaled data in cshCached a arena // seenStreams contains seen streamIDs for the recent searches. 
@@ -146,7 +151,10 @@ func (bs *blockSearch) reset() { } bs.sbu.reset() - bs.csh.reset() + + bs.cshCached.reset() + bs.cshInitialized = false + bs.a.reset() // Do not reset seenStreams, since its' lifetime is managed by blockResult.addStreamColumn() code. @@ -161,8 +169,6 @@ func (bs *blockSearch) search(bsw *blockSearchWork, bm *bitmap) { bs.bsw = bsw - bs.csh.initFromBlockHeader(&bs.a, bsw.p, &bsw.bh) - // search rows matching the given filter bm.init(int(bsw.bh.rowsCount)) bm.setBits() @@ -183,6 +189,17 @@ func (bs *blockSearch) search(bsw *blockSearchWork, bm *bitmap) { } } +// getColumnsHeader returns the columnsHeader for the block being searched. +// +// The header is initialized from the block header on the first call and cached until reset() is called. +func (bs *blockSearch) getColumnsHeader() *columnsHeader { + if !bs.cshInitialized { + bs.cshCached.initFromBlockHeader(&bs.a, bs.bsw.p, &bs.bsw.bh) + bs.cshInitialized = true + } + return &bs.cshCached +} + func (csh *columnsHeader) initFromBlockHeader(a *arena, p *part, bh *blockHeader) { bb := longTermBufPool.Get() columnsHeaderSize := bh.columnsHeaderSize diff --git a/lib/logstorage/filter_and.go b/lib/logstorage/filter_and.go index cf0fa8db28..3d0ac20919 100644 --- a/lib/logstorage/filter_and.go +++ b/lib/logstorage/filter_and.go @@ -81,7 +81,8 @@ func (fa *filterAnd) matchBloomFilters(bs *blockSearch) bool { fieldName := ft.field tokens := ft.tokens - v := bs.csh.getConstColumnValue(fieldName) + csh := bs.getColumnsHeader() + v := csh.getConstColumnValue(fieldName) if v != "" { if matchStringByAllTokens(v, tokens) { continue @@ -89,7 +90,7 @@ func (fa *filterAnd) matchBloomFilters(bs *blockSearch) bool { return false } - ch := bs.csh.getColumnHeader(fieldName) + ch := csh.getColumnHeader(fieldName) if ch == nil { return false } diff --git a/lib/logstorage/filter_any_case_phrase.go b/lib/logstorage/filter_any_case_phrase.go index 8c3c924c71..63f2ee338e 100644 --- a/lib/logstorage/filter_any_case_phrase.go +++ b/lib/logstorage/filter_any_case_phrase.go @@ -86,7 +86,8 @@ func (fp *filterAnyCasePhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) { phraseLowercase := fp.getPhraseLowercase() // Verify whether fp 
matches const column - v := bs.csh.getConstColumnValue(fieldName) + csh := bs.getColumnsHeader() + v := csh.getConstColumnValue(fieldName) if v != "" { if !matchAnyCasePhrase(v, phraseLowercase) { bm.resetBits() @@ -95,7 +96,7 @@ func (fp *filterAnyCasePhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) { } // Verify whether fp matches other columns - ch := bs.csh.getColumnHeader(fieldName) + ch := csh.getColumnHeader(fieldName) if ch == nil { // Fast path - there are no matching columns. // It matches anything only for empty phrase. diff --git a/lib/logstorage/filter_any_case_prefix.go b/lib/logstorage/filter_any_case_prefix.go index 10561c18ef..1e569612d4 100644 --- a/lib/logstorage/filter_any_case_prefix.go +++ b/lib/logstorage/filter_any_case_prefix.go @@ -90,7 +90,8 @@ func (fp *filterAnyCasePrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) { prefixLowercase := fp.getPrefixLowercase() // Verify whether fp matches const column - v := bs.csh.getConstColumnValue(fieldName) + csh := bs.getColumnsHeader() + v := csh.getConstColumnValue(fieldName) if v != "" { if !matchAnyCasePrefix(v, prefixLowercase) { bm.resetBits() @@ -99,7 +100,7 @@ func (fp *filterAnyCasePrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) { } // Verify whether fp matches other columns - ch := bs.csh.getColumnHeader(fieldName) + ch := csh.getColumnHeader(fieldName) if ch == nil { // Fast path - there are no matching columns. 
bm.resetBits() diff --git a/lib/logstorage/filter_exact.go b/lib/logstorage/filter_exact.go index 70c119aba3..3d73590476 100644 --- a/lib/logstorage/filter_exact.go +++ b/lib/logstorage/filter_exact.go @@ -174,7 +174,8 @@ func (fe *filterExact) applyToBlockSearch(bs *blockSearch, bm *bitmap) { fieldName := fe.fieldName value := fe.value - v := bs.csh.getConstColumnValue(fieldName) + csh := bs.getColumnsHeader() + v := csh.getConstColumnValue(fieldName) if v != "" { if value != v { bm.resetBits() @@ -183,7 +184,7 @@ func (fe *filterExact) applyToBlockSearch(bs *blockSearch, bm *bitmap) { } // Verify whether filter matches other columns - ch := bs.csh.getColumnHeader(fieldName) + ch := csh.getColumnHeader(fieldName) if ch == nil { // Fast path - there are no matching columns. // It matches anything only for empty value. diff --git a/lib/logstorage/filter_exact_prefix.go b/lib/logstorage/filter_exact_prefix.go index 7c241841c9..bf894166f3 100644 --- a/lib/logstorage/filter_exact_prefix.go +++ b/lib/logstorage/filter_exact_prefix.go @@ -51,7 +51,8 @@ func (fep *filterExactPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) { fieldName := fep.fieldName prefix := fep.prefix - v := bs.csh.getConstColumnValue(fieldName) + csh := bs.getColumnsHeader() + v := csh.getConstColumnValue(fieldName) if v != "" { if !matchExactPrefix(v, prefix) { bm.resetBits() @@ -60,7 +61,7 @@ func (fep *filterExactPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) { } // Verify whether filter matches other columns - ch := bs.csh.getColumnHeader(fieldName) + ch := csh.getColumnHeader(fieldName) if ch == nil { // Fast path - there are no matching columns. 
if !matchExactPrefix("", prefix) { diff --git a/lib/logstorage/filter_in.go b/lib/logstorage/filter_in.go index a230d3708b..0c062f28d3 100644 --- a/lib/logstorage/filter_in.go +++ b/lib/logstorage/filter_in.go @@ -358,7 +358,8 @@ func (fi *filterIn) applyToBlockSearch(bs *blockSearch, bm *bitmap) { return } - v := bs.csh.getConstColumnValue(fieldName) + csh := bs.getColumnsHeader() + v := csh.getConstColumnValue(fieldName) if v != "" { stringValues := fi.getStringValues() if _, ok := stringValues[v]; !ok { @@ -368,7 +369,7 @@ func (fi *filterIn) applyToBlockSearch(bs *blockSearch, bm *bitmap) { } // Verify whether filter matches other columns - ch := bs.csh.getColumnHeader(fieldName) + ch := csh.getColumnHeader(fieldName) if ch == nil { // Fast path - there are no matching columns. // It matches anything only for empty phrase. diff --git a/lib/logstorage/filter_ipv4_range.go b/lib/logstorage/filter_ipv4_range.go index 87eb034d5d..a382cd5deb 100644 --- a/lib/logstorage/filter_ipv4_range.go +++ b/lib/logstorage/filter_ipv4_range.go @@ -102,7 +102,8 @@ func (fr *filterIPv4Range) applyToBlockSearch(bs *blockSearch, bm *bitmap) { return } - v := bs.csh.getConstColumnValue(fieldName) + csh := bs.getColumnsHeader() + v := csh.getConstColumnValue(fieldName) if v != "" { if !matchIPv4Range(v, minValue, maxValue) { bm.resetBits() @@ -111,7 +112,7 @@ func (fr *filterIPv4Range) applyToBlockSearch(bs *blockSearch, bm *bitmap) { } // Verify whether filter matches other columns - ch := bs.csh.getColumnHeader(fieldName) + ch := csh.getColumnHeader(fieldName) if ch == nil { // Fast path - there are no matching columns. 
bm.resetBits() diff --git a/lib/logstorage/filter_len_range.go b/lib/logstorage/filter_len_range.go index 85a1e2f36d..b80ab877e5 100644 --- a/lib/logstorage/filter_len_range.go +++ b/lib/logstorage/filter_len_range.go @@ -125,7 +125,8 @@ func (fr *filterLenRange) applyToBlockSearch(bs *blockSearch, bm *bitmap) { return } - v := bs.csh.getConstColumnValue(fieldName) + csh := bs.getColumnsHeader() + v := csh.getConstColumnValue(fieldName) if v != "" { if !matchLenRange(v, minLen, maxLen) { bm.resetBits() @@ -134,7 +135,7 @@ func (fr *filterLenRange) applyToBlockSearch(bs *blockSearch, bm *bitmap) { } // Verify whether filter matches other columns - ch := bs.csh.getColumnHeader(fieldName) + ch := csh.getColumnHeader(fieldName) if ch == nil { // Fast path - there are no matching columns. if !matchLenRange("", minLen, maxLen) { diff --git a/lib/logstorage/filter_or.go b/lib/logstorage/filter_or.go index 39e49a0d66..7e67717ef4 100644 --- a/lib/logstorage/filter_or.go +++ b/lib/logstorage/filter_or.go @@ -93,7 +93,8 @@ func (fo *filterOr) matchBloomFilters(bs *blockSearch) bool { fieldName := ft.field tokens := ft.tokens - v := bs.csh.getConstColumnValue(fieldName) + csh := bs.getColumnsHeader() + v := csh.getConstColumnValue(fieldName) if v != "" { if matchStringByAllTokens(v, tokens) { return true @@ -101,7 +102,7 @@ func (fo *filterOr) matchBloomFilters(bs *blockSearch) bool { continue } - ch := bs.csh.getColumnHeader(fieldName) + ch := csh.getColumnHeader(fieldName) if ch == nil { continue } diff --git a/lib/logstorage/filter_phrase.go b/lib/logstorage/filter_phrase.go index aa73d8414c..676711b527 100644 --- a/lib/logstorage/filter_phrase.go +++ b/lib/logstorage/filter_phrase.go @@ -61,7 +61,8 @@ func (fp *filterPhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) { phrase := fp.phrase // Verify whether fp matches const column - v := bs.csh.getConstColumnValue(fieldName) + csh := bs.getColumnsHeader() + v := csh.getConstColumnValue(fieldName) if v != "" { if 
!matchPhrase(v, phrase) { bm.resetBits() @@ -70,7 +71,7 @@ func (fp *filterPhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) { } // Verify whether fp matches other columns - ch := bs.csh.getColumnHeader(fieldName) + ch := csh.getColumnHeader(fieldName) if ch == nil { // Fast path - there are no matching columns. // It matches anything only for empty phrase. diff --git a/lib/logstorage/filter_prefix.go b/lib/logstorage/filter_prefix.go index 4d0f75fde0..a6a086c006 100644 --- a/lib/logstorage/filter_prefix.go +++ b/lib/logstorage/filter_prefix.go @@ -59,7 +59,8 @@ func (fp *filterPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) { prefix := fp.prefix // Verify whether fp matches const column - v := bs.csh.getConstColumnValue(fieldName) + csh := bs.getColumnsHeader() + v := csh.getConstColumnValue(fieldName) if v != "" { if !matchPrefix(v, prefix) { bm.resetBits() @@ -68,7 +69,7 @@ func (fp *filterPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) { } // Verify whether fp matches other columns - ch := bs.csh.getColumnHeader(fieldName) + ch := csh.getColumnHeader(fieldName) if ch == nil { // Fast path - there are no matching columns. bm.resetBits() diff --git a/lib/logstorage/filter_range.go b/lib/logstorage/filter_range.go index 8776730bb1..b173c2deb9 100644 --- a/lib/logstorage/filter_range.go +++ b/lib/logstorage/filter_range.go @@ -173,7 +173,8 @@ func (fr *filterRange) applyToBlockSearch(bs *blockSearch, bm *bitmap) { return } - v := bs.csh.getConstColumnValue(fieldName) + csh := bs.getColumnsHeader() + v := csh.getConstColumnValue(fieldName) if v != "" { if !matchRange(v, minValue, maxValue) { bm.resetBits() @@ -182,7 +183,7 @@ func (fr *filterRange) applyToBlockSearch(bs *blockSearch, bm *bitmap) { } // Verify whether filter matches other columns - ch := bs.csh.getColumnHeader(fieldName) + ch := csh.getColumnHeader(fieldName) if ch == nil { // Fast path - there are no matching columns. 
bm.resetBits() diff --git a/lib/logstorage/filter_regexp.go b/lib/logstorage/filter_regexp.go index df8e59733c..de9121ea1b 100644 --- a/lib/logstorage/filter_regexp.go +++ b/lib/logstorage/filter_regexp.go @@ -78,7 +78,8 @@ func (fr *filterRegexp) applyToBlockSearch(bs *blockSearch, bm *bitmap) { re := fr.re // Verify whether filter matches const column - v := bs.csh.getConstColumnValue(fieldName) + csh := bs.getColumnsHeader() + v := csh.getConstColumnValue(fieldName) if v != "" { if !re.MatchString(v) { bm.resetBits() @@ -87,7 +88,7 @@ func (fr *filterRegexp) applyToBlockSearch(bs *blockSearch, bm *bitmap) { } // Verify whether filter matches other columns - ch := bs.csh.getColumnHeader(fieldName) + ch := csh.getColumnHeader(fieldName) if ch == nil { // Fast path - there are no matching columns. if !re.MatchString("") { diff --git a/lib/logstorage/filter_sequence.go b/lib/logstorage/filter_sequence.go index 1fda32d05d..fd8cdccc53 100644 --- a/lib/logstorage/filter_sequence.go +++ b/lib/logstorage/filter_sequence.go @@ -87,7 +87,8 @@ func (fs *filterSequence) applyToBlockSearch(bs *blockSearch, bm *bitmap) { return } - v := bs.csh.getConstColumnValue(fieldName) + csh := bs.getColumnsHeader() + v := csh.getConstColumnValue(fieldName) if v != "" { if !matchSequence(v, phrases) { bm.resetBits() @@ -96,7 +97,7 @@ func (fs *filterSequence) applyToBlockSearch(bs *blockSearch, bm *bitmap) { } // Verify whether filter matches other columns - ch := bs.csh.getColumnHeader(fieldName) + ch := csh.getColumnHeader(fieldName) if ch == nil { // Fast path - there are no matching columns. // It matches anything only for empty phrase. 
diff --git a/lib/logstorage/filter_string_range.go b/lib/logstorage/filter_string_range.go index f7518c7236..071ade0be9 100644 --- a/lib/logstorage/filter_string_range.go +++ b/lib/logstorage/filter_string_range.go @@ -52,7 +52,8 @@ func (fr *filterStringRange) applyToBlockSearch(bs *blockSearch, bm *bitmap) { return } - v := bs.csh.getConstColumnValue(fieldName) + csh := bs.getColumnsHeader() + v := csh.getConstColumnValue(fieldName) if v != "" { if !matchStringRange(v, minValue, maxValue) { bm.resetBits() @@ -61,7 +62,7 @@ func (fr *filterStringRange) applyToBlockSearch(bs *blockSearch, bm *bitmap) { } // Verify whether filter matches other columns - ch := bs.csh.getColumnHeader(fieldName) + ch := csh.getColumnHeader(fieldName) if ch == nil { if !matchStringRange("", minValue, maxValue) { bm.resetBits()