lib/logstorage: optimize performance for queries that select all the log fields of logs containing hundreds of fields (aka "wide events")

Unpack the full columnsHeader block instead of unpacking meta-information for each individual column
when a query that selects all the columns is executed. This improves performance when scanning
logs with a big number of fields.

(cherry picked from commit 2023f017b1)
Aliaksandr Valialkin 2024-10-18 00:21:20 +02:00 committed by hagen1778
parent 5d541322c6
commit 92b9b13df1
3 changed files with 19 additions and 55 deletions
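The change boils down to a single caching pattern: unmarshal the per-block columnsHeader once and serve every per-column lookup from the cached copy, instead of re-resolving metadata column by column. Below is a minimal, self-contained Go sketch of that pattern; the type names mirror the ones in the diff, but the simplified fields and the unmarshalColumnsHeader helper are stand-ins for illustration, not the actual VictoriaLogs implementation.

```go
package main

import "fmt"

// Field is a simplified stand-in for a constant column (name=value pair).
type Field struct {
	Name  string
	Value string
}

// columnsHeader is a simplified stand-in for the per-block header that lists
// const columns and the headers of regular columns.
type columnsHeader struct {
	constColumns  []Field
	columnHeaders []string
}

// blockSearch caches the unpacked header for the current block.
type blockSearch struct {
	raw      []byte         // serialized columnsHeader block
	cshCache *columnsHeader // lazily initialized by getColumnsHeader()
}

// getColumnsHeader unpacks the whole header at most once per block and
// reuses it for every subsequent column lookup.
func (bs *blockSearch) getColumnsHeader() *columnsHeader {
	if bs.cshCache == nil {
		bs.cshCache = unmarshalColumnsHeader(bs.raw)
	}
	return bs.cshCache
}

// unmarshalColumnsHeader is a hypothetical decoder used only for this sketch.
func unmarshalColumnsHeader(b []byte) *columnsHeader {
	_ = b // a real implementation would decode the serialized header here
	return &columnsHeader{
		constColumns:  []Field{{Name: "host", Value: "h1"}},
		columnHeaders: []string{"level", "_msg"},
	}
}

func main() {
	bs := &blockSearch{raw: []byte("...")}

	// Selecting all columns now touches the serialized header exactly once,
	// instead of re-resolving metadata for each of hundreds of columns.
	csh := bs.getColumnsHeader()
	for _, cc := range csh.constColumns {
		fmt.Printf("const column %q = %q\n", cc.Name, cc.Value)
	}
	for _, name := range csh.columnHeaders {
		fmt.Printf("column %q\n", name)
	}
}
```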


@@ -18,6 +18,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
* FEATURE: add basic [alerting rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vlogs.yml) for VictoriaLogs process. See details at [monitoring docs](https://docs.victoriametrics.com/victorialogs/index.html#monitoring).
* FEATURE: improve [`stats` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe) performance on systems with many CPU cores when `by(...)` fields contain big number of unique values. For example, `_time:1d | stats by (user_id) count() x` should be executed much faster when `user_id` field contains millions of unique values.
* FEATURE: improve performance for [`top`](https://docs.victoriametrics.com/victorialogs/logsql/#top-pipe), [`uniq`](https://docs.victoriametrics.com/victorialogs/logsql/#uniq-pipe) and [`field_values`](https://docs.victoriametrics.com/victorialogs/logsql/#field_values-pipe) pipes on systems with many CPU cores when it is applied to [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) with big number of unique values. For example, `_time:1d | top 5 (user_id)` should be executed much faster when `user_id` field contains millions of unique values.
* FEATURE: improve performance for [`field_names` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#field_names-pipe) when it is applied to logs with hundreds of [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
## [v0.36.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.36.0-victorialogs)


@@ -316,8 +316,8 @@ func (br *blockResult) initAllColumns() {
 	}
 	// Add other const columns
-	ccs := br.bs.getConstColumns()
-	for _, cc := range ccs {
+	csh := br.bs.getColumnsHeader()
+	for _, cc := range csh.constColumns {
 		if cc.Name == "" {
 			continue
 		}
@@ -327,7 +327,7 @@ func (br *blockResult) initAllColumns() {
 	}
 	// Add other non-const columns
-	chs := br.bs.getColumnHeaders()
+	chs := csh.columnHeaders
 	for i := range chs {
 		ch := &chs[i]
 		if ch.name == "" {


@@ -139,7 +139,7 @@ type blockSearch struct {
 	// cshCache is the columnsHeader associated with the given block.
 	//
-	// It is initialized lazily by calling getColumnsHeaderV0().
+	// It is initialized lazily by calling getColumnsHeader().
 	cshCache *columnsHeader
 	// seenStreams contains seen streamIDs for the recent searches.
@@ -240,7 +240,7 @@ func (bs *blockSearch) getConstColumnValue(name string) string {
 	}
 	if bs.partFormatVersion() < 1 {
-		csh := bs.getColumnsHeaderV0()
+		csh := bs.getColumnsHeader()
 		for _, cc := range csh.constColumns {
 			if cc.Name == name {
 				return cc.Value
@@ -288,7 +288,7 @@ func (bs *blockSearch) getColumnHeader(name string) *columnHeader {
 	}
 	if bs.partFormatVersion() < 1 {
-		csh := bs.getColumnsHeaderV0()
+		csh := bs.getColumnsHeader()
 		chs := csh.columnHeaders
 		for i := range chs {
 			ch := &chs[i]
@@ -337,48 +337,6 @@ func (bs *blockSearch) getColumnNameID(name string) (uint64, bool) {
 	return id, ok
 }
-func (bs *blockSearch) getColumnNameByID(id uint64) (string, bool) {
-	columnNames := bs.bsw.p.columnNames
-	if id >= uint64(len(columnNames)) {
-		return "", false
-	}
-	return columnNames[id], true
-}
-func (bs *blockSearch) getConstColumns() []Field {
-	if bs.partFormatVersion() < 1 {
-		csh := bs.getColumnsHeaderV0()
-		return csh.constColumns
-	}
-	chsIndex := bs.getColumnsHeaderIndex()
-	for _, cr := range chsIndex.constColumnsRefs {
-		columnName, ok := bs.getColumnNameByID(cr.columnNameID)
-		if !ok {
-			logger.Panicf("FATAL: %s: missing column name for id=%d", bs.bsw.p.path, cr.columnNameID)
-		}
-		_ = bs.getConstColumnValue(columnName)
-	}
-	return bs.ccsCache
-}
-func (bs *blockSearch) getColumnHeaders() []columnHeader {
-	if bs.partFormatVersion() < 1 {
-		csh := bs.getColumnsHeaderV0()
-		return csh.columnHeaders
-	}
-	chsIndex := bs.getColumnsHeaderIndex()
-	for _, cr := range chsIndex.columnHeadersRefs {
-		columnName, ok := bs.getColumnNameByID(cr.columnNameID)
-		if !ok {
-			logger.Panicf("FATAL: %s: missing column name for id=%d", bs.bsw.p.path, cr.columnNameID)
-		}
-		_ = bs.getColumnHeader(columnName)
-	}
-	return bs.chsCache
-}
 func (bs *blockSearch) getColumnsHeaderIndex() *columnsHeaderIndex {
 	if bs.partFormatVersion() < 1 {
 		logger.Panicf("BUG: getColumnsHeaderIndex() can be called only for part encoding v1+, while it has been called for v%d", bs.partFormatVersion())
@@ -395,18 +353,23 @@ func (bs *blockSearch) getColumnsHeaderIndex() *columnsHeaderIndex {
 	return bs.cshIndexCache
 }
-func (bs *blockSearch) getColumnsHeaderV0() *columnsHeader {
-	if bs.partFormatVersion() >= 1 {
-		logger.Panicf("BUG: getColumnsHeaderV0() can be called only for part encoding v0, while it has been called for v%d", bs.partFormatVersion())
-	}
+func (bs *blockSearch) getColumnsHeader() *columnsHeader {
 	if bs.cshCache == nil {
 		b := bs.getColumnsHeaderBlock()
-		bs.cshCache = getColumnsHeader()
-		if err := bs.cshCache.unmarshalNoArena(b, 0); err != nil {
+		csh := getColumnsHeader()
+		partFormatVersion := bs.partFormatVersion()
+		if err := csh.unmarshalNoArena(b, partFormatVersion); err != nil {
 			logger.Panicf("FATAL: %s: cannot unmarshal columns header: %s", bs.bsw.p.path, err)
 		}
+		if partFormatVersion >= 1 {
+			cshIndex := bs.getColumnsHeaderIndex()
+			if err := csh.setColumnNames(cshIndex, bs.bsw.p.columnNames); err != nil {
+				logger.Panicf("FATAL: %s: %s", bs.bsw.p.path, err)
+			}
+		}
+		bs.cshCache = csh
 	}
 	return bs.cshCache
 }
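For v1+ parts, column names are stored once per part in bs.bsw.p.columnNames and referenced by ID from the columnsHeaderIndex (see the removed getColumnNameByID, getConstColumns and getColumnHeaders helpers above). The new setColumnNames call presumably resolves those IDs into names on the freshly unpacked header in a single pass. The body of setColumnNames is not part of this diff, so the following is only a rough, hypothetical sketch inferred from its call site and the removed helpers; the real implementation may differ.

```go
package main

import "fmt"

// Simplified stand-ins for the real types; only the fields needed for the sketch.
type Field struct {
	Name  string
	Value string
}

type columnHeader struct {
	name string
}

type columnHeaderRef struct {
	columnNameID uint64
}

type columnsHeaderIndex struct {
	columnHeadersRefs []columnHeaderRef
	constColumnsRefs  []columnHeaderRef
}

type columnsHeader struct {
	columnHeaders []columnHeader
	constColumns  []Field
}

// setColumnNames resolves column name IDs from the header index into names
// taken from the per-part columnNames table. This is a guess at the behavior,
// inferred only from the call site in the diff above.
func (csh *columnsHeader) setColumnNames(cshIndex *columnsHeaderIndex, columnNames []string) error {
	if len(cshIndex.columnHeadersRefs) != len(csh.columnHeaders) ||
		len(cshIndex.constColumnsRefs) != len(csh.constColumns) {
		return fmt.Errorf("mismatching number of column references and columns")
	}
	for i, cr := range cshIndex.columnHeadersRefs {
		if cr.columnNameID >= uint64(len(columnNames)) {
			return fmt.Errorf("missing column name for id=%d", cr.columnNameID)
		}
		csh.columnHeaders[i].name = columnNames[cr.columnNameID]
	}
	for i, cr := range cshIndex.constColumnsRefs {
		if cr.columnNameID >= uint64(len(columnNames)) {
			return fmt.Errorf("missing column name for id=%d", cr.columnNameID)
		}
		csh.constColumns[i].Name = columnNames[cr.columnNameID]
	}
	return nil
}

func main() {
	csh := &columnsHeader{
		columnHeaders: make([]columnHeader, 2),
		constColumns:  make([]Field, 1),
	}
	cshIndex := &columnsHeaderIndex{
		columnHeadersRefs: []columnHeaderRef{{columnNameID: 0}, {columnNameID: 2}},
		constColumnsRefs:  []columnHeaderRef{{columnNameID: 1}},
	}
	columnNames := []string{"level", "host", "_msg"}
	if err := csh.setColumnNames(cshIndex, columnNames); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", csh)
}
```

Resolving all names in one pass over the cached header keeps the per-column lookups out of the hot path, which is what makes selecting hundreds of fields cheaper.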