lib/logstorage: optimize performance for queries that select all the log fields of logs containing hundreds of fields (aka "wide events")

Unpack the full columnsHeader block instead of unpacking meta-information for each individual column
when executing a query that selects all the columns. This improves performance when scanning
logs with a large number of fields.
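Below is a minimal, self-contained Go sketch of the idea, using hypothetical simplified types (`blockReader`, `column`) rather than the actual VictoriaLogs structures: the per-block columns header is unpacked once and cached, so a query that reads every column no longer repeats per-column header work.

```go
package main

import "fmt"

// column is a simplified stand-in for a per-column header entry.
type column struct {
	name  string
	value string
}

// columnsHeader is a simplified stand-in for the per-block columns header.
type columnsHeader struct {
	columns []column
}

// blockReader caches the fully unpacked columns header for the current block.
type blockReader struct {
	rawColumns []column       // pretend this is the marshaled columnsHeader block
	cshCache   *columnsHeader // lazily initialized on first access
}

// getColumnsHeader unpacks the whole header once and reuses it afterwards,
// instead of re-reading per-column meta-information for every selected column.
func (br *blockReader) getColumnsHeader() *columnsHeader {
	if br.cshCache == nil {
		// The real code performs a single unmarshal of the columns header block;
		// in this sketch the "unmarshal" is just a copy of the prepared columns.
		br.cshCache = &columnsHeader{columns: append([]column(nil), br.rawColumns...)}
	}
	return br.cshCache
}

func main() {
	br := &blockReader{rawColumns: []column{{"level", "info"}, {"user_id", "42"}}}
	// A query selecting all columns walks the cached header once per block.
	for _, c := range br.getColumnsHeader().columns {
		fmt.Printf("%s=%s\n", c.name, c.value)
	}
}
```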
Author: Aliaksandr Valialkin
Date:   2024-10-18 00:21:20 +02:00
Parent: 78c6fb0883
Commit: 2023f017b1 (GPG key ID: 52C003EE2BCDB9EB; no known key found for this signature in the database)

3 changed files with 19 additions and 55 deletions


@@ -18,6 +18,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
 * FEATURE: add basic [alerting rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vlogs.yml) for VictoriaLogs process. See details at [monitoring docs](https://docs.victoriametrics.com/victorialogs/index.html#monitoring).
 * FEATURE: improve [`stats` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe) performance on systems with many CPU cores when `by(...)` fields contain big number of unique values. For example, `_time:1d | stats by (user_id) count() x` should be executed much faster when `user_id` field contains millions of unique values.
 * FEATURE: improve performance for [`top`](https://docs.victoriametrics.com/victorialogs/logsql/#top-pipe), [`uniq`](https://docs.victoriametrics.com/victorialogs/logsql/#uniq-pipe) and [`field_values`](https://docs.victoriametrics.com/victorialogs/logsql/#field_values-pipe) pipes on systems with many CPU cores when it is applied to [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) with big number of unique values. For example, `_time:1d | top 5 (user_id)` should be executed much faster when `user_id` field contains millions of unique values.
+* FEATURE: improve performance for [`field_names` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#field_names-pipe) when it is applied to logs with hundreds of [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).

 ## [v0.36.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.36.0-victorialogs)


@@ -316,8 +316,8 @@ func (br *blockResult) initAllColumns() {
     }

     // Add other const columns
-    ccs := br.bs.getConstColumns()
-    for _, cc := range ccs {
+    csh := br.bs.getColumnsHeader()
+    for _, cc := range csh.constColumns {
         if cc.Name == "" {
             continue
         }
@@ -327,7 +327,7 @@ func (br *blockResult) initAllColumns() {
     }

     // Add other non-const columns
-    chs := br.bs.getColumnHeaders()
+    chs := csh.columnHeaders
     for i := range chs {
         ch := &chs[i]
         if ch.name == "" {


@@ -139,7 +139,7 @@ type blockSearch struct {
     // cshCache is the columnsHeader associated with the given block.
     //
-    // It is initialized lazily by calling getColumnsHeaderV0().
+    // It is initialized lazily by calling getColumnsHeader().
     cshCache *columnsHeader

     // seenStreams contains seen streamIDs for the recent searches.
@@ -240,7 +240,7 @@ func (bs *blockSearch) getConstColumnValue(name string) string {
     }
     if bs.partFormatVersion() < 1 {
-        csh := bs.getColumnsHeaderV0()
+        csh := bs.getColumnsHeader()
         for _, cc := range csh.constColumns {
             if cc.Name == name {
                 return cc.Value
@@ -288,7 +288,7 @@ func (bs *blockSearch) getColumnHeader(name string) *columnHeader {
     }
     if bs.partFormatVersion() < 1 {
-        csh := bs.getColumnsHeaderV0()
+        csh := bs.getColumnsHeader()
         chs := csh.columnHeaders
         for i := range chs {
             ch := &chs[i]
@@ -337,48 +337,6 @@ func (bs *blockSearch) getColumnNameID(name string) (uint64, bool) {
     return id, ok
 }
-
-func (bs *blockSearch) getColumnNameByID(id uint64) (string, bool) {
-    columnNames := bs.bsw.p.columnNames
-    if id >= uint64(len(columnNames)) {
-        return "", false
-    }
-    return columnNames[id], true
-}
-
-func (bs *blockSearch) getConstColumns() []Field {
-    if bs.partFormatVersion() < 1 {
-        csh := bs.getColumnsHeaderV0()
-        return csh.constColumns
-    }
-
-    chsIndex := bs.getColumnsHeaderIndex()
-    for _, cr := range chsIndex.constColumnsRefs {
-        columnName, ok := bs.getColumnNameByID(cr.columnNameID)
-        if !ok {
-            logger.Panicf("FATAL: %s: missing column name for id=%d", bs.bsw.p.path, cr.columnNameID)
-        }
-        _ = bs.getConstColumnValue(columnName)
-    }
-    return bs.ccsCache
-}
-
-func (bs *blockSearch) getColumnHeaders() []columnHeader {
-    if bs.partFormatVersion() < 1 {
-        csh := bs.getColumnsHeaderV0()
-        return csh.columnHeaders
-    }
-
-    chsIndex := bs.getColumnsHeaderIndex()
-    for _, cr := range chsIndex.columnHeadersRefs {
-        columnName, ok := bs.getColumnNameByID(cr.columnNameID)
-        if !ok {
-            logger.Panicf("FATAL: %s: missing column name for id=%d", bs.bsw.p.path, cr.columnNameID)
-        }
-        _ = bs.getColumnHeader(columnName)
-    }
-    return bs.chsCache
-}

 func (bs *blockSearch) getColumnsHeaderIndex() *columnsHeaderIndex {
     if bs.partFormatVersion() < 1 {
         logger.Panicf("BUG: getColumnsHeaderIndex() can be called only for part encoding v1+, while it has been called for v%d", bs.partFormatVersion())
@@ -395,18 +353,23 @@ func (bs *blockSearch) getColumnsHeaderIndex() *columnsHeaderIndex {
     return bs.cshIndexCache
 }

-func (bs *blockSearch) getColumnsHeaderV0() *columnsHeader {
-    if bs.partFormatVersion() >= 1 {
-        logger.Panicf("BUG: getColumnsHeaderV0() can be called only for part encoding v0, while it has been called for v%d", bs.partFormatVersion())
-    }
-
+func (bs *blockSearch) getColumnsHeader() *columnsHeader {
     if bs.cshCache == nil {
         b := bs.getColumnsHeaderBlock()

-        bs.cshCache = getColumnsHeader()
-        if err := bs.cshCache.unmarshalNoArena(b, 0); err != nil {
+        csh := getColumnsHeader()
+        partFormatVersion := bs.partFormatVersion()
+        if err := csh.unmarshalNoArena(b, partFormatVersion); err != nil {
             logger.Panicf("FATAL: %s: cannot unmarshal columns header: %s", bs.bsw.p.path, err)
         }
+
+        if partFormatVersion >= 1 {
+            cshIndex := bs.getColumnsHeaderIndex()
+            if err := csh.setColumnNames(cshIndex, bs.bsw.p.columnNames); err != nil {
+                logger.Panicf("FATAL: %s: %s", bs.bsw.p.path, err)
+            }
+        }
+        bs.cshCache = csh
     }
     return bs.cshCache
 }
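As a rough illustration of why a single whole-header unpack helps for wide events, here is a hypothetical micro-benchmark that is not part of this commit; it uses a toy string encoding instead of the real columnsHeader layout, and could be saved as a `_test.go` file and run with `go test -bench=.`.

```go
package main

import (
	"fmt"
	"strings"
	"testing"
)

// sink prevents the compiler from optimizing the benchmark work away.
var sink int

// makeHeader builds a toy "marshaled" columns header with n name=value entries.
func makeHeader(n int) string {
	var sb strings.Builder
	for i := 0; i < n; i++ {
		fmt.Fprintf(&sb, "field_%d=value;", i)
	}
	return sb.String()
}

// BenchmarkPerColumnScan mimics unpacking meta-information per individual
// column: every one of the 500 selected columns re-parses the header data.
func BenchmarkPerColumnScan(b *testing.B) {
	hdr := makeHeader(500)
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		for col := 0; col < 500; col++ {
			sink += len(strings.Split(hdr, ";"))
		}
	}
}

// BenchmarkWholeHeaderUnpack mimics the optimized path: the header is
// unpacked once per block and all 500 columns are read from that result.
func BenchmarkWholeHeaderUnpack(b *testing.B) {
	hdr := makeHeader(500)
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		cols := strings.Split(hdr, ";")
		for range cols {
			sink++
		}
	}
}
```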