diff --git a/docs/VictoriaLogs/CHANGELOG.md b/docs/VictoriaLogs/CHANGELOG.md index 780fa683b..ed66ca498 100644 --- a/docs/VictoriaLogs/CHANGELOG.md +++ b/docs/VictoriaLogs/CHANGELOG.md @@ -18,6 +18,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta * FEATURE: add basic [alerting rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vlogs.yml) for VictoriaLogs process. See details at [monitoring docs](https://docs.victoriametrics.com/victorialogs/index.html#monitoring). * FEATURE: improve [`stats` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe) performance on systems with many CPU cores when `by(...)` fields contain big number of unique values. For example, `_time:1d | stats by (user_id) count() x` should be executed much faster when `user_id` field contains millions of unique values. * FEATURE: improve performance for [`top`](https://docs.victoriametrics.com/victorialogs/logsql/#top-pipe), [`uniq`](https://docs.victoriametrics.com/victorialogs/logsql/#uniq-pipe) and [`field_values`](https://docs.victoriametrics.com/victorialogs/logsql/#field_values-pipe) pipes on systems with many CPU cores when it is applied to [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) with big number of unique values. For example, `_time:1d | top 5 (user_id)` should be executed much faster when `user_id` field contains millions of unique values. +* FEATURE: improve performance for [`field_names` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#field_names-pipe) when it is applied to logs with hundreds of [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). ## [v0.36.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.36.0-victorialogs) diff --git a/lib/logstorage/block_result.go b/lib/logstorage/block_result.go index 8eb2b1c72..76459953a 100644 --- a/lib/logstorage/block_result.go +++ b/lib/logstorage/block_result.go @@ -316,8 +316,8 @@ func (br *blockResult) initAllColumns() { } // Add other const columns - ccs := br.bs.getConstColumns() - for _, cc := range ccs { + csh := br.bs.getColumnsHeader() + for _, cc := range csh.constColumns { if cc.Name == "" { continue } @@ -327,7 +327,7 @@ func (br *blockResult) initAllColumns() { } // Add other non-const columns - chs := br.bs.getColumnHeaders() + chs := csh.columnHeaders for i := range chs { ch := &chs[i] if ch.name == "" { diff --git a/lib/logstorage/block_search.go b/lib/logstorage/block_search.go index cd7804938..c932cbc8d 100644 --- a/lib/logstorage/block_search.go +++ b/lib/logstorage/block_search.go @@ -139,7 +139,7 @@ type blockSearch struct { // cshCache is the columnsHeader associated with the given block. // - // It is initialized lazily by calling getColumnsHeaderV0(). + // It is initialized lazily by calling getColumnsHeader(). cshCache *columnsHeader // seenStreams contains seen streamIDs for the recent searches. @@ -240,7 +240,7 @@ func (bs *blockSearch) getConstColumnValue(name string) string { } if bs.partFormatVersion() < 1 { - csh := bs.getColumnsHeaderV0() + csh := bs.getColumnsHeader() for _, cc := range csh.constColumns { if cc.Name == name { return cc.Value @@ -288,7 +288,7 @@ func (bs *blockSearch) getColumnHeader(name string) *columnHeader { } if bs.partFormatVersion() < 1 { - csh := bs.getColumnsHeaderV0() + csh := bs.getColumnsHeader() chs := csh.columnHeaders for i := range chs { ch := &chs[i] @@ -337,48 +337,6 @@ func (bs *blockSearch) getColumnNameID(name string) (uint64, bool) { return id, ok } -func (bs *blockSearch) getColumnNameByID(id uint64) (string, bool) { - columnNames := bs.bsw.p.columnNames - if id >= uint64(len(columnNames)) { - return "", false - } - return columnNames[id], true -} - -func (bs *blockSearch) getConstColumns() []Field { - if bs.partFormatVersion() < 1 { - csh := bs.getColumnsHeaderV0() - return csh.constColumns - } - - chsIndex := bs.getColumnsHeaderIndex() - for _, cr := range chsIndex.constColumnsRefs { - columnName, ok := bs.getColumnNameByID(cr.columnNameID) - if !ok { - logger.Panicf("FATAL: %s: missing column name for id=%d", bs.bsw.p.path, cr.columnNameID) - } - _ = bs.getConstColumnValue(columnName) - } - return bs.ccsCache -} - -func (bs *blockSearch) getColumnHeaders() []columnHeader { - if bs.partFormatVersion() < 1 { - csh := bs.getColumnsHeaderV0() - return csh.columnHeaders - } - - chsIndex := bs.getColumnsHeaderIndex() - for _, cr := range chsIndex.columnHeadersRefs { - columnName, ok := bs.getColumnNameByID(cr.columnNameID) - if !ok { - logger.Panicf("FATAL: %s: missing column name for id=%d", bs.bsw.p.path, cr.columnNameID) - } - _ = bs.getColumnHeader(columnName) - } - return bs.chsCache -} - func (bs *blockSearch) getColumnsHeaderIndex() *columnsHeaderIndex { if bs.partFormatVersion() < 1 { logger.Panicf("BUG: getColumnsHeaderIndex() can be called only for part encoding v1+, while it has been called for v%d", bs.partFormatVersion()) @@ -395,18 +353,23 @@ func (bs *blockSearch) getColumnsHeaderIndex() *columnsHeaderIndex { return bs.cshIndexCache } -func (bs *blockSearch) getColumnsHeaderV0() *columnsHeader { - if bs.partFormatVersion() >= 1 { - logger.Panicf("BUG: getColumnsHeaderV0() can be called only for part encoding v0, while it has been called for v%d", bs.partFormatVersion()) - } - +func (bs *blockSearch) getColumnsHeader() *columnsHeader { if bs.cshCache == nil { b := bs.getColumnsHeaderBlock() - bs.cshCache = getColumnsHeader() - if err := bs.cshCache.unmarshalNoArena(b, 0); err != nil { + csh := getColumnsHeader() + partFormatVersion := bs.partFormatVersion() + if err := csh.unmarshalNoArena(b, partFormatVersion); err != nil { logger.Panicf("FATAL: %s: cannot unmarshal columns header: %s", bs.bsw.p.path, err) } + if partFormatVersion >= 1 { + cshIndex := bs.getColumnsHeaderIndex() + if err := csh.setColumnNames(cshIndex, bs.bsw.p.columnNames); err != nil { + logger.Panicf("FATAL: %s: %s", bs.bsw.p.path, err) + } + } + + bs.cshCache = csh } return bs.cshCache }