lib/logstorage: optimize performance for queries that select all the log fields of logs containing hundreds of fields (aka "wide events")

Unpack the full columnsHeader block instead of unpacking meta-information for each individual column
when executing a query that selects all the columns. This improves performance when scanning
logs with a large number of fields.
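Below is a minimal, self-contained Go sketch of the idea, using hypothetical simplified types (`blockReader`, `column`) rather than the actual VictoriaLogs structures: the per-block columns header is unpacked once and cached, so a query that reads every column no longer repeats per-column header work.

```go
package main

import "fmt"

// column is a simplified stand-in for a per-column header entry.
type column struct {
	name  string
	value string
}

// columnsHeader is a simplified stand-in for the per-block columns header.
type columnsHeader struct {
	columns []column
}

// blockReader caches the fully unpacked columns header for the current block.
type blockReader struct {
	rawColumns []column       // pretend this is the marshaled columnsHeader block
	cshCache   *columnsHeader // lazily initialized on first access
}

// getColumnsHeader unpacks the whole header once and reuses it afterwards,
// instead of re-reading per-column meta-information for every selected column.
func (br *blockReader) getColumnsHeader() *columnsHeader {
	if br.cshCache == nil {
		// The real code performs a single unmarshal of the columns header block;
		// in this sketch the "unmarshal" is just a copy of the prepared columns.
		br.cshCache = &columnsHeader{columns: append([]column(nil), br.rawColumns...)}
	}
	return br.cshCache
}

func main() {
	br := &blockReader{rawColumns: []column{{"level", "info"}, {"user_id", "42"}}}
	// A query selecting all columns walks the cached header once per block.
	for _, c := range br.getColumnsHeader().columns {
		fmt.Printf("%s=%s\n", c.name, c.value)
	}
}
```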
Author: Aliaksandr Valialkin
Date:   2024-10-18 00:21:20 +02:00
Parent: 78c6fb0883
Commit: 2023f017b1 (GPG key ID: 52C003EE2BCDB9EB; no known key found for this signature in the database)

3 changed files with 19 additions and 55 deletions


@@ -18,6 +18,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
 * FEATURE: add basic [alerting rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vlogs.yml) for VictoriaLogs process. See details at [monitoring docs](https://docs.victoriametrics.com/victorialogs/index.html#monitoring).
 * FEATURE: improve [`stats` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe) performance on systems with many CPU cores when `by(...)` fields contain big number of unique values. For example, `_time:1d | stats by (user_id) count() x` should be executed much faster when `user_id` field contains millions of unique values.
 * FEATURE: improve performance for [`top`](https://docs.victoriametrics.com/victorialogs/logsql/#top-pipe), [`uniq`](https://docs.victoriametrics.com/victorialogs/logsql/#uniq-pipe) and [`field_values`](https://docs.victoriametrics.com/victorialogs/logsql/#field_values-pipe) pipes on systems with many CPU cores when it is applied to [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) with big number of unique values. For example, `_time:1d | top 5 (user_id)` should be executed much faster when `user_id` field contains millions of unique values.
+* FEATURE: improve performance for [`field_names` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#field_names-pipe) when it is applied to logs with hundreds of [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).

 ## [v0.36.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.36.0-victorialogs)


@@ -316,8 +316,8 @@ func (br *blockResult) initAllColumns() {
     }

     // Add other const columns
-    ccs := br.bs.getConstColumns()
-    for _, cc := range ccs {
+    csh := br.bs.getColumnsHeader()
+    for _, cc := range csh.constColumns {
         if cc.Name == "" {
             continue
         }
@@ -327,7 +327,7 @@ func (br *blockResult) initAllColumns() {
     }

     // Add other non-const columns
-    chs := br.bs.getColumnHeaders()
+    chs := csh.columnHeaders
     for i := range chs {
         ch := &chs[i]
         if ch.name == "" {


@@ -139,7 +139,7 @@ type blockSearch struct {
     // cshCache is the columnsHeader associated with the given block.
     //
-    // It is initialized lazily by calling getColumnsHeaderV0().
+    // It is initialized lazily by calling getColumnsHeader().
     cshCache *columnsHeader

     // seenStreams contains seen streamIDs for the recent searches.
@@ -240,7 +240,7 @@ func (bs *blockSearch) getConstColumnValue(name string) string {
     }
     if bs.partFormatVersion() < 1 {
-        csh := bs.getColumnsHeaderV0()
+        csh := bs.getColumnsHeader()
         for _, cc := range csh.constColumns {
             if cc.Name == name {
                 return cc.Value
@@ -288,7 +288,7 @@ func (bs *blockSearch) getColumnHeader(name string) *columnHeader {
     }
     if bs.partFormatVersion() < 1 {
-        csh := bs.getColumnsHeaderV0()
+        csh := bs.getColumnsHeader()
         chs := csh.columnHeaders
         for i := range chs {
             ch := &chs[i]
@@ -337,48 +337,6 @@ func (bs *blockSearch) getColumnNameID(name string) (uint64, bool) {
     return id, ok
 }
-
-func (bs *blockSearch) getColumnNameByID(id uint64) (string, bool) {
-    columnNames := bs.bsw.p.columnNames
-    if id >= uint64(len(columnNames)) {
-        return "", false
-    }
-    return columnNames[id], true
-}
-
-func (bs *blockSearch) getConstColumns() []Field {
-    if bs.partFormatVersion() < 1 {
-        csh := bs.getColumnsHeaderV0()
-        return csh.constColumns
-    }
-
-    chsIndex := bs.getColumnsHeaderIndex()
-    for _, cr := range chsIndex.constColumnsRefs {
-        columnName, ok := bs.getColumnNameByID(cr.columnNameID)
-        if !ok {
-            logger.Panicf("FATAL: %s: missing column name for id=%d", bs.bsw.p.path, cr.columnNameID)
-        }
-        _ = bs.getConstColumnValue(columnName)
-    }
-    return bs.ccsCache
-}
-
-func (bs *blockSearch) getColumnHeaders() []columnHeader {
-    if bs.partFormatVersion() < 1 {
-        csh := bs.getColumnsHeaderV0()
-        return csh.columnHeaders
-    }
-
-    chsIndex := bs.getColumnsHeaderIndex()
-    for _, cr := range chsIndex.columnHeadersRefs {
-        columnName, ok := bs.getColumnNameByID(cr.columnNameID)
-        if !ok {
-            logger.Panicf("FATAL: %s: missing column name for id=%d", bs.bsw.p.path, cr.columnNameID)
-        }
-        _ = bs.getColumnHeader(columnName)
-    }
-    return bs.chsCache
-}

 func (bs *blockSearch) getColumnsHeaderIndex() *columnsHeaderIndex {
     if bs.partFormatVersion() < 1 {
         logger.Panicf("BUG: getColumnsHeaderIndex() can be called only for part encoding v1+, while it has been called for v%d", bs.partFormatVersion())
@@ -395,18 +353,23 @@ func (bs *blockSearch) getColumnsHeaderIndex() *columnsHeaderIndex {
     return bs.cshIndexCache
 }

-func (bs *blockSearch) getColumnsHeaderV0() *columnsHeader {
-    if bs.partFormatVersion() >= 1 {
-        logger.Panicf("BUG: getColumnsHeaderV0() can be called only for part encoding v0, while it has been called for v%d", bs.partFormatVersion())
-    }
-
+func (bs *blockSearch) getColumnsHeader() *columnsHeader {
     if bs.cshCache == nil {
         b := bs.getColumnsHeaderBlock()

-        bs.cshCache = getColumnsHeader()
-        if err := bs.cshCache.unmarshalNoArena(b, 0); err != nil {
+        csh := getColumnsHeader()
+        partFormatVersion := bs.partFormatVersion()
+        if err := csh.unmarshalNoArena(b, partFormatVersion); err != nil {
             logger.Panicf("FATAL: %s: cannot unmarshal columns header: %s", bs.bsw.p.path, err)
         }
+
+        if partFormatVersion >= 1 {
+            cshIndex := bs.getColumnsHeaderIndex()
+            if err := csh.setColumnNames(cshIndex, bs.bsw.p.columnNames); err != nil {
+                logger.Panicf("FATAL: %s: %s", bs.bsw.p.path, err)
+            }
+        }
+        bs.cshCache = csh
     }
     return bs.cshCache
 }
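As a rough illustration of why a single whole-header unpack helps for wide events, here is a hypothetical micro-benchmark that is not part of this commit; it uses a toy string encoding instead of the real columnsHeader layout, and could be saved as a `_test.go` file and run with `go test -bench=.`.

```go
package main

import (
	"fmt"
	"strings"
	"testing"
)

// sink prevents the compiler from optimizing the benchmark work away.
var sink int

// makeHeader builds a toy "marshaled" columns header with n name=value entries.
func makeHeader(n int) string {
	var sb strings.Builder
	for i := 0; i < n; i++ {
		fmt.Fprintf(&sb, "field_%d=value;", i)
	}
	return sb.String()
}

// BenchmarkPerColumnScan mimics unpacking meta-information per individual
// column: every one of the 500 selected columns re-parses the header data.
func BenchmarkPerColumnScan(b *testing.B) {
	hdr := makeHeader(500)
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		for col := 0; col < 500; col++ {
			sink += len(strings.Split(hdr, ";"))
		}
	}
}

// BenchmarkWholeHeaderUnpack mimics the optimized path: the header is
// unpacked once per block and all 500 columns are read from that result.
func BenchmarkWholeHeaderUnpack(b *testing.B) {
	hdr := makeHeader(500)
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		cols := strings.Split(hdr, ";")
		for range cols {
			sink++
		}
	}
}
```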