mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
lib/logstorage: optimize performance for queries that select all the log fields from logs containing hundreds of log fields (aka "wide events")
Unpack the full columnsHeader block instead of unpacking meta-information for each individual column when executing a query that selects all the columns. This improves performance when scanning logs with a large number of fields.
This commit is contained in:
parent
78c6fb0883
commit
2023f017b1
3 changed files with 19 additions and 55 deletions
|
@ -18,6 +18,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
|
||||||
* FEATURE: add basic [alerting rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vlogs.yml) for VictoriaLogs process. See details at [monitoring docs](https://docs.victoriametrics.com/victorialogs/index.html#monitoring).
|
* FEATURE: add basic [alerting rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vlogs.yml) for VictoriaLogs process. See details at [monitoring docs](https://docs.victoriametrics.com/victorialogs/index.html#monitoring).
|
||||||
* FEATURE: improve [`stats` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe) performance on systems with many CPU cores when `by(...)` fields contain a big number of unique values. For example, `_time:1d | stats by (user_id) count() x` should be executed much faster when the `user_id` field contains millions of unique values.
|
* FEATURE: improve [`stats` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe) performance on systems with many CPU cores when `by(...)` fields contain a big number of unique values. For example, `_time:1d | stats by (user_id) count() x` should be executed much faster when the `user_id` field contains millions of unique values.
|
||||||
* FEATURE: improve performance for [`top`](https://docs.victoriametrics.com/victorialogs/logsql/#top-pipe), [`uniq`](https://docs.victoriametrics.com/victorialogs/logsql/#uniq-pipe) and [`field_values`](https://docs.victoriametrics.com/victorialogs/logsql/#field_values-pipe) pipes on systems with many CPU cores when they are applied to [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) with a big number of unique values. For example, `_time:1d | top 5 (user_id)` should be executed much faster when the `user_id` field contains millions of unique values.
|
* FEATURE: improve performance for [`top`](https://docs.victoriametrics.com/victorialogs/logsql/#top-pipe), [`uniq`](https://docs.victoriametrics.com/victorialogs/logsql/#uniq-pipe) and [`field_values`](https://docs.victoriametrics.com/victorialogs/logsql/#field_values-pipe) pipes on systems with many CPU cores when they are applied to [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) with a big number of unique values. For example, `_time:1d | top 5 (user_id)` should be executed much faster when the `user_id` field contains millions of unique values.
|
||||||
|
* FEATURE: improve performance for [`field_names` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#field_names-pipe) when it is applied to logs with hundreds of [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||||
|
|
||||||
## [v0.36.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.36.0-victorialogs)
|
## [v0.36.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.36.0-victorialogs)
|
||||||
|
|
||||||
|
|
|
@ -316,8 +316,8 @@ func (br *blockResult) initAllColumns() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add other const columns
|
// Add other const columns
|
||||||
ccs := br.bs.getConstColumns()
|
csh := br.bs.getColumnsHeader()
|
||||||
for _, cc := range ccs {
|
for _, cc := range csh.constColumns {
|
||||||
if cc.Name == "" {
|
if cc.Name == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -327,7 +327,7 @@ func (br *blockResult) initAllColumns() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add other non-const columns
|
// Add other non-const columns
|
||||||
chs := br.bs.getColumnHeaders()
|
chs := csh.columnHeaders
|
||||||
for i := range chs {
|
for i := range chs {
|
||||||
ch := &chs[i]
|
ch := &chs[i]
|
||||||
if ch.name == "" {
|
if ch.name == "" {
|
||||||
|
|
|
@ -139,7 +139,7 @@ type blockSearch struct {
|
||||||
|
|
||||||
// cshCache is the columnsHeader associated with the given block.
|
// cshCache is the columnsHeader associated with the given block.
|
||||||
//
|
//
|
||||||
// It is initialized lazily by calling getColumnsHeaderV0().
|
// It is initialized lazily by calling getColumnsHeader().
|
||||||
cshCache *columnsHeader
|
cshCache *columnsHeader
|
||||||
|
|
||||||
// seenStreams contains seen streamIDs for the recent searches.
|
// seenStreams contains seen streamIDs for the recent searches.
|
||||||
|
@ -240,7 +240,7 @@ func (bs *blockSearch) getConstColumnValue(name string) string {
|
||||||
}
|
}
|
||||||
|
|
||||||
if bs.partFormatVersion() < 1 {
|
if bs.partFormatVersion() < 1 {
|
||||||
csh := bs.getColumnsHeaderV0()
|
csh := bs.getColumnsHeader()
|
||||||
for _, cc := range csh.constColumns {
|
for _, cc := range csh.constColumns {
|
||||||
if cc.Name == name {
|
if cc.Name == name {
|
||||||
return cc.Value
|
return cc.Value
|
||||||
|
@ -288,7 +288,7 @@ func (bs *blockSearch) getColumnHeader(name string) *columnHeader {
|
||||||
}
|
}
|
||||||
|
|
||||||
if bs.partFormatVersion() < 1 {
|
if bs.partFormatVersion() < 1 {
|
||||||
csh := bs.getColumnsHeaderV0()
|
csh := bs.getColumnsHeader()
|
||||||
chs := csh.columnHeaders
|
chs := csh.columnHeaders
|
||||||
for i := range chs {
|
for i := range chs {
|
||||||
ch := &chs[i]
|
ch := &chs[i]
|
||||||
|
@ -337,48 +337,6 @@ func (bs *blockSearch) getColumnNameID(name string) (uint64, bool) {
|
||||||
return id, ok
|
return id, ok
|
||||||
}
|
}
|
||||||
|
|
||||||
func (bs *blockSearch) getColumnNameByID(id uint64) (string, bool) {
|
|
||||||
columnNames := bs.bsw.p.columnNames
|
|
||||||
if id >= uint64(len(columnNames)) {
|
|
||||||
return "", false
|
|
||||||
}
|
|
||||||
return columnNames[id], true
|
|
||||||
}
|
|
||||||
|
|
||||||
func (bs *blockSearch) getConstColumns() []Field {
|
|
||||||
if bs.partFormatVersion() < 1 {
|
|
||||||
csh := bs.getColumnsHeaderV0()
|
|
||||||
return csh.constColumns
|
|
||||||
}
|
|
||||||
|
|
||||||
chsIndex := bs.getColumnsHeaderIndex()
|
|
||||||
for _, cr := range chsIndex.constColumnsRefs {
|
|
||||||
columnName, ok := bs.getColumnNameByID(cr.columnNameID)
|
|
||||||
if !ok {
|
|
||||||
logger.Panicf("FATAL: %s: missing column name for id=%d", bs.bsw.p.path, cr.columnNameID)
|
|
||||||
}
|
|
||||||
_ = bs.getConstColumnValue(columnName)
|
|
||||||
}
|
|
||||||
return bs.ccsCache
|
|
||||||
}
|
|
||||||
|
|
||||||
func (bs *blockSearch) getColumnHeaders() []columnHeader {
|
|
||||||
if bs.partFormatVersion() < 1 {
|
|
||||||
csh := bs.getColumnsHeaderV0()
|
|
||||||
return csh.columnHeaders
|
|
||||||
}
|
|
||||||
|
|
||||||
chsIndex := bs.getColumnsHeaderIndex()
|
|
||||||
for _, cr := range chsIndex.columnHeadersRefs {
|
|
||||||
columnName, ok := bs.getColumnNameByID(cr.columnNameID)
|
|
||||||
if !ok {
|
|
||||||
logger.Panicf("FATAL: %s: missing column name for id=%d", bs.bsw.p.path, cr.columnNameID)
|
|
||||||
}
|
|
||||||
_ = bs.getColumnHeader(columnName)
|
|
||||||
}
|
|
||||||
return bs.chsCache
|
|
||||||
}
|
|
||||||
|
|
||||||
func (bs *blockSearch) getColumnsHeaderIndex() *columnsHeaderIndex {
|
func (bs *blockSearch) getColumnsHeaderIndex() *columnsHeaderIndex {
|
||||||
if bs.partFormatVersion() < 1 {
|
if bs.partFormatVersion() < 1 {
|
||||||
logger.Panicf("BUG: getColumnsHeaderIndex() can be called only for part encoding v1+, while it has been called for v%d", bs.partFormatVersion())
|
logger.Panicf("BUG: getColumnsHeaderIndex() can be called only for part encoding v1+, while it has been called for v%d", bs.partFormatVersion())
|
||||||
|
@ -395,18 +353,23 @@ func (bs *blockSearch) getColumnsHeaderIndex() *columnsHeaderIndex {
|
||||||
return bs.cshIndexCache
|
return bs.cshIndexCache
|
||||||
}
|
}
|
||||||
|
|
||||||
func (bs *blockSearch) getColumnsHeaderV0() *columnsHeader {
|
func (bs *blockSearch) getColumnsHeader() *columnsHeader {
|
||||||
if bs.partFormatVersion() >= 1 {
|
|
||||||
logger.Panicf("BUG: getColumnsHeaderV0() can be called only for part encoding v0, while it has been called for v%d", bs.partFormatVersion())
|
|
||||||
}
|
|
||||||
|
|
||||||
if bs.cshCache == nil {
|
if bs.cshCache == nil {
|
||||||
b := bs.getColumnsHeaderBlock()
|
b := bs.getColumnsHeaderBlock()
|
||||||
|
|
||||||
bs.cshCache = getColumnsHeader()
|
csh := getColumnsHeader()
|
||||||
if err := bs.cshCache.unmarshalNoArena(b, 0); err != nil {
|
partFormatVersion := bs.partFormatVersion()
|
||||||
|
if err := csh.unmarshalNoArena(b, partFormatVersion); err != nil {
|
||||||
logger.Panicf("FATAL: %s: cannot unmarshal columns header: %s", bs.bsw.p.path, err)
|
logger.Panicf("FATAL: %s: cannot unmarshal columns header: %s", bs.bsw.p.path, err)
|
||||||
}
|
}
|
||||||
|
if partFormatVersion >= 1 {
|
||||||
|
cshIndex := bs.getColumnsHeaderIndex()
|
||||||
|
if err := csh.setColumnNames(cshIndex, bs.bsw.p.columnNames); err != nil {
|
||||||
|
logger.Panicf("FATAL: %s: %s", bs.bsw.p.path, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bs.cshCache = csh
|
||||||
}
|
}
|
||||||
return bs.cshCache
|
return bs.cshCache
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue