mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
lib/logstorage: optimize performance for queries that select all the log fields from logs containing hundreds of log fields (aka "wide events")
Unpack the full columnsHeader block instead of unpacking meta-information for each individual column
when a query that selects all the columns is executed. This improves performance when scanning
logs with a big number of fields.
(cherry picked from commit 2023f017b1)
This commit is contained in:
parent
5d541322c6
commit
92b9b13df1
3 changed files with 19 additions and 55 deletions
|
@ -18,6 +18,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
|
|||
* FEATURE: add basic [alerting rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vlogs.yml) for VictoriaLogs process. See details at [monitoring docs](https://docs.victoriametrics.com/victorialogs/index.html#monitoring).
|
||||
* FEATURE: improve [`stats` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe) performance on systems with many CPU cores when `by(...)` fields contain a big number of unique values. For example, `_time:1d | stats by (user_id) count() x` should be executed much faster when the `user_id` field contains millions of unique values.
|
||||
* FEATURE: improve performance for [`top`](https://docs.victoriametrics.com/victorialogs/logsql/#top-pipe), [`uniq`](https://docs.victoriametrics.com/victorialogs/logsql/#uniq-pipe) and [`field_values`](https://docs.victoriametrics.com/victorialogs/logsql/#field_values-pipe) pipes on systems with many CPU cores when they are applied to [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) with a big number of unique values. For example, `_time:1d | top 5 (user_id)` should be executed much faster when the `user_id` field contains millions of unique values.
|
||||
* FEATURE: improve performance for [`field_names` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#field_names-pipe) when it is applied to logs with hundreds of [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||
|
||||
## [v0.36.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.36.0-victorialogs)
|
||||
|
||||
|
|
|
@ -316,8 +316,8 @@ func (br *blockResult) initAllColumns() {
|
|||
}
|
||||
|
||||
// Add other const columns
|
||||
ccs := br.bs.getConstColumns()
|
||||
for _, cc := range ccs {
|
||||
csh := br.bs.getColumnsHeader()
|
||||
for _, cc := range csh.constColumns {
|
||||
if cc.Name == "" {
|
||||
continue
|
||||
}
|
||||
|
@ -327,7 +327,7 @@ func (br *blockResult) initAllColumns() {
|
|||
}
|
||||
|
||||
// Add other non-const columns
|
||||
chs := br.bs.getColumnHeaders()
|
||||
chs := csh.columnHeaders
|
||||
for i := range chs {
|
||||
ch := &chs[i]
|
||||
if ch.name == "" {
|
||||
|
|
|
@ -139,7 +139,7 @@ type blockSearch struct {
|
|||
|
||||
// cshCache is the columnsHeader associated with the given block.
|
||||
//
|
||||
// It is initialized lazily by calling getColumnsHeaderV0().
|
||||
// It is initialized lazily by calling getColumnsHeader().
|
||||
cshCache *columnsHeader
|
||||
|
||||
// seenStreams contains seen streamIDs for the recent searches.
|
||||
|
@ -240,7 +240,7 @@ func (bs *blockSearch) getConstColumnValue(name string) string {
|
|||
}
|
||||
|
||||
if bs.partFormatVersion() < 1 {
|
||||
csh := bs.getColumnsHeaderV0()
|
||||
csh := bs.getColumnsHeader()
|
||||
for _, cc := range csh.constColumns {
|
||||
if cc.Name == name {
|
||||
return cc.Value
|
||||
|
@ -288,7 +288,7 @@ func (bs *blockSearch) getColumnHeader(name string) *columnHeader {
|
|||
}
|
||||
|
||||
if bs.partFormatVersion() < 1 {
|
||||
csh := bs.getColumnsHeaderV0()
|
||||
csh := bs.getColumnsHeader()
|
||||
chs := csh.columnHeaders
|
||||
for i := range chs {
|
||||
ch := &chs[i]
|
||||
|
@ -337,48 +337,6 @@ func (bs *blockSearch) getColumnNameID(name string) (uint64, bool) {
|
|||
return id, ok
|
||||
}
|
||||
|
||||
func (bs *blockSearch) getColumnNameByID(id uint64) (string, bool) {
|
||||
columnNames := bs.bsw.p.columnNames
|
||||
if id >= uint64(len(columnNames)) {
|
||||
return "", false
|
||||
}
|
||||
return columnNames[id], true
|
||||
}
|
||||
|
||||
func (bs *blockSearch) getConstColumns() []Field {
|
||||
if bs.partFormatVersion() < 1 {
|
||||
csh := bs.getColumnsHeaderV0()
|
||||
return csh.constColumns
|
||||
}
|
||||
|
||||
chsIndex := bs.getColumnsHeaderIndex()
|
||||
for _, cr := range chsIndex.constColumnsRefs {
|
||||
columnName, ok := bs.getColumnNameByID(cr.columnNameID)
|
||||
if !ok {
|
||||
logger.Panicf("FATAL: %s: missing column name for id=%d", bs.bsw.p.path, cr.columnNameID)
|
||||
}
|
||||
_ = bs.getConstColumnValue(columnName)
|
||||
}
|
||||
return bs.ccsCache
|
||||
}
|
||||
|
||||
func (bs *blockSearch) getColumnHeaders() []columnHeader {
|
||||
if bs.partFormatVersion() < 1 {
|
||||
csh := bs.getColumnsHeaderV0()
|
||||
return csh.columnHeaders
|
||||
}
|
||||
|
||||
chsIndex := bs.getColumnsHeaderIndex()
|
||||
for _, cr := range chsIndex.columnHeadersRefs {
|
||||
columnName, ok := bs.getColumnNameByID(cr.columnNameID)
|
||||
if !ok {
|
||||
logger.Panicf("FATAL: %s: missing column name for id=%d", bs.bsw.p.path, cr.columnNameID)
|
||||
}
|
||||
_ = bs.getColumnHeader(columnName)
|
||||
}
|
||||
return bs.chsCache
|
||||
}
|
||||
|
||||
func (bs *blockSearch) getColumnsHeaderIndex() *columnsHeaderIndex {
|
||||
if bs.partFormatVersion() < 1 {
|
||||
logger.Panicf("BUG: getColumnsHeaderIndex() can be called only for part encoding v1+, while it has been called for v%d", bs.partFormatVersion())
|
||||
|
@ -395,18 +353,23 @@ func (bs *blockSearch) getColumnsHeaderIndex() *columnsHeaderIndex {
|
|||
return bs.cshIndexCache
|
||||
}
|
||||
|
||||
func (bs *blockSearch) getColumnsHeaderV0() *columnsHeader {
|
||||
if bs.partFormatVersion() >= 1 {
|
||||
logger.Panicf("BUG: getColumnsHeaderV0() can be called only for part encoding v0, while it has been called for v%d", bs.partFormatVersion())
|
||||
}
|
||||
|
||||
func (bs *blockSearch) getColumnsHeader() *columnsHeader {
|
||||
if bs.cshCache == nil {
|
||||
b := bs.getColumnsHeaderBlock()
|
||||
|
||||
bs.cshCache = getColumnsHeader()
|
||||
if err := bs.cshCache.unmarshalNoArena(b, 0); err != nil {
|
||||
csh := getColumnsHeader()
|
||||
partFormatVersion := bs.partFormatVersion()
|
||||
if err := csh.unmarshalNoArena(b, partFormatVersion); err != nil {
|
||||
logger.Panicf("FATAL: %s: cannot unmarshal columns header: %s", bs.bsw.p.path, err)
|
||||
}
|
||||
if partFormatVersion >= 1 {
|
||||
cshIndex := bs.getColumnsHeaderIndex()
|
||||
if err := csh.setColumnNames(cshIndex, bs.bsw.p.columnNames); err != nil {
|
||||
logger.Panicf("FATAL: %s: %s", bs.bsw.p.path, err)
|
||||
}
|
||||
}
|
||||
|
||||
bs.cshCache = csh
|
||||
}
|
||||
return bs.cshCache
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue