mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
lib/logstorage: properly skip filtered out dict values when calculating uniq_values, min, max, row_min and row_max stats functions
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7458
This commit is contained in:
parent
2f1ce74d97
commit
546bf7d579
7 changed files with 24 additions and 23 deletions
|
@ -26,6 +26,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
|
||||||
* FEATURE: [Loki data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/#loki-json-api): show the original request body on parse errors. This should simplify debugging. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7490).
|
* FEATURE: [Loki data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/#loki-json-api): show the original request body on parse errors. This should simplify debugging. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7490).
|
||||||
|
|
||||||
* BUGFIX: [`values` stats function](https://docs.victoriametrics.com/victorialogs/logsql/#values-stats): fix a bug, which could lead to corrupted results. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7458).
|
* BUGFIX: [`values` stats function](https://docs.victoriametrics.com/victorialogs/logsql/#values-stats): fix a bug, which could lead to corrupted results. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7458).
|
||||||
|
* BUGFIX: [`uniq_values`](https://docs.victoriametrics.com/victorialogs/logsql/#uniq_values-stats), [`min`](https://docs.victoriametrics.com/victorialogs/logsql/#min-stats), [`max`](https://docs.victoriametrics.com/victorialogs/logsql/#max-stats), [`row_min`](https://docs.victoriametrics.com/victorialogs/logsql/#row_min-stats) and [`row_max`](https://docs.victoriametrics.com/victorialogs/logsql/#row_max-stats) stats functions: fix a bug, which could return non-matching field values for these functions. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7458).
|
||||||
* BUGFIX: [HTTP querying APIs](https://docs.victoriametrics.com/victorialogs/querying/#http-api): properly take into account the `end` query arg when calculating time range for [`_time:duration` filter](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter). Previously the `_time:duration` filter was treated as `_time:[now-duration, now)`, while it should be treated as `_time:[end-duration, end)`.
|
* BUGFIX: [HTTP querying APIs](https://docs.victoriametrics.com/victorialogs/querying/#http-api): properly take into account the `end` query arg when calculating time range for [`_time:duration` filter](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter). Previously the `_time:duration` filter was treated as `_time:[now-duration, now)`, while it should be treated as `_time:[end-duration, end)`.
|
||||||
|
|
||||||
## [v0.41.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.41.0-victorialogs)
|
## [v0.41.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.41.0-victorialogs)
|
||||||
|
|
|
@ -1726,7 +1726,9 @@ func (c *blockResultColumn) getValuesEncoded(br *blockResult) []string {
|
||||||
return c.valuesEncoded
|
return c.valuesEncoded
|
||||||
}
|
}
|
||||||
|
|
||||||
// forEachDictValue calls f for every value in the column dictionary.
|
// forEachDictValue calls f for every matching value in the column dictionary.
|
||||||
|
//
|
||||||
|
// It properly skips non-matching dict values.
|
||||||
func (c *blockResultColumn) forEachDictValue(br *blockResult, f func(v string)) {
|
func (c *blockResultColumn) forEachDictValue(br *blockResult, f func(v string)) {
|
||||||
if c.valueType != valueTypeDict {
|
if c.valueType != valueTypeDict {
|
||||||
logger.Panicf("BUG: unexpected column valueType=%d; want %d", c.valueType, valueTypeDict)
|
logger.Panicf("BUG: unexpected column valueType=%d; want %d", c.valueType, valueTypeDict)
|
||||||
|
@ -1756,7 +1758,9 @@ func (c *blockResultColumn) forEachDictValue(br *blockResult, f func(v string))
|
||||||
encoding.PutUint64s(a)
|
encoding.PutUint64s(a)
|
||||||
}
|
}
|
||||||
|
|
||||||
// forEachDictValueWithHits calls f for every value in the column dictionary.
|
// forEachDictValueWithHits calls f for every matching value in the column dictionary.
|
||||||
|
//
|
||||||
|
// It properly skips non-matching dict values.
|
||||||
//
|
//
|
||||||
// hits is the number of rows with the given value v in the column.
|
// hits is the number of rows with the given value v in the column.
|
||||||
func (c *blockResultColumn) forEachDictValueWithHits(br *blockResult, f func(v string, hits uint64)) {
|
func (c *blockResultColumn) forEachDictValueWithHits(br *blockResult, f func(v string, hits uint64)) {
|
||||||
|
|
|
@ -114,9 +114,9 @@ func (smp *statsMaxProcessor) updateStateForColumn(br *blockResult, c *blockResu
|
||||||
smp.updateStateString(v)
|
smp.updateStateString(v)
|
||||||
}
|
}
|
||||||
case valueTypeDict:
|
case valueTypeDict:
|
||||||
for _, v := range c.dictValues {
|
c.forEachDictValue(br, func(v string) {
|
||||||
smp.updateStateString(v)
|
smp.updateStateString(v)
|
||||||
}
|
})
|
||||||
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64:
|
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64:
|
||||||
bb := bbPool.Get()
|
bb := bbPool.Get()
|
||||||
bb.B = marshalUint64String(bb.B[:0], c.maxValue)
|
bb.B = marshalUint64String(bb.B[:0], c.maxValue)
|
||||||
|
|
|
@ -116,9 +116,9 @@ func (smp *statsMinProcessor) updateStateForColumn(br *blockResult, c *blockResu
|
||||||
smp.updateStateString(v)
|
smp.updateStateString(v)
|
||||||
}
|
}
|
||||||
case valueTypeDict:
|
case valueTypeDict:
|
||||||
for _, v := range c.dictValues {
|
c.forEachDictValue(br, func(v string) {
|
||||||
smp.updateStateString(v)
|
smp.updateStateString(v)
|
||||||
}
|
})
|
||||||
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64:
|
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64:
|
||||||
bb := bbPool.Get()
|
bb := bbPool.Get()
|
||||||
bb.B = marshalUint64String(bb.B[:0], c.minValue)
|
bb.B = marshalUint64String(bb.B[:0], c.minValue)
|
||||||
|
|
|
@ -82,12 +82,11 @@ func (smp *statsRowMaxProcessor) updateStatsForAllRows(br *blockResult) int {
|
||||||
case valueTypeString:
|
case valueTypeString:
|
||||||
needUpdateState = true
|
needUpdateState = true
|
||||||
case valueTypeDict:
|
case valueTypeDict:
|
||||||
for _, v := range c.dictValues {
|
c.forEachDictValue(br, func(v string) {
|
||||||
if smp.needUpdateStateString(v) {
|
if !needUpdateState && smp.needUpdateStateString(v) {
|
||||||
needUpdateState = true
|
needUpdateState = true
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
})
|
||||||
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64:
|
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64:
|
||||||
bb := bbPool.Get()
|
bb := bbPool.Get()
|
||||||
bb.B = marshalUint64String(bb.B[:0], c.maxValue)
|
bb.B = marshalUint64String(bb.B[:0], c.maxValue)
|
||||||
|
|
|
@ -82,12 +82,11 @@ func (smp *statsRowMinProcessor) updateStatsForAllRows(br *blockResult) int {
|
||||||
case valueTypeString:
|
case valueTypeString:
|
||||||
needUpdateState = true
|
needUpdateState = true
|
||||||
case valueTypeDict:
|
case valueTypeDict:
|
||||||
for _, v := range c.dictValues {
|
c.forEachDictValue(br, func(v string) {
|
||||||
if smp.needUpdateStateString(v) {
|
if !needUpdateState && smp.needUpdateStateString(v) {
|
||||||
needUpdateState = true
|
needUpdateState = true
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
})
|
||||||
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64:
|
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64:
|
||||||
bb := bbPool.Get()
|
bb := bbPool.Get()
|
||||||
bb.B = marshalUint64String(bb.B[:0], c.minValue)
|
bb.B = marshalUint64String(bb.B[:0], c.minValue)
|
||||||
|
|
|
@ -74,9 +74,9 @@ func (sup *statsUniqValuesProcessor) updateStatsForAllRowsColumn(c *blockResultC
|
||||||
stateSizeIncrease := 0
|
stateSizeIncrease := 0
|
||||||
if c.valueType == valueTypeDict {
|
if c.valueType == valueTypeDict {
|
||||||
// collect unique non-zero c.dictValues
|
// collect unique non-zero c.dictValues
|
||||||
for _, v := range c.dictValues {
|
c.forEachDictValue(br, func(v string) {
|
||||||
stateSizeIncrease += sup.updateState(v)
|
stateSizeIncrease += sup.updateState(v)
|
||||||
}
|
})
|
||||||
return stateSizeIncrease
|
return stateSizeIncrease
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -181,14 +181,12 @@ func (sup *statsUniqValuesProcessor) updateState(v string) int {
|
||||||
// Skip empty values
|
// Skip empty values
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
if _, ok := sup.m[v]; ok {
|
||||||
stateSizeIncrease := 0
|
return 0
|
||||||
if _, ok := sup.m[v]; !ok {
|
}
|
||||||
vCopy := strings.Clone(v)
|
vCopy := strings.Clone(v)
|
||||||
sup.m[vCopy] = struct{}{}
|
sup.m[vCopy] = struct{}{}
|
||||||
stateSizeIncrease += len(vCopy) + int(unsafe.Sizeof(vCopy))
|
return len(vCopy) + int(unsafe.Sizeof(vCopy))
|
||||||
}
|
|
||||||
return stateSizeIncrease
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (sup *statsUniqValuesProcessor) limitReached() bool {
|
func (sup *statsUniqValuesProcessor) limitReached() bool {
|
||||||
|
|
Loading…
Reference in a new issue