VictoriaMetrics/lib/logstorage/stats_quantile.go

295 lines
5.7 KiB
Go
Raw Permalink Normal View History

2024-05-15 02:55:44 +00:00
package logstorage
import (
"fmt"
"math"
"slices"
"strconv"
"unsafe"
"github.com/valyala/fastrand"
2024-05-20 02:08:30 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
2024-05-15 02:55:44 +00:00
)
// statsQuantile holds the parsed arguments of the `quantile(phi, field1, ..., fieldN)` stats function.
type statsQuantile struct {
	// fields contains the names of the fields to read values from.
	//
	// An empty list means that the values are read from all the columns.
	fields []string

	// phi is the requested quantile.
	phi float64

	// phiStr is the textual form of phi as it was written in the query.
	// It is used by String().
	phiStr string
}
func (sq *statsQuantile) String() string {
2024-05-22 19:01:20 +00:00
s := "quantile(" + sq.phiStr
if len(sq.fields) > 0 {
s += ", " + fieldNamesString(sq.fields)
}
s += ")"
return s
2024-05-15 02:55:44 +00:00
}
2024-05-20 02:08:30 +00:00
func (sq *statsQuantile) updateNeededFields(neededFields fieldsSet) {
2024-05-22 19:01:20 +00:00
updateNeededFieldsForStatsFunc(neededFields, sq.fields)
2024-05-15 02:55:44 +00:00
}
// newStatsProcessor returns a new processor bound to sq together with
// the size in bytes of the processor struct itself.
func (sq *statsQuantile) newStatsProcessor() (statsProcessor, int) {
	p := new(statsQuantileProcessor)
	p.sq = sq
	return p, int(unsafe.Sizeof(*p))
}
// statsQuantileProcessor collects the values needed for calculating
// the quantile described by sq.
type statsQuantileProcessor struct {
	// sq is the quantile function this processor was created for.
	sq *statsQuantile

	// h accumulates the numeric values seen by the processor.
	h histogram
}
func (sqp *statsQuantileProcessor) updateStatsForAllRows(br *blockResult) int {
stateSizeIncrease := 0
2024-05-22 19:01:20 +00:00
fields := sqp.sq.fields
if len(fields) == 0 {
2024-05-15 02:55:44 +00:00
for _, c := range br.getColumns() {
2024-05-20 02:08:30 +00:00
stateSizeIncrease += sqp.updateStateForColumn(br, c)
2024-05-15 02:55:44 +00:00
}
} else {
2024-05-22 19:01:20 +00:00
for _, field := range fields {
2024-05-15 02:55:44 +00:00
c := br.getColumnByName(field)
2024-05-20 02:08:30 +00:00
stateSizeIncrease += sqp.updateStateForColumn(br, c)
2024-05-15 02:55:44 +00:00
}
}
return stateSizeIncrease
}
func (sqp *statsQuantileProcessor) updateStatsForRow(br *blockResult, rowIdx int) int {
h := &sqp.h
stateSizeIncrease := 0
2024-05-22 19:01:20 +00:00
fields := sqp.sq.fields
if len(fields) == 0 {
2024-05-15 02:55:44 +00:00
for _, c := range br.getColumns() {
2024-05-20 02:08:30 +00:00
f, ok := c.getFloatValueAtRow(br, rowIdx)
if ok {
2024-05-15 02:55:44 +00:00
stateSizeIncrease += h.update(f)
}
}
} else {
2024-05-22 19:01:20 +00:00
for _, field := range fields {
2024-05-15 02:55:44 +00:00
c := br.getColumnByName(field)
2024-05-20 02:08:30 +00:00
f, ok := c.getFloatValueAtRow(br, rowIdx)
if ok {
stateSizeIncrease += h.update(f)
}
}
}
return stateSizeIncrease
}
func (sqp *statsQuantileProcessor) updateStateForColumn(br *blockResult, c *blockResultColumn) int {
h := &sqp.h
stateSizeIncrease := 0
if c.isConst {
f, ok := tryParseFloat64(c.valuesEncoded[0])
if ok {
for i := 0; i < br.rowsLen; i++ {
2024-05-15 02:55:44 +00:00
stateSizeIncrease += h.update(f)
}
}
2024-05-20 02:08:30 +00:00
return stateSizeIncrease
}
if c.isTime {
return 0
}
switch c.valueType {
case valueTypeString:
for _, v := range c.getValues(br) {
f, ok := tryParseFloat64(v)
if ok {
stateSizeIncrease += h.update(f)
}
}
case valueTypeDict:
dictValues := c.dictValues
a := encoding.GetFloat64s(len(dictValues))
for i, v := range dictValues {
f, ok := tryParseFloat64(v)
if !ok {
f = nan
}
a.A[i] = f
}
for _, v := range c.getValuesEncoded(br) {
idx := v[0]
f := a.A[idx]
if !math.IsNaN(f) {
h.update(f)
}
}
encoding.PutFloat64s(a)
case valueTypeUint8:
for _, v := range c.getValuesEncoded(br) {
n := unmarshalUint8(v)
h.update(float64(n))
}
case valueTypeUint16:
for _, v := range c.getValuesEncoded(br) {
n := unmarshalUint16(v)
h.update(float64(n))
}
case valueTypeUint32:
for _, v := range c.getValuesEncoded(br) {
n := unmarshalUint32(v)
h.update(float64(n))
}
case valueTypeUint64:
for _, v := range c.getValuesEncoded(br) {
n := unmarshalUint64(v)
h.update(float64(n))
}
case valueTypeFloat64:
for _, v := range c.getValuesEncoded(br) {
f := unmarshalFloat64(v)
if !math.IsNaN(f) {
h.update(f)
}
}
case valueTypeIPv4:
case valueTypeTimestampISO8601:
default:
logger.Panicf("BUG: unexpected valueType=%d", c.valueType)
2024-05-15 02:55:44 +00:00
}
return stateSizeIncrease
}
// mergeState merges the histogram collected by sfp into sqp.
//
// sfp must hold a *statsQuantileProcessor; the type assertion panics otherwise.
func (sqp *statsQuantileProcessor) mergeState(sfp statsProcessor) {
	other := sfp.(*statsQuantileProcessor)
	dst := &sqp.h
	dst.mergeState(&other.h)
}
// finalizeStats returns the sqp.sq.phi quantile over the collected values,
// formatted as a decimal number with the minimum number of digits
// needed to represent the value exactly.
func (sqp *statsQuantileProcessor) finalizeStats() string {
	return strconv.FormatFloat(sqp.h.quantile(sqp.sq.phi), 'f', -1, 64)
}
func parseStatsQuantile(lex *lexer) (*statsQuantile, error) {
if !lex.isKeyword("quantile") {
return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "quantile")
}
lex.nextToken()
fields, err := parseFieldNamesInParens(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'quantile' args: %w", err)
}
2024-05-22 19:01:20 +00:00
if len(fields) < 1 {
return nil, fmt.Errorf("'quantile' must have at least phi arg")
2024-05-15 02:55:44 +00:00
}
// Parse phi
2024-05-22 19:01:20 +00:00
phiStr := fields[0]
phi, ok := tryParseFloat64(phiStr)
2024-05-15 02:55:44 +00:00
if !ok {
2024-05-22 19:01:20 +00:00
return nil, fmt.Errorf("phi arg in 'quantile' must be floating point number; got %q", phiStr)
2024-05-15 02:55:44 +00:00
}
if phi < 0 || phi > 1 {
2024-05-22 19:01:20 +00:00
return nil, fmt.Errorf("phi arg in 'quantile' must be in the range [0..1]; got %q", phiStr)
2024-05-15 02:55:44 +00:00
}
// Parse fields
fields = fields[1:]
if slices.Contains(fields, "*") {
2024-05-22 19:01:20 +00:00
fields = nil
2024-05-15 02:55:44 +00:00
}
sq := &statsQuantile{
2024-05-22 19:01:20 +00:00
fields: fields,
2024-05-15 02:55:44 +00:00
2024-05-22 19:01:20 +00:00
phi: phi,
phiStr: phiStr,
2024-05-15 02:55:44 +00:00
}
return sq, nil
}
// histogram keeps a bounded sample of the registered values
// together with their exact min, max and count.
type histogram struct {
	// a holds the sampled values.
	// update() appends to it until it contains maxHistogramSamples items,
	// then overwrites randomly chosen items. mergeState() appends the samples from the source histogram.
	a []float64

	// min is the smallest value registered so far. It is valid only if count > 0.
	min float64

	// max is the biggest value registered so far. It is valid only if count > 0.
	max float64

	// count is the number of values registered via update() and mergeState().
	count uint64

	// rng is used by update() for choosing the sample to overwrite.
	rng fastrand.RNG
}
// update registers f in h.
//
// min, max and count are always updated. f is appended to h.a while h.a holds
// fewer than maxHistogramSamples items. After that f overwrites a randomly
// chosen item of h.a with a probability that goes down as count grows.
//
// It returns the number of bytes h.a has grown by: the size of f when it was
// appended, 0 otherwise.
func (h *histogram) update(f float64) int {
	if h.count == 0 || f < h.min {
		h.min = f
	}
	if h.count == 0 || f > h.max {
		h.max = f
	}
	h.count++

	if len(h.a) < maxHistogramSamples {
		h.a = append(h.a, f)
		return int(unsafe.Sizeof(f))
	}

	// h.count is uint64, while the random generator accepts uint32 bound.
	// Clamp the bound instead of truncating it: a truncated bound wraps around
	// to zero or to a small number once count reaches 2^32, which would make
	// nearly every new value overwrite one of the first samples.
	maxN := uint32(math.MaxUint32)
	if h.count < math.MaxUint32 {
		maxN = uint32(h.count)
	}
	if n := h.rng.Uint32n(maxN); n < uint32(len(h.a)) {
		h.a[n] = f
	}
	return 0
}
// maxHistogramSamples is the number of samples histogram.update() appends
// to histogram.a before it switches to overwriting randomly chosen samples.
const maxHistogramSamples = 100_000
// mergeState merges src into h.
//
// The samples from src are appended to h.a, the counts are summed,
// and min / max are widened to cover both histograms.
// It is a no-op when src contains no values.
func (h *histogram) mergeState(src *histogram) {
	if src.count == 0 {
		// Nothing to merge
		return
	}

	// When h is empty its min and max carry no information,
	// so they are taken from src unconditionally.
	dstEmpty := h.count == 0

	h.a = append(h.a, src.a...)
	h.count += src.count

	if dstEmpty || src.min < h.min {
		h.min = src.min
	}
	if dstEmpty || src.max > h.max {
		h.max = src.max
	}
}
// quantile returns the phi quantile over the samples stored in h.
//
// It returns nan when h has no samples, the only sample when there is exactly one,
// h.min for phi <= 0 and h.max for phi >= 1.
// Otherwise h.a is sorted in place and the sample at the index
// int(phi * len(h.a)) is returned.
func (h *histogram) quantile(phi float64) float64 {
	n := len(h.a)
	switch {
	case n == 0:
		return nan
	case n == 1:
		return h.a[0]
	case phi <= 0:
		return h.min
	case phi >= 1:
		return h.max
	}

	slices.Sort(h.a)
	if i := int(phi * float64(n)); i != n {
		return h.a[i]
	}
	return h.max
}