// Source: VictoriaMetrics/lib/logstorage/stats_values.go (209 lines, 4.8 KiB, Go)

2024-05-11 03:28:36 +00:00
package logstorage
import (
"fmt"
"strings"
"unsafe"
)
// statsValues holds the parsed arguments of the `values(...)` stats function.
type statsValues struct {
	// fields lists the names of the fields to collect values from.
	// When it is empty, values from all the columns are collected.
	fields []string

	// limit is the maximum number of values to return.
	// Zero means that the number of values is not limited.
	limit uint64
}
func (sv *statsValues) String() string {
2024-05-22 09:25:49 +00:00
s := "values(" + statsFuncFieldsToString(sv.fields) + ")"
2024-05-11 03:28:36 +00:00
if sv.limit > 0 {
s += fmt.Sprintf(" limit %d", sv.limit)
}
return s
}
2024-05-17 02:11:10 +00:00
func (sv *statsValues) updateNeededFields(neededFields fieldsSet) {
2024-05-22 09:25:49 +00:00
updateNeededFieldsForStatsFunc(neededFields, sv.fields)
2024-05-11 03:28:36 +00:00
}
// newStatsProcessor returns a fresh processor bound to sv together with
// the size of the processor struct itself in bytes.
func (sv *statsValues) newStatsProcessor() (statsProcessor, int) {
	p := new(statsValuesProcessor)
	p.sv = sv
	stateSize := int(unsafe.Sizeof(*p))
	return p, stateSize
}
// statsValuesProcessor accumulates the values seen so far for a single
// `values(...)` stats function.
type statsValuesProcessor struct {
	// sv points to the stats function config this processor belongs to.
	sv *statsValues

	// values contains the collected values in the order they were added.
	values []string
}
func (svp *statsValuesProcessor) updateStatsForAllRows(br *blockResult) int {
if svp.limitReached() {
// Limit on the number of unique values has been reached
return 0
}
stateSizeIncrease := 0
2024-05-22 09:25:49 +00:00
fields := svp.sv.fields
if len(fields) == 0 {
2024-05-11 03:28:36 +00:00
for _, c := range br.getColumns() {
stateSizeIncrease += svp.updateStatsForAllRowsColumn(c, br)
}
} else {
2024-05-22 09:25:49 +00:00
for _, field := range fields {
2024-05-11 03:28:36 +00:00
c := br.getColumnByName(field)
stateSizeIncrease += svp.updateStatsForAllRowsColumn(c, br)
}
}
return stateSizeIncrease
}
func (svp *statsValuesProcessor) updateStatsForAllRowsColumn(c *blockResultColumn, br *blockResult) int {
stateSizeIncrease := 0
if c.isConst {
2024-05-15 20:19:21 +00:00
v := strings.Clone(c.valuesEncoded[0])
2024-05-11 03:28:36 +00:00
stateSizeIncrease += len(v)
values := svp.values
for range br.timestamps {
values = append(values, v)
}
svp.values = values
stateSizeIncrease += len(br.timestamps) * int(unsafe.Sizeof(values[0]))
return stateSizeIncrease
}
if c.valueType == valueTypeDict {
dictValues := make([]string, len(c.dictValues))
for i, v := range c.dictValues {
dictValues[i] = strings.Clone(v)
stateSizeIncrease += len(v)
}
values := svp.values
2024-05-15 20:19:21 +00:00
for _, encodedValue := range c.getValuesEncoded(br) {
2024-05-11 03:28:36 +00:00
idx := encodedValue[0]
values = append(values, dictValues[idx])
}
svp.values = values
stateSizeIncrease += len(br.timestamps) * int(unsafe.Sizeof(values[0]))
return stateSizeIncrease
}
values := svp.values
for _, v := range c.getValues(br) {
if len(values) == 0 || values[len(values)-1] != v {
v = strings.Clone(v)
stateSizeIncrease += len(v)
}
values = append(values, v)
}
svp.values = values
stateSizeIncrease += len(br.timestamps) * int(unsafe.Sizeof(values[0]))
return stateSizeIncrease
}
func (svp *statsValuesProcessor) updateStatsForRow(br *blockResult, rowIdx int) int {
if svp.limitReached() {
// Limit on the number of unique values has been reached
return 0
}
stateSizeIncrease := 0
2024-05-22 09:25:49 +00:00
fields := svp.sv.fields
if len(fields) == 0 {
2024-05-11 03:28:36 +00:00
for _, c := range br.getColumns() {
stateSizeIncrease += svp.updateStatsForRowColumn(c, br, rowIdx)
}
} else {
2024-05-22 09:25:49 +00:00
for _, field := range fields {
2024-05-11 03:28:36 +00:00
c := br.getColumnByName(field)
stateSizeIncrease += svp.updateStatsForRowColumn(c, br, rowIdx)
}
}
return stateSizeIncrease
}
func (svp *statsValuesProcessor) updateStatsForRowColumn(c *blockResultColumn, br *blockResult, rowIdx int) int {
stateSizeIncrease := 0
if c.isConst {
2024-05-15 20:19:21 +00:00
v := strings.Clone(c.valuesEncoded[0])
2024-05-11 03:28:36 +00:00
stateSizeIncrease += len(v)
svp.values = append(svp.values, v)
stateSizeIncrease += int(unsafe.Sizeof(svp.values[0]))
return stateSizeIncrease
}
if c.valueType == valueTypeDict {
// collect unique non-zero c.dictValues
2024-05-15 20:19:21 +00:00
valuesEncoded := c.getValuesEncoded(br)
dictIdx := valuesEncoded[rowIdx][0]
2024-05-11 03:28:36 +00:00
v := strings.Clone(c.dictValues[dictIdx])
stateSizeIncrease += len(v)
svp.values = append(svp.values, v)
stateSizeIncrease += int(unsafe.Sizeof(svp.values[0]))
return stateSizeIncrease
}
// collect unique values for the given rowIdx.
v := c.getValueAtRow(br, rowIdx)
v = strings.Clone(v)
stateSizeIncrease += len(v)
svp.values = append(svp.values, v)
stateSizeIncrease += int(unsafe.Sizeof(svp.values[0]))
return stateSizeIncrease
}
// mergeState appends the values collected by sfp to svp.
//
// sfp must be a *statsValuesProcessor. The merge is skipped when svp has
// already reached the limit.
func (svp *statsValuesProcessor) mergeState(sfp statsProcessor) {
	if !svp.limitReached() {
		other := sfp.(*statsValuesProcessor)
		svp.values = append(svp.values, other.values...)
	}
}
// finalizeStats returns the collected values marshaled with marshalJSONArray.
//
// It returns "[]" when no values were collected. When a non-zero limit is set,
// at most limit first values are returned.
func (svp *statsValuesProcessor) finalizeStats() string {
	vals := svp.values
	n := uint64(len(vals))
	if n == 0 {
		return "[]"
	}

	limit := svp.sv.limit
	if limit > 0 && n > limit {
		// More values than requested were collected - drop the excess.
		vals = vals[:limit]
	}
	return marshalJSONArray(vals)
}
// limitReached reports whether the number of collected values has reached
// the configured limit. It always returns false when the limit is zero.
func (svp *statsValuesProcessor) limitReached() bool {
	limit := svp.sv.limit
	if limit == 0 {
		// Zero limit means "no limit".
		return false
	}
	return uint64(len(svp.values)) >= limit
}
func parseStatsValues(lex *lexer) (*statsValues, error) {
2024-05-22 09:25:49 +00:00
fields, err := parseStatsFuncFields(lex, "values")
2024-05-11 03:28:36 +00:00
if err != nil {
return nil, err
}
sv := &statsValues{
2024-05-22 09:25:49 +00:00
fields: fields,
2024-05-11 03:28:36 +00:00
}
if lex.isKeyword("limit") {
lex.nextToken()
n, ok := tryParseUint64(lex.token)
if !ok {
return nil, fmt.Errorf("cannot parse 'limit %s' for 'values': %w", lex.token, err)
}
lex.nextToken()
sv.limit = n
}
return sv, nil
}