VictoriaMetrics/lib/logstorage/stats_uniq_values.go

228 lines
5.2 KiB
Go
Raw Normal View History

2024-05-03 10:54:37 +00:00
package logstorage
import (
2024-05-03 11:44:57 +00:00
"slices"
2024-05-03 10:54:37 +00:00
"sort"
"strconv"
"strings"
"unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
)
2024-05-07 21:38:09 +00:00
// statsUniqValues holds the parsed arguments of the `uniq_values(...)` stats function.
type statsUniqValues struct {
	// fields lists the field names passed to uniq_values().
	fields []string

	// containsStar tells whether fields contains the "*" entry.
	containsStar bool
}
2024-05-07 21:38:09 +00:00
func (su *statsUniqValues) String() string {
return "uniq_values(" + fieldNamesString(su.fields) + ")"
2024-05-03 10:54:37 +00:00
}
2024-05-07 21:38:09 +00:00
func (su *statsUniqValues) neededFields() []string {
2024-05-03 11:44:57 +00:00
return su.fields
2024-05-03 10:54:37 +00:00
}
2024-05-07 21:38:09 +00:00
func (su *statsUniqValues) newStatsProcessor() (statsProcessor, int) {
sup := &statsUniqValuesProcessor{
2024-05-03 10:54:37 +00:00
su: su,
m: make(map[string]struct{}),
}
return sup, int(unsafe.Sizeof(*sup))
}
2024-05-07 21:38:09 +00:00
type statsUniqValuesProcessor struct {
su *statsUniqValues
2024-05-03 10:54:37 +00:00
m map[string]struct{}
}
2024-05-07 21:38:09 +00:00
func (sup *statsUniqValuesProcessor) updateStatsForAllRows(br *blockResult) int {
2024-05-03 11:44:57 +00:00
stateSizeIncrease := 0
2024-05-03 12:03:17 +00:00
if sup.su.containsStar {
2024-05-03 11:44:57 +00:00
columns := br.getColumns()
for i := range columns {
stateSizeIncrease += sup.updateStatsForAllRowsColumn(&columns[i], br)
}
} else {
2024-05-03 12:03:17 +00:00
for _, field := range sup.su.fields {
2024-05-03 11:44:57 +00:00
c := br.getColumnByName(field)
stateSizeIncrease += sup.updateStatsForAllRowsColumn(&c, br)
}
}
return stateSizeIncrease
}
2024-05-03 10:54:37 +00:00
2024-05-07 21:38:09 +00:00
func (sup *statsUniqValuesProcessor) updateStatsForAllRowsColumn(c *blockResultColumn, br *blockResult) int {
2024-05-03 11:44:57 +00:00
m := sup.m
2024-05-03 10:54:37 +00:00
stateSizeIncrease := 0
if c.isConst {
// collect unique const values
v := c.encodedValues[0]
if v == "" {
// skip empty values
return stateSizeIncrease
}
if _, ok := m[v]; !ok {
vCopy := strings.Clone(v)
m[vCopy] = struct{}{}
stateSizeIncrease += len(vCopy) + int(unsafe.Sizeof(vCopy))
}
return stateSizeIncrease
}
if c.valueType == valueTypeDict {
// collect unique non-zero c.dictValues
for _, v := range c.dictValues {
if v == "" {
// skip empty values
continue
}
if _, ok := m[v]; !ok {
vCopy := strings.Clone(v)
m[vCopy] = struct{}{}
stateSizeIncrease += len(vCopy) + int(unsafe.Sizeof(vCopy))
}
}
return stateSizeIncrease
}
// slow path - collect unique values across all rows
values := c.getValues(br)
for i, v := range values {
if v == "" {
// skip empty values
continue
}
if i > 0 && values[i-1] == v {
// This value has been already counted.
continue
}
if _, ok := m[v]; !ok {
vCopy := strings.Clone(v)
m[vCopy] = struct{}{}
stateSizeIncrease += len(vCopy) + int(unsafe.Sizeof(vCopy))
}
}
return stateSizeIncrease
}
2024-05-07 21:38:09 +00:00
func (sup *statsUniqValuesProcessor) updateStatsForRow(br *blockResult, rowIdx int) int {
2024-05-03 10:54:37 +00:00
stateSizeIncrease := 0
2024-05-03 12:03:17 +00:00
if sup.su.containsStar {
2024-05-03 11:44:57 +00:00
columns := br.getColumns()
for i := range columns {
stateSizeIncrease += sup.updateStatsForRowColumn(&columns[i], br, rowIdx)
}
} else {
2024-05-03 12:03:17 +00:00
for _, field := range sup.su.fields {
2024-05-03 11:44:57 +00:00
c := br.getColumnByName(field)
stateSizeIncrease += sup.updateStatsForRowColumn(&c, br, rowIdx)
}
}
return stateSizeIncrease
}
2024-05-07 21:38:09 +00:00
func (sup *statsUniqValuesProcessor) updateStatsForRowColumn(c *blockResultColumn, br *blockResult, rowIdx int) int {
2024-05-03 11:44:57 +00:00
m := sup.m
stateSizeIncrease := 0
2024-05-03 10:54:37 +00:00
if c.isConst {
// collect unique const values
v := c.encodedValues[0]
if v == "" {
// skip empty values
return stateSizeIncrease
}
if _, ok := m[v]; !ok {
vCopy := strings.Clone(v)
m[vCopy] = struct{}{}
stateSizeIncrease += len(vCopy) + int(unsafe.Sizeof(vCopy))
}
return stateSizeIncrease
}
if c.valueType == valueTypeDict {
// collect unique non-zero c.dictValues
dictIdx := c.encodedValues[rowIdx][0]
v := c.dictValues[dictIdx]
if v == "" {
// skip empty values
return stateSizeIncrease
}
if _, ok := m[v]; !ok {
vCopy := strings.Clone(v)
m[vCopy] = struct{}{}
stateSizeIncrease += len(vCopy) + int(unsafe.Sizeof(vCopy))
}
return stateSizeIncrease
}
// collect unique values for the given rowIdx.
v := c.getValueAtRow(br, rowIdx)
if v == "" {
// skip empty values
return stateSizeIncrease
}
if _, ok := m[v]; !ok {
vCopy := strings.Clone(v)
m[vCopy] = struct{}{}
stateSizeIncrease += len(vCopy) + int(unsafe.Sizeof(vCopy))
}
return stateSizeIncrease
}
2024-05-07 21:38:09 +00:00
func (sup *statsUniqValuesProcessor) mergeState(sfp statsProcessor) {
src := sfp.(*statsUniqValuesProcessor)
2024-05-03 10:54:37 +00:00
m := sup.m
for k := range src.m {
if _, ok := m[k]; !ok {
m[k] = struct{}{}
}
}
}
2024-05-07 21:38:09 +00:00
func (sup *statsUniqValuesProcessor) finalizeStats() string {
2024-05-03 10:54:37 +00:00
if len(sup.m) == 0 {
return "[]"
}
// Sort unique items
items := make([]string, 0, len(sup.m))
for k := range sup.m {
items = append(items, k)
}
sort.Strings(items)
// Marshal items into JSON array.
// Pre-allocate buffer for serialized items.
// Assume that there is no need in quoting items. Otherwise additional reallocations
// for the allocated buffer are possible.
bufSize := len(items) + 1
for _, item := range items {
bufSize += len(item)
}
b := make([]byte, 0, bufSize)
b = append(b, '[')
b = strconv.AppendQuote(b, items[0])
for _, item := range items[1:] {
b = append(b, ',')
b = strconv.AppendQuote(b, item)
}
b = append(b, ']')
return bytesutil.ToUnsafeString(b)
}
2024-05-07 21:38:09 +00:00
func parseStatsUniqValues(lex *lexer) (*statsUniqValues, error) {
fields, err := parseFieldNamesForStatsFunc(lex, "uniq_values")
2024-05-03 10:54:37 +00:00
if err != nil {
return nil, err
}
2024-05-07 21:38:09 +00:00
su := &statsUniqValues{
2024-05-03 11:44:57 +00:00
fields: fields,
containsStar: slices.Contains(fields, "*"),
2024-05-03 10:54:37 +00:00
}
return su, nil
}