2024-05-12 14:33:29 +00:00
|
|
|
package logstorage
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"slices"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
"unsafe"
|
|
|
|
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
|
|
|
)
|
|
|
|
|
|
|
|
// statsUniqValues holds the settings of a single `uniq_values(...)` stats function.
type statsUniqValues struct {
	// fields contains the names of the fields to collect unique values from.
	// When it is empty, the values are collected from all the block columns.
	fields []string

	// limit is the maximum number of unique values to return.
	// Zero means that the number of values isn't limited.
	limit uint64
}
|
|
|
|
|
|
|
|
func (su *statsUniqValues) String() string {
|
2024-05-22 19:01:20 +00:00
|
|
|
s := "uniq_values(" + statsFuncFieldsToString(su.fields) + ")"
|
2024-05-12 14:33:29 +00:00
|
|
|
if su.limit > 0 {
|
|
|
|
s += fmt.Sprintf(" limit %d", su.limit)
|
|
|
|
}
|
|
|
|
return s
|
|
|
|
}
|
|
|
|
|
2024-05-20 02:08:30 +00:00
|
|
|
func (su *statsUniqValues) updateNeededFields(neededFields fieldsSet) {
|
2024-05-22 19:01:20 +00:00
|
|
|
updateNeededFieldsForStatsFunc(neededFields, su.fields)
|
2024-05-12 14:33:29 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (su *statsUniqValues) newStatsProcessor() (statsProcessor, int) {
|
|
|
|
sup := &statsUniqValuesProcessor{
|
|
|
|
su: su,
|
|
|
|
|
|
|
|
m: make(map[string]struct{}),
|
|
|
|
}
|
|
|
|
return sup, int(unsafe.Sizeof(*sup))
|
|
|
|
}
|
|
|
|
|
|
|
|
type statsUniqValuesProcessor struct {
|
|
|
|
su *statsUniqValues
|
|
|
|
|
|
|
|
m map[string]struct{}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (sup *statsUniqValuesProcessor) updateStatsForAllRows(br *blockResult) int {
|
|
|
|
if sup.limitReached() {
|
|
|
|
// Limit on the number of unique values has been reached
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
|
|
|
|
stateSizeIncrease := 0
|
2024-05-22 19:01:20 +00:00
|
|
|
fields := sup.su.fields
|
|
|
|
if len(fields) == 0 {
|
2024-05-12 14:33:29 +00:00
|
|
|
for _, c := range br.getColumns() {
|
|
|
|
stateSizeIncrease += sup.updateStatsForAllRowsColumn(c, br)
|
|
|
|
}
|
|
|
|
} else {
|
2024-05-22 19:01:20 +00:00
|
|
|
for _, field := range fields {
|
2024-05-12 14:33:29 +00:00
|
|
|
c := br.getColumnByName(field)
|
|
|
|
stateSizeIncrease += sup.updateStatsForAllRowsColumn(c, br)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return stateSizeIncrease
|
|
|
|
}
|
|
|
|
|
|
|
|
func (sup *statsUniqValuesProcessor) updateStatsForAllRowsColumn(c *blockResultColumn, br *blockResult) int {
|
|
|
|
stateSizeIncrease := 0
|
|
|
|
if c.isConst {
|
|
|
|
// collect unique const values
|
2024-05-20 02:08:30 +00:00
|
|
|
v := c.valuesEncoded[0]
|
2024-05-12 14:33:29 +00:00
|
|
|
if v == "" {
|
|
|
|
// skip empty values
|
|
|
|
return stateSizeIncrease
|
|
|
|
}
|
2024-05-22 19:01:20 +00:00
|
|
|
stateSizeIncrease += sup.updateState(v)
|
2024-05-12 14:33:29 +00:00
|
|
|
return stateSizeIncrease
|
|
|
|
}
|
|
|
|
if c.valueType == valueTypeDict {
|
|
|
|
// collect unique non-zero c.dictValues
|
|
|
|
for _, v := range c.dictValues {
|
|
|
|
if v == "" {
|
|
|
|
// skip empty values
|
|
|
|
continue
|
|
|
|
}
|
2024-05-22 19:01:20 +00:00
|
|
|
stateSizeIncrease += sup.updateState(v)
|
2024-05-12 14:33:29 +00:00
|
|
|
}
|
|
|
|
return stateSizeIncrease
|
|
|
|
}
|
|
|
|
|
|
|
|
// slow path - collect unique values across all rows
|
|
|
|
values := c.getValues(br)
|
|
|
|
for i, v := range values {
|
|
|
|
if v == "" {
|
|
|
|
// skip empty values
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if i > 0 && values[i-1] == v {
|
|
|
|
// This value has been already counted.
|
|
|
|
continue
|
|
|
|
}
|
2024-05-22 19:01:20 +00:00
|
|
|
stateSizeIncrease += sup.updateState(v)
|
2024-05-12 14:33:29 +00:00
|
|
|
}
|
|
|
|
return stateSizeIncrease
|
|
|
|
}
|
|
|
|
|
|
|
|
func (sup *statsUniqValuesProcessor) updateStatsForRow(br *blockResult, rowIdx int) int {
|
|
|
|
if sup.limitReached() {
|
|
|
|
// Limit on the number of unique values has been reached
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
|
|
|
|
stateSizeIncrease := 0
|
2024-05-22 19:01:20 +00:00
|
|
|
fields := sup.su.fields
|
|
|
|
if len(fields) == 0 {
|
2024-05-12 14:33:29 +00:00
|
|
|
for _, c := range br.getColumns() {
|
|
|
|
stateSizeIncrease += sup.updateStatsForRowColumn(c, br, rowIdx)
|
|
|
|
}
|
|
|
|
} else {
|
2024-05-22 19:01:20 +00:00
|
|
|
for _, field := range fields {
|
2024-05-12 14:33:29 +00:00
|
|
|
c := br.getColumnByName(field)
|
|
|
|
stateSizeIncrease += sup.updateStatsForRowColumn(c, br, rowIdx)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return stateSizeIncrease
|
|
|
|
}
|
|
|
|
|
|
|
|
func (sup *statsUniqValuesProcessor) updateStatsForRowColumn(c *blockResultColumn, br *blockResult, rowIdx int) int {
|
|
|
|
stateSizeIncrease := 0
|
|
|
|
if c.isConst {
|
|
|
|
// collect unique const values
|
2024-05-20 02:08:30 +00:00
|
|
|
v := c.valuesEncoded[0]
|
2024-05-12 14:33:29 +00:00
|
|
|
if v == "" {
|
|
|
|
// skip empty values
|
|
|
|
return stateSizeIncrease
|
|
|
|
}
|
2024-05-22 19:01:20 +00:00
|
|
|
stateSizeIncrease += sup.updateState(v)
|
2024-05-12 14:33:29 +00:00
|
|
|
return stateSizeIncrease
|
|
|
|
}
|
|
|
|
if c.valueType == valueTypeDict {
|
|
|
|
// collect unique non-zero c.dictValues
|
2024-05-20 02:08:30 +00:00
|
|
|
valuesEncoded := c.getValuesEncoded(br)
|
|
|
|
dictIdx := valuesEncoded[rowIdx][0]
|
2024-05-12 14:33:29 +00:00
|
|
|
v := c.dictValues[dictIdx]
|
|
|
|
if v == "" {
|
|
|
|
// skip empty values
|
|
|
|
return stateSizeIncrease
|
|
|
|
}
|
2024-05-22 19:01:20 +00:00
|
|
|
stateSizeIncrease += sup.updateState(v)
|
2024-05-12 14:33:29 +00:00
|
|
|
return stateSizeIncrease
|
|
|
|
}
|
|
|
|
|
|
|
|
// collect unique values for the given rowIdx.
|
|
|
|
v := c.getValueAtRow(br, rowIdx)
|
|
|
|
if v == "" {
|
|
|
|
// skip empty values
|
|
|
|
return stateSizeIncrease
|
|
|
|
}
|
2024-05-22 19:01:20 +00:00
|
|
|
stateSizeIncrease += sup.updateState(v)
|
2024-05-12 14:33:29 +00:00
|
|
|
return stateSizeIncrease
|
|
|
|
}
|
|
|
|
|
|
|
|
func (sup *statsUniqValuesProcessor) mergeState(sfp statsProcessor) {
|
|
|
|
if sup.limitReached() {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
src := sfp.(*statsUniqValuesProcessor)
|
|
|
|
for k := range src.m {
|
2024-05-22 19:01:20 +00:00
|
|
|
if _, ok := sup.m[k]; !ok {
|
|
|
|
sup.m[k] = struct{}{}
|
2024-05-12 14:33:29 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (sup *statsUniqValuesProcessor) finalizeStats() string {
|
|
|
|
if len(sup.m) == 0 {
|
|
|
|
return "[]"
|
|
|
|
}
|
|
|
|
|
|
|
|
items := make([]string, 0, len(sup.m))
|
|
|
|
for k := range sup.m {
|
|
|
|
items = append(items, k)
|
|
|
|
}
|
2024-05-22 19:01:20 +00:00
|
|
|
sortStrings(items)
|
2024-05-12 14:33:29 +00:00
|
|
|
|
|
|
|
if limit := sup.su.limit; limit > 0 && uint64(len(items)) > limit {
|
|
|
|
items = items[:limit]
|
|
|
|
}
|
|
|
|
|
|
|
|
return marshalJSONArray(items)
|
|
|
|
}
|
|
|
|
|
2024-05-22 19:01:20 +00:00
|
|
|
func sortStrings(a []string) {
|
|
|
|
slices.SortFunc(a, func(x, y string) int {
|
|
|
|
if x == y {
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
if lessString(x, y) {
|
|
|
|
return -1
|
|
|
|
}
|
|
|
|
return 1
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
func (sup *statsUniqValuesProcessor) updateState(v string) int {
|
|
|
|
stateSizeIncrease := 0
|
|
|
|
if _, ok := sup.m[v]; !ok {
|
|
|
|
vCopy := strings.Clone(v)
|
|
|
|
sup.m[vCopy] = struct{}{}
|
|
|
|
stateSizeIncrease += len(vCopy) + int(unsafe.Sizeof(vCopy))
|
|
|
|
}
|
|
|
|
return stateSizeIncrease
|
|
|
|
}
|
|
|
|
|
2024-05-12 14:33:29 +00:00
|
|
|
func (sup *statsUniqValuesProcessor) limitReached() bool {
|
|
|
|
limit := sup.su.limit
|
|
|
|
return limit > 0 && uint64(len(sup.m)) >= limit
|
|
|
|
}
|
|
|
|
|
|
|
|
func marshalJSONArray(items []string) string {
|
|
|
|
// Pre-allocate buffer for serialized items.
|
|
|
|
// Assume that there is no need in quoting items. Otherwise additional reallocations
|
|
|
|
// for the allocated buffer are possible.
|
|
|
|
bufSize := len(items) + 1
|
|
|
|
for _, item := range items {
|
|
|
|
bufSize += len(item)
|
|
|
|
}
|
|
|
|
b := make([]byte, 0, bufSize)
|
|
|
|
|
|
|
|
b = append(b, '[')
|
|
|
|
b = strconv.AppendQuote(b, items[0])
|
|
|
|
for _, item := range items[1:] {
|
|
|
|
b = append(b, ',')
|
|
|
|
b = strconv.AppendQuote(b, item)
|
|
|
|
}
|
|
|
|
b = append(b, ']')
|
|
|
|
|
|
|
|
return bytesutil.ToUnsafeString(b)
|
|
|
|
}
|
|
|
|
|
|
|
|
func parseStatsUniqValues(lex *lexer) (*statsUniqValues, error) {
|
2024-05-22 19:01:20 +00:00
|
|
|
fields, err := parseStatsFuncFields(lex, "uniq_values")
|
2024-05-12 14:33:29 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
su := &statsUniqValues{
|
2024-05-22 19:01:20 +00:00
|
|
|
fields: fields,
|
2024-05-12 14:33:29 +00:00
|
|
|
}
|
|
|
|
if lex.isKeyword("limit") {
|
|
|
|
lex.nextToken()
|
|
|
|
n, ok := tryParseUint64(lex.token)
|
|
|
|
if !ok {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'limit %s' for 'uniq_values': %w", lex.token, err)
|
|
|
|
}
|
|
|
|
lex.nextToken()
|
|
|
|
su.limit = n
|
|
|
|
}
|
|
|
|
return su, nil
|
|
|
|
}
|