From d9fdbf907c3e5e085514a370607f18f74a853e85 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Mon, 29 Apr 2024 03:20:43 +0200 Subject: [PATCH] wip --- lib/logstorage/pipes.go | 478 +-------------------------------- lib/logstorage/stats_count.go | 114 ++++++++ lib/logstorage/stats_sum.go | 127 +++++++++ lib/logstorage/stats_unique.go | 255 ++++++++++++++++++ 4 files changed, 499 insertions(+), 475 deletions(-) create mode 100644 lib/logstorage/stats_count.go create mode 100644 lib/logstorage/stats_sum.go create mode 100644 lib/logstorage/stats_unique.go diff --git a/lib/logstorage/pipes.go b/lib/logstorage/pipes.go index 74ecd56be..39eb09ef1 100644 --- a/lib/logstorage/pipes.go +++ b/lib/logstorage/pipes.go @@ -2,9 +2,7 @@ package logstorage import ( "fmt" - "math" "slices" - "strconv" "strings" "sync/atomic" "unsafe" @@ -593,19 +591,19 @@ func parseStatsPipe(lex *lexer) (*statsPipe, error) { func parseStatsFunc(lex *lexer) (statsFunc, error) { switch { case lex.isKeyword("count"): - sfc, err := parseStatsFuncCount(lex) + sfc, err := parseStatsCount(lex) if err != nil { return nil, fmt.Errorf("cannot parse 'count' func: %w", err) } return sfc, nil case lex.isKeyword("uniq"): - sfu, err := parseStatsFuncUniq(lex) + sfu, err := parseStatsUniq(lex) if err != nil { return nil, fmt.Errorf("cannot parse 'uniq' func: %w", err) } return sfu, nil case lex.isKeyword("sum"): - sfs, err := parseStatsFuncSum(lex) + sfs, err := parseStatsSum(lex) if err != nil { return nil, fmt.Errorf("cannot parse 'sum' func: %w", err) } @@ -615,476 +613,6 @@ func parseStatsFunc(lex *lexer) (statsFunc, error) { } } -type statsFuncCount struct { - fields []string - containsStar bool - - resultName string -} - -func (sfc *statsFuncCount) String() string { - return "count(" + fieldNamesString(sfc.fields) + ") as " + quoteTokenIfNeeded(sfc.resultName) -} - -func (sfc *statsFuncCount) neededFields() []string { - return getFieldsIgnoreStar(sfc.fields) -} - -func (sfc *statsFuncCount) newStatsFuncProcessor() (statsFuncProcessor, int) { - sfcp := &statsFuncCountProcessor{ - sfc: sfc, - } - return sfcp, int(unsafe.Sizeof(*sfcp)) -} - -type statsFuncCountProcessor struct { - sfc *statsFuncCount - - rowsCount uint64 -} - -func (sfcp *statsFuncCountProcessor) updateStatsForAllRows(timestamps []int64, columns []BlockColumn) int { - fields := sfcp.sfc.fields - if len(fields) == 0 || sfcp.sfc.containsStar { - // Fast path - count all the columns. - sfcp.rowsCount += uint64(len(timestamps)) - return 0 - } - - // Slow path - count rows containing at least a single non-empty value for the fields enumerated inside count(). 
- bm := getFilterBitmap(len(timestamps)) - defer putFilterBitmap(bm) - - bm.setBits() - for _, f := range fields { - if idx := getBlockColumnIndex(columns, f); idx >= 0 { - values := columns[idx].Values - bm.forEachSetBit(func(i int) bool { - return values[i] == "" - }) - } - } - - emptyValues := 0 - bm.forEachSetBit(func(i int) bool { - emptyValues++ - return true - }) - - sfcp.rowsCount += uint64(len(timestamps) - emptyValues) - return 0 -} - -func (sfcp *statsFuncCountProcessor) updateStatsForRow(_ []int64, columns []BlockColumn, rowIdx int) int { - fields := sfcp.sfc.fields - if len(fields) == 0 || sfcp.sfc.containsStar { - // Fast path - count the given column - sfcp.rowsCount++ - return 0 - } - - // Slow path - count the row at rowIdx if at least a single field enumerated inside count() is non-empty - for _, f := range fields { - if idx := getBlockColumnIndex(columns, f); idx >= 0 && columns[idx].Values[rowIdx] != "" { - sfcp.rowsCount++ - return 0 - } - } - return 0 -} - -func (sfcp *statsFuncCountProcessor) mergeState(sfp statsFuncProcessor) { - src := sfp.(*statsFuncCountProcessor) - sfcp.rowsCount += src.rowsCount -} - -func (sfcp *statsFuncCountProcessor) finalizeStats() (string, string) { - value := strconv.FormatUint(sfcp.rowsCount, 10) - return sfcp.sfc.resultName, value -} - -func parseStatsFuncCount(lex *lexer) (*statsFuncCount, error) { - lex.nextToken() - fields, err := parseFieldNamesInParens(lex) - if err != nil { - return nil, fmt.Errorf("cannot parse 'count' args: %w", err) - } - resultName, err := parseResultName(lex) - if err != nil { - return nil, fmt.Errorf("cannot parse result name: %w", err) - } - sfc := &statsFuncCount{ - fields: fields, - containsStar: slices.Contains(fields, "*"), - resultName: resultName, - } - return sfc, nil -} - -type statsFuncSum struct { - fields []string - containsStar bool - resultName string -} - -func (sfs *statsFuncSum) String() string { - return "sum(" + fieldNamesString(sfs.fields) + ") as " + quoteTokenIfNeeded(sfs.resultName) -} - -func (sfs *statsFuncSum) neededFields() []string { - return sfs.fields -} - -func (sfs *statsFuncSum) newStatsFuncProcessor() (statsFuncProcessor, int) { - sfsp := &statsFuncSumProcessor{ - sfs: sfs, - } - return sfsp, int(unsafe.Sizeof(*sfsp)) -} - -type statsFuncSumProcessor struct { - sfs *statsFuncSum - - sum float64 -} - -func (sfsp *statsFuncSumProcessor) updateStatsForAllRows(timestamps []int64, columns []BlockColumn) int { - if sfsp.sfs.containsStar { - // Sum all the columns - for _, c := range columns { - sfsp.sum += sumValues(c.Values) - } - return 0 - } - - // Sum the requested columns - for _, field := range sfsp.sfs.fields { - if idx := getBlockColumnIndex(columns, field); idx >= 0 { - sfsp.sum += sumValues(columns[idx].Values) - } - } - return 0 -} - -func sumValues(values []string) float64 { - sum := float64(0) - f := float64(0) - for i, v := range values { - if i == 0 || values[i-1] != v { - f, _ = tryParseFloat64(v) - if math.IsNaN(f) { - // Ignore NaN values, since this is the expected behaviour by most users. 
- f = 0 - } - } - sum += f - } - return sum -} - -func (sfsp *statsFuncSumProcessor) updateStatsForRow(_ []int64, columns []BlockColumn, rowIdx int) int { - if sfsp.sfs.containsStar { - // Sum all the fields for the given row - for _, c := range columns { - v := c.Values[rowIdx] - f, _ := tryParseFloat64(v) - if !math.IsNaN(f) { - sfsp.sum += f - } - } - return 0 - } - - // Sum only the given fields for the given row - for _, field := range sfsp.sfs.fields { - if idx := getBlockColumnIndex(columns, field); idx >= 0 { - v := columns[idx].Values[rowIdx] - f, _ := tryParseFloat64(v) - if !math.IsNaN(f) { - sfsp.sum += f - } - } - } - return 0 -} - -func (sfsp *statsFuncSumProcessor) mergeState(sfp statsFuncProcessor) { - src := sfp.(*statsFuncSumProcessor) - sfsp.sum += src.sum -} - -func (sfsp *statsFuncSumProcessor) finalizeStats() (string, string) { - value := strconv.FormatFloat(sfsp.sum, 'g', -1, 64) - return sfsp.sfs.resultName, value -} - -func parseStatsFuncSum(lex *lexer) (*statsFuncSum, error) { - lex.nextToken() - fields, err := parseFieldNamesInParens(lex) - if err != nil { - return nil, fmt.Errorf("cannot parse 'sum' args: %w", err) - } - if len(fields) == 0 { - return nil, fmt.Errorf("'sum' must contain at least one arg") - } - resultName, err := parseResultName(lex) - if err != nil { - return nil, fmt.Errorf("cannot parse result name: %w", err) - } - sfs := &statsFuncSum{ - fields: fields, - containsStar: slices.Contains(fields, "*"), - resultName: resultName, - } - return sfs, nil -} - -type statsFuncUniq struct { - fields []string - containsStar bool - resultName string -} - -func (sfu *statsFuncUniq) String() string { - return "uniq(" + fieldNamesString(sfu.fields) + ") as " + quoteTokenIfNeeded(sfu.resultName) -} - -func (sfu *statsFuncUniq) neededFields() []string { - return sfu.fields -} - -func (sfu *statsFuncUniq) newStatsFuncProcessor() (statsFuncProcessor, int) { - sfup := &statsFuncUniqProcessor{ - sfu: sfu, - - m: make(map[string]struct{}), - } - return sfup, int(unsafe.Sizeof(*sfup)) -} - -type statsFuncUniqProcessor struct { - sfu *statsFuncUniq - - m map[string]struct{} - - columnValues [][]string - keyBuf []byte -} - -func (sfup *statsFuncUniqProcessor) updateStatsForAllRows(timestamps []int64, columns []BlockColumn) int { - fields := sfup.sfu.fields - m := sfup.m - - stateSizeIncrease := 0 - if len(fields) == 0 || sfup.sfu.containsStar { - // Count unique rows - keyBuf := sfup.keyBuf - for i := range timestamps { - seenKey := true - for _, c := range columns { - values := c.Values - if i == 0 || values[i-1] != values[i] { - seenKey = false - break - } - } - if seenKey { - continue - } - - allEmptyValues := true - keyBuf = keyBuf[:0] - for _, c := range columns { - v := c.Values[i] - if v != "" { - allEmptyValues = false - } - // Put column name into key, since every block can contain different set of columns for '*' selector. 
- keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(c.Name)) - keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(v)) - } - if allEmptyValues { - // Do not count empty values - continue - } - if _, ok := m[string(keyBuf)]; !ok { - m[string(keyBuf)] = struct{}{} - stateSizeIncrease += len(keyBuf) + int(unsafe.Sizeof("")) - } - } - sfup.keyBuf = keyBuf - return stateSizeIncrease - } - if len(fields) == 1 { - // Fast path for a single column - if idx := getBlockColumnIndex(columns, fields[0]); idx >= 0 { - values := columns[idx].Values - for i, v := range values { - if v == "" { - // Do not count empty values - continue - } - if i > 0 && values[i-1] == v { - continue - } - if _, ok := m[v]; !ok { - vCopy := strings.Clone(v) - m[vCopy] = struct{}{} - stateSizeIncrease += len(vCopy) + int(unsafe.Sizeof(vCopy)) - } - } - } - return stateSizeIncrease - } - - // Slow path for multiple columns. - - // Pre-calculate column values for byFields in order to speed up building group key in the loop below. - sfup.columnValues = appendBlockColumnValues(sfup.columnValues[:0], columns, fields, len(timestamps)) - columnValues := sfup.columnValues - - keyBuf := sfup.keyBuf - for i := range timestamps { - seenKey := true - for _, values := range columnValues { - if i == 0 || values[i-1] != values[i] { - seenKey = false - } - } - if seenKey { - continue - } - - allEmptyValues := true - keyBuf = keyBuf[:0] - for _, values := range columnValues { - v := values[i] - if v != "" { - allEmptyValues = false - } - keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(v)) - } - if allEmptyValues { - // Do not count empty values - continue - } - if _, ok := m[string(keyBuf)]; !ok { - m[string(keyBuf)] = struct{}{} - stateSizeIncrease += len(keyBuf) + int(unsafe.Sizeof("")) - } - } - sfup.keyBuf = keyBuf - return stateSizeIncrease -} - -func (sfup *statsFuncUniqProcessor) updateStatsForRow(timestamps []int64, columns []BlockColumn, rowIdx int) int { - fields := sfup.sfu.fields - m := sfup.m - - stateSizeIncrease := 0 - if len(fields) == 0 || sfup.sfu.containsStar { - // Count unique rows - allEmptyValues := true - keyBuf := sfup.keyBuf[:0] - for _, c := range columns { - v := c.Values[rowIdx] - if v != "" { - allEmptyValues = false - } - // Put column name into key, since every block can contain different set of columns for '*' selector. - keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(c.Name)) - keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(v)) - } - sfup.keyBuf = keyBuf - - if allEmptyValues { - // Do not count empty values - return stateSizeIncrease - } - if _, ok := m[string(keyBuf)]; !ok { - m[string(keyBuf)] = struct{}{} - stateSizeIncrease += len(keyBuf) + int(unsafe.Sizeof("")) - } - return stateSizeIncrease - } - if len(fields) == 1 { - // Fast path for a single column - if idx := getBlockColumnIndex(columns, fields[0]); idx >= 0 { - v := columns[idx].Values[rowIdx] - if v == "" { - // Do not count empty values - return stateSizeIncrease - } - if _, ok := m[v]; !ok { - vCopy := strings.Clone(v) - m[vCopy] = struct{}{} - stateSizeIncrease += len(vCopy) + int(unsafe.Sizeof(vCopy)) - } - } - return stateSizeIncrease - } - - // Slow path for multiple columns. 
-	allEmptyValues := true
-	keyBuf := sfup.keyBuf[:0]
-	for _, f := range fields {
-		v := ""
-		if idx := getBlockColumnIndex(columns, f); idx >= 0 {
-			v = columns[idx].Values[rowIdx]
-		}
-		if v != "" {
-			allEmptyValues = false
-		}
-		keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(v))
-	}
-	sfup.keyBuf = keyBuf
-
-	if allEmptyValues {
-		// Do not count empty values
-		return stateSizeIncrease
-	}
-	if _, ok := m[string(keyBuf)]; !ok {
-		m[string(keyBuf)] = struct{}{}
-		stateSizeIncrease += len(keyBuf) + int(unsafe.Sizeof(""))
-	}
-	return stateSizeIncrease
-}
-
-func (sfup *statsFuncUniqProcessor) mergeState(sfp statsFuncProcessor) {
-	src := sfp.(*statsFuncUniqProcessor)
-	m := sfup.m
-	for k := range src.m {
-		m[k] = struct{}{}
-	}
-}
-
-func (sfup *statsFuncUniqProcessor) finalizeStats() (string, string) {
-	n := uint64(len(sfup.m))
-	value := strconv.FormatUint(n, 10)
-	return sfup.sfu.resultName, value
-}
-
-func parseStatsFuncUniq(lex *lexer) (*statsFuncUniq, error) {
-	lex.nextToken()
-	fields, err := parseFieldNamesInParens(lex)
-	if err != nil {
-		return nil, fmt.Errorf("cannot parse 'uniq' args: %w", err)
-	}
-	if len(fields) == 0 {
-		return nil, fmt.Errorf("'uniq' must contain at least a single arg")
-	}
-	resultName, err := parseResultName(lex)
-	if err != nil {
-		return nil, fmt.Errorf("cannot parse result name: %w", err)
-	}
-	sfu := &statsFuncUniq{
-		fields:       fields,
-		containsStar: slices.Contains(fields, "*"),
-		resultName:   resultName,
-	}
-	return sfu, nil
-}
-
 func parseResultName(lex *lexer) (string, error) {
 	if lex.isKeyword("as") {
 		if !lex.mustNextToken() {
diff --git a/lib/logstorage/stats_count.go b/lib/logstorage/stats_count.go
new file mode 100644
index 000000000..22406b24f
--- /dev/null
+++ b/lib/logstorage/stats_count.go
@@ -0,0 +1,114 @@
+package logstorage
+
+import (
+	"fmt"
+	"slices"
+	"strconv"
+	"unsafe"
+)
+
+type statsCount struct {
+	fields       []string
+	containsStar bool
+
+	resultName string
+}
+
+func (sc *statsCount) String() string {
+	return "count(" + fieldNamesString(sc.fields) + ") as " + quoteTokenIfNeeded(sc.resultName)
+}
+
+func (sc *statsCount) neededFields() []string {
+	return getFieldsIgnoreStar(sc.fields)
+}
+
+func (sc *statsCount) newStatsFuncProcessor() (statsFuncProcessor, int) {
+	scp := &statsCountProcessor{
+		sc: sc,
+	}
+	return scp, int(unsafe.Sizeof(*scp))
+}
+
+type statsCountProcessor struct {
+	sc *statsCount
+
+	rowsCount uint64
+}
+
+func (scp *statsCountProcessor) updateStatsForAllRows(timestamps []int64, columns []BlockColumn) int {
+	fields := scp.sc.fields
+	if len(fields) == 0 || scp.sc.containsStar {
+		// Fast path - count all the rows.
+		scp.rowsCount += uint64(len(timestamps))
+		return 0
+	}
+
+	// Slow path - count rows containing at least one non-empty value for the fields enumerated inside count().
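+	// The bitmap starts with all bits set. Each pass over a field keeps a bit
+	// set only for rows where that field is empty, so the bits that survive all
+	// the passes mark rows which are empty in every enumerated field.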
+	bm := getFilterBitmap(len(timestamps))
+	defer putFilterBitmap(bm)
+
+	bm.setBits()
+	for _, f := range fields {
+		if idx := getBlockColumnIndex(columns, f); idx >= 0 {
+			values := columns[idx].Values
+			bm.forEachSetBit(func(i int) bool {
+				return values[i] == ""
+			})
+		}
+	}
+
+	emptyValues := 0
+	bm.forEachSetBit(func(i int) bool {
+		emptyValues++
+		return true
+	})
+
+	scp.rowsCount += uint64(len(timestamps) - emptyValues)
+	return 0
+}
+
+func (scp *statsCountProcessor) updateStatsForRow(_ []int64, columns []BlockColumn, rowIdx int) int {
+	fields := scp.sc.fields
+	if len(fields) == 0 || scp.sc.containsStar {
+		// Fast path - count the given row
+		scp.rowsCount++
+		return 0
+	}
+
+	// Slow path - count the row at rowIdx if at least one field enumerated inside count() is non-empty
+	for _, f := range fields {
+		if idx := getBlockColumnIndex(columns, f); idx >= 0 && columns[idx].Values[rowIdx] != "" {
+			scp.rowsCount++
+			return 0
+		}
+	}
+	return 0
+}
+
+func (scp *statsCountProcessor) mergeState(sfp statsFuncProcessor) {
+	src := sfp.(*statsCountProcessor)
+	scp.rowsCount += src.rowsCount
+}
+
+func (scp *statsCountProcessor) finalizeStats() (string, string) {
+	value := strconv.FormatUint(scp.rowsCount, 10)
+	return scp.sc.resultName, value
+}
+
+func parseStatsCount(lex *lexer) (*statsCount, error) {
+	lex.nextToken()
+	fields, err := parseFieldNamesInParens(lex)
+	if err != nil {
+		return nil, fmt.Errorf("cannot parse 'count' args: %w", err)
+	}
+	resultName, err := parseResultName(lex)
+	if err != nil {
+		return nil, fmt.Errorf("cannot parse result name: %w", err)
+	}
+	sc := &statsCount{
+		fields:       fields,
+		containsStar: slices.Contains(fields, "*"),
+		resultName:   resultName,
+	}
+	return sc, nil
+}
diff --git a/lib/logstorage/stats_sum.go b/lib/logstorage/stats_sum.go
new file mode 100644
index 000000000..fd6a73c82
--- /dev/null
+++ b/lib/logstorage/stats_sum.go
@@ -0,0 +1,127 @@
+package logstorage
+
+import (
+	"fmt"
+	"math"
+	"slices"
+	"strconv"
+	"unsafe"
+)
+
+type statsSum struct {
+	fields       []string
+	containsStar bool
+	resultName   string
+}
+
+func (ss *statsSum) String() string {
+	return "sum(" + fieldNamesString(ss.fields) + ") as " + quoteTokenIfNeeded(ss.resultName)
+}
+
+func (ss *statsSum) neededFields() []string {
+	return ss.fields
+}
+
+func (ss *statsSum) newStatsFuncProcessor() (statsFuncProcessor, int) {
+	ssp := &statsSumProcessor{
+		ss: ss,
+	}
+	return ssp, int(unsafe.Sizeof(*ssp))
+}
+
+type statsSumProcessor struct {
+	ss *statsSum
+
+	sum float64
+}
+
+func (ssp *statsSumProcessor) updateStatsForAllRows(timestamps []int64, columns []BlockColumn) int {
+	if ssp.ss.containsStar {
+		// Sum all the columns
+		for _, c := range columns {
+			ssp.sum += sumValues(c.Values)
+		}
+		return 0
+	}
+
+	// Sum the requested columns
+	for _, field := range ssp.ss.fields {
+		if idx := getBlockColumnIndex(columns, field); idx >= 0 {
+			ssp.sum += sumValues(columns[idx].Values)
+		}
+	}
+	return 0
+}
+
+func sumValues(values []string) float64 {
+	sum := float64(0)
+	f := float64(0)
+	for i, v := range values {
+		if i == 0 || values[i-1] != v {
+			f, _ = tryParseFloat64(v)
+			if math.IsNaN(f) {
+				// Ignore NaN values, since this is the behaviour expected by most users.
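+				// Resetting f to 0 makes NaN contribute nothing to the sum
+				// instead of turning the whole result into NaN.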
+				f = 0
+			}
+		}
+		sum += f
+	}
+	return sum
+}
+
+func (ssp *statsSumProcessor) updateStatsForRow(_ []int64, columns []BlockColumn, rowIdx int) int {
+	if ssp.ss.containsStar {
+		// Sum all the fields for the given row
+		for _, c := range columns {
+			v := c.Values[rowIdx]
+			f, _ := tryParseFloat64(v)
+			if !math.IsNaN(f) {
+				ssp.sum += f
+			}
+		}
+		return 0
+	}
+
+	// Sum only the given fields for the given row
+	for _, field := range ssp.ss.fields {
+		if idx := getBlockColumnIndex(columns, field); idx >= 0 {
+			v := columns[idx].Values[rowIdx]
+			f, _ := tryParseFloat64(v)
+			if !math.IsNaN(f) {
+				ssp.sum += f
+			}
+		}
+	}
+	return 0
+}
+
+func (ssp *statsSumProcessor) mergeState(sfp statsFuncProcessor) {
+	src := sfp.(*statsSumProcessor)
+	ssp.sum += src.sum
+}
+
+func (ssp *statsSumProcessor) finalizeStats() (string, string) {
+	value := strconv.FormatFloat(ssp.sum, 'g', -1, 64)
+	return ssp.ss.resultName, value
+}
+
+func parseStatsSum(lex *lexer) (*statsSum, error) {
+	lex.nextToken()
+	fields, err := parseFieldNamesInParens(lex)
+	if err != nil {
+		return nil, fmt.Errorf("cannot parse 'sum' args: %w", err)
+	}
+	if len(fields) == 0 {
+		return nil, fmt.Errorf("'sum' must contain at least one arg")
+	}
+	resultName, err := parseResultName(lex)
+	if err != nil {
+		return nil, fmt.Errorf("cannot parse result name: %w", err)
+	}
+	ss := &statsSum{
+		fields:       fields,
+		containsStar: slices.Contains(fields, "*"),
+		resultName:   resultName,
+	}
+	return ss, nil
+}
diff --git a/lib/logstorage/stats_unique.go b/lib/logstorage/stats_unique.go
new file mode 100644
index 000000000..3fa571f60
--- /dev/null
+++ b/lib/logstorage/stats_unique.go
@@ -0,0 +1,255 @@
+package logstorage
+
+import (
+	"fmt"
+	"slices"
+	"strconv"
+	"strings"
+	"unsafe"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
+)
+
+type statsUniq struct {
+	fields       []string
+	containsStar bool
+	resultName   string
+}
+
+func (su *statsUniq) String() string {
+	return "uniq(" + fieldNamesString(su.fields) + ") as " + quoteTokenIfNeeded(su.resultName)
+}
+
+func (su *statsUniq) neededFields() []string {
+	return su.fields
+}
+
+func (su *statsUniq) newStatsFuncProcessor() (statsFuncProcessor, int) {
+	sup := &statsUniqProcessor{
+		su: su,
+
+		m: make(map[string]struct{}),
+	}
+	return sup, int(unsafe.Sizeof(*sup))
+}
+
+type statsUniqProcessor struct {
+	su *statsUniq
+
+	m map[string]struct{}
+
+	columnValues [][]string
+	keyBuf       []byte
+}
+
+func (sup *statsUniqProcessor) updateStatsForAllRows(timestamps []int64, columns []BlockColumn) int {
+	fields := sup.su.fields
+	m := sup.m
+
+	stateSizeIncrease := 0
+	if len(fields) == 0 || sup.su.containsStar {
+		// Count unique rows
+		keyBuf := sup.keyBuf
+		for i := range timestamps {
+			seenKey := true
+			for _, c := range columns {
+				values := c.Values
+				if i == 0 || values[i-1] != values[i] {
+					seenKey = false
+					break
+				}
+			}
+			if seenKey {
+				continue
+			}
+
+			allEmptyValues := true
+			keyBuf = keyBuf[:0]
+			for _, c := range columns {
+				v := c.Values[i]
+				if v != "" {
+					allEmptyValues = false
+				}
+				// Put the column name into the key, since every block can contain a different set of columns for the '*' selector.
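+				// MarshalBytes length-prefixes each entry, so distinct
+				// (name, value) pair sets cannot produce the same key bytes.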
+				keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(c.Name))
+				keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(v))
+			}
+			if allEmptyValues {
+				// Do not count empty values
+				continue
+			}
+			if _, ok := m[string(keyBuf)]; !ok {
+				m[string(keyBuf)] = struct{}{}
+				stateSizeIncrease += len(keyBuf) + int(unsafe.Sizeof(""))
+			}
+		}
+		sup.keyBuf = keyBuf
+		return stateSizeIncrease
+	}
+	if len(fields) == 1 {
+		// Fast path for a single column
+		if idx := getBlockColumnIndex(columns, fields[0]); idx >= 0 {
+			values := columns[idx].Values
+			for i, v := range values {
+				if v == "" {
+					// Do not count empty values
+					continue
+				}
+				if i > 0 && values[i-1] == v {
+					continue
+				}
+				if _, ok := m[v]; !ok {
+					vCopy := strings.Clone(v)
+					m[vCopy] = struct{}{}
+					stateSizeIncrease += len(vCopy) + int(unsafe.Sizeof(vCopy))
+				}
+			}
+		}
+		return stateSizeIncrease
+	}
+
+	// Slow path for multiple columns.
+
+	// Pre-calculate column values for the requested fields in order to speed up building the group key in the loop below.
+	sup.columnValues = appendBlockColumnValues(sup.columnValues[:0], columns, fields, len(timestamps))
+	columnValues := sup.columnValues
+
+	keyBuf := sup.keyBuf
+	for i := range timestamps {
+		seenKey := true
+		for _, values := range columnValues {
+			if i == 0 || values[i-1] != values[i] {
+				seenKey = false
+			}
+		}
+		if seenKey {
+			continue
+		}
+
+		allEmptyValues := true
+		keyBuf = keyBuf[:0]
+		for _, values := range columnValues {
+			v := values[i]
+			if v != "" {
+				allEmptyValues = false
+			}
+			keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(v))
+		}
+		if allEmptyValues {
+			// Do not count empty values
+			continue
+		}
+		if _, ok := m[string(keyBuf)]; !ok {
+			m[string(keyBuf)] = struct{}{}
+			stateSizeIncrease += len(keyBuf) + int(unsafe.Sizeof(""))
+		}
+	}
+	sup.keyBuf = keyBuf
+	return stateSizeIncrease
+}
+
+func (sup *statsUniqProcessor) updateStatsForRow(timestamps []int64, columns []BlockColumn, rowIdx int) int {
+	fields := sup.su.fields
+	m := sup.m
+
+	stateSizeIncrease := 0
+	if len(fields) == 0 || sup.su.containsStar {
+		// Count unique rows
+		allEmptyValues := true
+		keyBuf := sup.keyBuf[:0]
+		for _, c := range columns {
+			v := c.Values[rowIdx]
+			if v != "" {
+				allEmptyValues = false
+			}
+			// Put the column name into the key, since every block can contain a different set of columns for the '*' selector.
+			keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(c.Name))
+			keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(v))
+		}
+		sup.keyBuf = keyBuf
+
+		if allEmptyValues {
+			// Do not count empty values
+			return stateSizeIncrease
+		}
+		if _, ok := m[string(keyBuf)]; !ok {
+			m[string(keyBuf)] = struct{}{}
+			stateSizeIncrease += len(keyBuf) + int(unsafe.Sizeof(""))
+		}
+		return stateSizeIncrease
+	}
+	if len(fields) == 1 {
+		// Fast path for a single column
+		if idx := getBlockColumnIndex(columns, fields[0]); idx >= 0 {
+			v := columns[idx].Values[rowIdx]
+			if v == "" {
+				// Do not count empty values
+				return stateSizeIncrease
+			}
+			if _, ok := m[v]; !ok {
+				vCopy := strings.Clone(v)
+				m[vCopy] = struct{}{}
+				stateSizeIncrease += len(vCopy) + int(unsafe.Sizeof(vCopy))
+			}
+		}
+		return stateSizeIncrease
+	}
+
+	// Slow path for multiple columns.
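+	// A field whose column is missing in the block contributes an empty value
+	// to the key, so the same logical row produces the same key across blocks
+	// with different column sets.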
+ allEmptyValues := true + keyBuf := sup.keyBuf[:0] + for _, f := range fields { + v := "" + if idx := getBlockColumnIndex(columns, f); idx >= 0 { + v = columns[idx].Values[rowIdx] + } + if v != "" { + allEmptyValues = false + } + keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(v)) + } + sup.keyBuf = keyBuf + + if allEmptyValues { + // Do not count empty values + return stateSizeIncrease + } + if _, ok := m[string(keyBuf)]; !ok { + m[string(keyBuf)] = struct{}{} + stateSizeIncrease += len(keyBuf) + int(unsafe.Sizeof("")) + } + return stateSizeIncrease +} + +func (sup *statsUniqProcessor) mergeState(sfp statsFuncProcessor) { + src := sfp.(*statsUniqProcessor) + m := sup.m + for k := range src.m { + m[k] = struct{}{} + } +} + +func (sup *statsUniqProcessor) finalizeStats() (string, string) { + n := uint64(len(sup.m)) + value := strconv.FormatUint(n, 10) + return sup.su.resultName, value +} + +func parseStatsUniq(lex *lexer) (*statsUniq, error) { + lex.nextToken() + fields, err := parseFieldNamesInParens(lex) + if err != nil { + return nil, fmt.Errorf("cannot parse 'uniq' args: %w", err) + } + resultName, err := parseResultName(lex) + if err != nil { + return nil, fmt.Errorf("cannot parse result name: %w", err) + } + su := &statsUniq{ + fields: fields, + containsStar: slices.Contains(fields, "*"), + resultName: resultName, + } + return su, nil +}