VictoriaMetrics/lib/logstorage/stats_count.go

227 lines
5.3 KiB
Go
Raw Normal View History

2024-04-29 01:20:43 +00:00
package logstorage
import (
"fmt"
"slices"
"strconv"
"unsafe"
2024-04-30 21:03:34 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
2024-04-29 01:20:43 +00:00
)
// statsCount holds the parsed form of a count(...) stats function.
type statsCount struct {
	// fields are the field names listed inside count(...); the list may include "*".
	fields []string

	// containsStar is set by parseStatsCount when fields contains "*".
	containsStar bool

	// resultName is the name returned by finalizeStats alongside the final count.
	resultName string
}
// String returns the textual form of sc: "count(<fields>) as <resultName>".
//
// The field list is rendered by fieldNamesString and the result name
// is passed through quoteTokenIfNeeded.
func (sc *statsCount) String() string {
	args := fieldNamesString(sc.fields)
	name := quoteTokenIfNeeded(sc.resultName)
	return "count(" + args + ") as " + name
}
// neededFields returns the fields listed inside count(...) with every "*" entry dropped.
func (sc *statsCount) neededFields() []string {
	needed := getFieldsIgnoreStar(sc.fields)
	return needed
}
2024-04-29 01:23:41 +00:00
func (sc *statsCount) newStatsProcessor() (statsProcessor, int) {
2024-04-29 01:20:43 +00:00
scp := &statsCountProcessor{
sc: sc,
}
return scp, int(unsafe.Sizeof(*scp))
}
// statsCountProcessor accumulates the number of counted rows for a single statsCount.
type statsCountProcessor struct {
	// sc is the count(...) definition this processor works for.
	sc *statsCount

	// rowsCount is the number of rows counted so far.
	rowsCount uint64
}
2024-04-30 21:03:34 +00:00
func (scp *statsCountProcessor) updateStatsForAllRows(br *blockResult) int {
2024-04-29 01:20:43 +00:00
fields := scp.sc.fields
if len(fields) == 0 || scp.sc.containsStar {
2024-04-30 21:03:34 +00:00
// Fast path - unconditionally count all the columns.
scp.rowsCount += uint64(len(br.timestamps))
2024-04-29 01:20:43 +00:00
return 0
}
2024-04-30 21:03:34 +00:00
if len(fields) == 1 {
// Fast path for count(single_column)
c := br.getColumnByName(fields[0])
if c.isConst {
if c.encodedValues[0] != "" {
scp.rowsCount += uint64(len(br.timestamps))
}
return 0
}
if c.isTime {
scp.rowsCount += uint64(len(br.timestamps))
return 0
}
switch c.valueType {
case valueTypeString:
for _, v := range c.encodedValues {
if v != "" {
scp.rowsCount++
}
}
return 0
case valueTypeDict:
zeroDictIdx := slices.Index(c.dictValues, "")
if zeroDictIdx < 0 {
scp.rowsCount += uint64(len(br.timestamps))
return 0
}
for _, v := range c.encodedValues {
if int(v[0]) != zeroDictIdx {
scp.rowsCount++
}
}
return 0
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
scp.rowsCount += uint64(len(br.timestamps))
return 0
default:
logger.Panicf("BUG: unknown valueType=%d", c.valueType)
return 0
}
}
2024-04-29 01:20:43 +00:00
// Slow path - count rows containing at least a single non-empty value for the fields enumerated inside count().
2024-04-30 21:03:34 +00:00
bm := getBitmap(len(br.timestamps))
2024-04-29 01:47:25 +00:00
defer putBitmap(bm)
2024-04-29 01:20:43 +00:00
bm.setBits()
for _, f := range fields {
2024-04-30 21:03:34 +00:00
c := br.getColumnByName(f)
if c.isConst {
if c.encodedValues[0] != "" {
scp.rowsCount += uint64(len(br.timestamps))
return 0
}
continue
}
if c.isTime {
scp.rowsCount += uint64(len(br.timestamps))
return 0
}
switch c.valueType {
case valueTypeString:
bm.forEachSetBit(func(i int) bool {
return c.encodedValues[i] == ""
})
case valueTypeDict:
if !slices.Contains(c.dictValues, "") {
scp.rowsCount += uint64(len(br.timestamps))
return 0
}
2024-04-29 01:20:43 +00:00
bm.forEachSetBit(func(i int) bool {
2024-04-30 21:03:34 +00:00
dictIdx := c.encodedValues[i][0]
return c.dictValues[dictIdx] == ""
2024-04-29 01:20:43 +00:00
})
2024-04-30 21:03:34 +00:00
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
scp.rowsCount += uint64(len(br.timestamps))
return 0
default:
logger.Panicf("BUG: unknown valueType=%d", c.valueType)
return 0
2024-04-29 01:20:43 +00:00
}
}
2024-04-30 21:03:34 +00:00
scp.rowsCount += uint64(len(br.timestamps))
2024-04-29 01:20:43 +00:00
bm.forEachSetBit(func(i int) bool {
2024-04-30 21:03:34 +00:00
scp.rowsCount--
2024-04-29 01:20:43 +00:00
return true
})
return 0
}
2024-04-30 21:03:34 +00:00
func (scp *statsCountProcessor) updateStatsForRow(br *blockResult, rowIdx int) int {
2024-04-29 01:20:43 +00:00
fields := scp.sc.fields
if len(fields) == 0 || scp.sc.containsStar {
2024-04-30 21:03:34 +00:00
// Fast path - unconditionally count the given column
2024-04-29 01:20:43 +00:00
scp.rowsCount++
return 0
}
2024-04-30 21:03:34 +00:00
if len(fields) == 1 {
// Fast path for count(single_column)
c := br.getColumnByName(fields[0])
if c.isConst {
if c.encodedValues[0] != "" {
scp.rowsCount++
}
return 0
}
if c.isTime {
scp.rowsCount++
return 0
}
switch c.valueType {
case valueTypeString:
if v := c.encodedValues[rowIdx]; v != "" {
scp.rowsCount++
}
return 0
case valueTypeDict:
dictIdx := c.encodedValues[rowIdx][0]
if v := c.dictValues[dictIdx]; v != "" {
scp.rowsCount++
}
return 0
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
scp.rowsCount++
return 0
default:
logger.Panicf("BUG: unknown valueType=%d", c.valueType)
return 0
}
}
2024-04-29 01:20:43 +00:00
// Slow path - count the row at rowIdx if at least a single field enumerated inside count() is non-empty
for _, f := range fields {
2024-04-30 21:03:34 +00:00
c := br.getColumnByName(f)
if v := c.getValueAtRow(br, rowIdx); v != "" {
2024-04-29 01:20:43 +00:00
scp.rowsCount++
return 0
}
}
return 0
}
2024-04-29 01:23:41 +00:00
func (scp *statsCountProcessor) mergeState(sfp statsProcessor) {
2024-04-29 01:20:43 +00:00
src := sfp.(*statsCountProcessor)
scp.rowsCount += src.rowsCount
}
// finalizeStats returns the result name and the accumulated row count
// formatted as a base-10 string.
func (scp *statsCountProcessor) finalizeStats() (string, string) {
	return scp.sc.resultName, strconv.FormatUint(scp.rowsCount, 10)
}
// parseStatsCount parses the args in parens and the result name of a count(...) stats function from lex.
//
// It returns an error if either the args or the result name cannot be parsed.
func parseStatsCount(lex *lexer) (*statsCount, error) {
	// Advance the lexer before reading the args.
	// NOTE(review): presumably this skips the 'count' keyword - confirm against the caller.
	lex.nextToken()

	args, err := parseFieldNamesInParens(lex)
	if err != nil {
		return nil, fmt.Errorf("cannot parse 'count' args: %w", err)
	}

	name, err := parseResultName(lex)
	if err != nil {
		return nil, fmt.Errorf("cannot parse result name: %w", err)
	}

	return &statsCount{
		fields:       args,
		containsStar: slices.Contains(args, "*"),
		resultName:   name,
	}, nil
}
2024-04-29 01:44:54 +00:00
// getFieldsIgnoreStar returns a new slice with the entries of fields which aren't equal to "*".
//
// The original order is preserved. The result is nil when no entries remain.
func getFieldsIgnoreStar(fields []string) []string {
	var kept []string
	for i := range fields {
		name := fields[i]
		if name == "*" {
			continue
		}
		kept = append(kept, name)
	}
	return kept
}