2024-04-29 01:20:43 +00:00
|
|
|
package logstorage
|
|
|
|
|
|
|
|
import (
|
|
|
|
"slices"
|
|
|
|
"strconv"
|
|
|
|
"unsafe"
|
2024-04-30 21:03:34 +00:00
|
|
|
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
2024-04-29 01:20:43 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// statsCount describes a parsed `count(...)` stats function.
type statsCount struct {
	// fields holds the field names passed to count().
	fields []string

	// containsStar is set by parseStatsCount to true when fields contains "*".
	containsStar bool
}
|
|
|
|
|
|
|
|
func (sc *statsCount) String() string {
|
2024-04-30 23:19:22 +00:00
|
|
|
return "count(" + fieldNamesString(sc.fields) + ")"
|
2024-04-29 01:20:43 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (sc *statsCount) neededFields() []string {
|
|
|
|
return getFieldsIgnoreStar(sc.fields)
|
|
|
|
}
|
|
|
|
|
2024-04-29 01:23:41 +00:00
|
|
|
func (sc *statsCount) newStatsProcessor() (statsProcessor, int) {
|
2024-04-29 01:20:43 +00:00
|
|
|
scp := &statsCountProcessor{
|
|
|
|
sc: sc,
|
|
|
|
}
|
|
|
|
return scp, int(unsafe.Sizeof(*scp))
|
|
|
|
}
|
|
|
|
|
|
|
|
type statsCountProcessor struct {
|
|
|
|
sc *statsCount
|
|
|
|
|
|
|
|
rowsCount uint64
|
|
|
|
}
|
|
|
|
|
2024-04-30 21:03:34 +00:00
|
|
|
func (scp *statsCountProcessor) updateStatsForAllRows(br *blockResult) int {
|
2024-04-29 01:20:43 +00:00
|
|
|
fields := scp.sc.fields
|
2024-05-03 12:03:17 +00:00
|
|
|
if scp.sc.containsStar {
|
2024-04-30 21:03:34 +00:00
|
|
|
// Fast path - unconditionally count all the columns.
|
|
|
|
scp.rowsCount += uint64(len(br.timestamps))
|
2024-04-29 01:20:43 +00:00
|
|
|
return 0
|
|
|
|
}
|
2024-04-30 21:03:34 +00:00
|
|
|
if len(fields) == 1 {
|
|
|
|
// Fast path for count(single_column)
|
|
|
|
c := br.getColumnByName(fields[0])
|
|
|
|
if c.isConst {
|
|
|
|
if c.encodedValues[0] != "" {
|
|
|
|
scp.rowsCount += uint64(len(br.timestamps))
|
|
|
|
}
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
if c.isTime {
|
|
|
|
scp.rowsCount += uint64(len(br.timestamps))
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
switch c.valueType {
|
|
|
|
case valueTypeString:
|
|
|
|
for _, v := range c.encodedValues {
|
|
|
|
if v != "" {
|
|
|
|
scp.rowsCount++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0
|
|
|
|
case valueTypeDict:
|
|
|
|
zeroDictIdx := slices.Index(c.dictValues, "")
|
|
|
|
if zeroDictIdx < 0 {
|
|
|
|
scp.rowsCount += uint64(len(br.timestamps))
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
for _, v := range c.encodedValues {
|
|
|
|
if int(v[0]) != zeroDictIdx {
|
|
|
|
scp.rowsCount++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0
|
|
|
|
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
|
|
|
|
scp.rowsCount += uint64(len(br.timestamps))
|
|
|
|
return 0
|
|
|
|
default:
|
|
|
|
logger.Panicf("BUG: unknown valueType=%d", c.valueType)
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
}
|
2024-04-29 01:20:43 +00:00
|
|
|
|
|
|
|
// Slow path - count rows containing at least a single non-empty value for the fields enumerated inside count().
|
2024-04-30 21:03:34 +00:00
|
|
|
bm := getBitmap(len(br.timestamps))
|
2024-04-29 01:47:25 +00:00
|
|
|
defer putBitmap(bm)
|
2024-04-29 01:20:43 +00:00
|
|
|
|
|
|
|
bm.setBits()
|
|
|
|
for _, f := range fields {
|
2024-04-30 21:03:34 +00:00
|
|
|
c := br.getColumnByName(f)
|
|
|
|
if c.isConst {
|
|
|
|
if c.encodedValues[0] != "" {
|
|
|
|
scp.rowsCount += uint64(len(br.timestamps))
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if c.isTime {
|
|
|
|
scp.rowsCount += uint64(len(br.timestamps))
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
switch c.valueType {
|
|
|
|
case valueTypeString:
|
|
|
|
bm.forEachSetBit(func(i int) bool {
|
|
|
|
return c.encodedValues[i] == ""
|
|
|
|
})
|
|
|
|
case valueTypeDict:
|
|
|
|
if !slices.Contains(c.dictValues, "") {
|
|
|
|
scp.rowsCount += uint64(len(br.timestamps))
|
|
|
|
return 0
|
|
|
|
}
|
2024-04-29 01:20:43 +00:00
|
|
|
bm.forEachSetBit(func(i int) bool {
|
2024-04-30 21:03:34 +00:00
|
|
|
dictIdx := c.encodedValues[i][0]
|
|
|
|
return c.dictValues[dictIdx] == ""
|
2024-04-29 01:20:43 +00:00
|
|
|
})
|
2024-04-30 21:03:34 +00:00
|
|
|
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
|
|
|
|
scp.rowsCount += uint64(len(br.timestamps))
|
|
|
|
return 0
|
|
|
|
default:
|
|
|
|
logger.Panicf("BUG: unknown valueType=%d", c.valueType)
|
|
|
|
return 0
|
2024-04-29 01:20:43 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-04-30 21:03:34 +00:00
|
|
|
scp.rowsCount += uint64(len(br.timestamps))
|
2024-04-29 01:20:43 +00:00
|
|
|
bm.forEachSetBit(func(i int) bool {
|
2024-04-30 21:03:34 +00:00
|
|
|
scp.rowsCount--
|
2024-04-29 01:20:43 +00:00
|
|
|
return true
|
|
|
|
})
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
|
2024-04-30 21:03:34 +00:00
|
|
|
func (scp *statsCountProcessor) updateStatsForRow(br *blockResult, rowIdx int) int {
|
2024-04-29 01:20:43 +00:00
|
|
|
fields := scp.sc.fields
|
2024-05-03 12:03:17 +00:00
|
|
|
if scp.sc.containsStar {
|
2024-04-30 21:03:34 +00:00
|
|
|
// Fast path - unconditionally count the given column
|
2024-04-29 01:20:43 +00:00
|
|
|
scp.rowsCount++
|
|
|
|
return 0
|
|
|
|
}
|
2024-04-30 21:03:34 +00:00
|
|
|
if len(fields) == 1 {
|
|
|
|
// Fast path for count(single_column)
|
|
|
|
c := br.getColumnByName(fields[0])
|
|
|
|
if c.isConst {
|
|
|
|
if c.encodedValues[0] != "" {
|
|
|
|
scp.rowsCount++
|
|
|
|
}
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
if c.isTime {
|
|
|
|
scp.rowsCount++
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
switch c.valueType {
|
|
|
|
case valueTypeString:
|
|
|
|
if v := c.encodedValues[rowIdx]; v != "" {
|
|
|
|
scp.rowsCount++
|
|
|
|
}
|
|
|
|
return 0
|
|
|
|
case valueTypeDict:
|
|
|
|
dictIdx := c.encodedValues[rowIdx][0]
|
|
|
|
if v := c.dictValues[dictIdx]; v != "" {
|
|
|
|
scp.rowsCount++
|
|
|
|
}
|
|
|
|
return 0
|
|
|
|
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
|
|
|
|
scp.rowsCount++
|
|
|
|
return 0
|
|
|
|
default:
|
|
|
|
logger.Panicf("BUG: unknown valueType=%d", c.valueType)
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
}
|
2024-04-29 01:20:43 +00:00
|
|
|
|
|
|
|
// Slow path - count the row at rowIdx if at least a single field enumerated inside count() is non-empty
|
|
|
|
for _, f := range fields {
|
2024-04-30 21:03:34 +00:00
|
|
|
c := br.getColumnByName(f)
|
|
|
|
if v := c.getValueAtRow(br, rowIdx); v != "" {
|
2024-04-29 01:20:43 +00:00
|
|
|
scp.rowsCount++
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
|
2024-04-29 01:23:41 +00:00
|
|
|
func (scp *statsCountProcessor) mergeState(sfp statsProcessor) {
|
2024-04-29 01:20:43 +00:00
|
|
|
src := sfp.(*statsCountProcessor)
|
|
|
|
scp.rowsCount += src.rowsCount
|
|
|
|
}
|
|
|
|
|
2024-04-30 23:19:22 +00:00
|
|
|
func (scp *statsCountProcessor) finalizeStats() string {
|
|
|
|
return strconv.FormatUint(scp.rowsCount, 10)
|
2024-04-29 01:20:43 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func parseStatsCount(lex *lexer) (*statsCount, error) {
|
2024-05-03 12:03:17 +00:00
|
|
|
fields, err := parseFieldNamesForStatsFunc(lex, "count")
|
2024-04-29 01:20:43 +00:00
|
|
|
if err != nil {
|
2024-05-03 09:15:09 +00:00
|
|
|
return nil, err
|
2024-04-29 01:20:43 +00:00
|
|
|
}
|
|
|
|
sc := &statsCount{
|
|
|
|
fields: fields,
|
|
|
|
containsStar: slices.Contains(fields, "*"),
|
|
|
|
}
|
|
|
|
return sc, nil
|
|
|
|
}
|
2024-04-29 01:44:54 +00:00
|
|
|
|
|
|
|
// getFieldsIgnoreStar returns the entries of fields other than "*", in their original order.
//
// The input slice is left untouched. The result is nil when nothing is kept.
func getFieldsIgnoreStar(fields []string) []string {
	var kept []string
	for i := range fields {
		if fields[i] == "*" {
			continue
		}
		kept = append(kept, fields[i])
	}
	return kept
}
|