This commit is contained in:
Aliaksandr Valialkin 2024-04-28 23:19:40 +02:00
parent 0850e13eb3
commit 24f07dfdc3
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB

View file

@ -298,7 +298,7 @@ type statsPipeProcessorShardNopad struct {
m map[string]*statsPipeGroup m map[string]*statsPipeGroup
funcs []statsFunc funcs []statsFunc
columnIdxs []int columnValues [][]string
keyBuf []byte keyBuf []byte
stateSizeBudget int stateSizeBudget int
@ -343,8 +343,9 @@ func (spp *statsPipeProcessor) writeBlock(workerID uint, timestamps []int64, col
shard.stateSizeBudget += stateSizeBudgetChunk shard.stateSizeBudget += stateSizeBudgetChunk
} }
if len(spp.sp.byFields) == 0 { byFields := spp.sp.byFields
// Fast path - pass all the rows to a single group if len(byFields) == 0 || len(byFields) == 1 && getBlockColumnIndex(columns, byFields[0]) < 0 {
// Fast path - pass all the rows to a single group with empty key.
spg := shard.getStatsPipeGroup(nil) spg := shard.getStatsPipeGroup(nil)
for _, sfp := range spg.sfps { for _, sfp := range spg.sfps {
shard.stateSizeBudget -= sfp.updateStatsForAllRows(timestamps, columns) shard.stateSizeBudget -= sfp.updateStatsForAllRows(timestamps, columns)
@ -354,20 +355,43 @@ func (spp *statsPipeProcessor) writeBlock(workerID uint, timestamps []int64, col
// Slow path - update per-row stats // Slow path - update per-row stats
// Pre-calculate column indexes for byFields in order to speed up building group key in the loop below. if len(byFields) == 1 {
shard.columnIdxs = appendBlockColumnIndexes(shard.columnIdxs[:0], columns, spp.sp.byFields) // Special case for grouping by a single column.
columnIdxs := shard.columnIdxs idx := getBlockColumnIndex(columns, byFields[0])
if idx < 0 {
logger.Panicf("BUG: columnIdx must be positive")
}
values := columns[idx].Values
keyBuf := shard.keyBuf
var spg *statsPipeGroup var spg *statsPipeGroup
keyBuf := shard.keyBuf
for i := range timestamps {
if i <= 0 || values[i-1] != values[i] {
keyBuf = encoding.MarshalBytes(keyBuf[:0], bytesutil.ToUnsafeBytes(values[i]))
spg = shard.getStatsPipeGroup(keyBuf)
}
for _, sfp := range spg.sfps {
shard.stateSizeBudget -= sfp.updateStatsForRow(timestamps, columns, i)
}
}
shard.keyBuf = keyBuf
return
}
// The slowest path - group by multiple columns.
// Pre-calculate column values for byFields in order to speed up building group key in the loop below.
shard.columnValues = appendBlockColumnValues(shard.columnValues[:0], columns, spp.sp.byFields)
columnValues := shard.columnValues
var spg *statsPipeGroup
keyBuf := shard.keyBuf
for i := range timestamps { for i := range timestamps {
// verify whether the key for 'by (...)' fields equals the previous key // verify whether the key for 'by (...)' fields equals the previous key
sameValue := spg != nil sameValue := spg != nil
for _, idx := range columnIdxs { for _, values := range columnValues {
if idx < 0 { if values == nil {
continue continue
} }
values := columns[idx].Values
if i <= 0 || values[i-1] != values[i] { if i <= 0 || values[i-1] != values[i] {
sameValue = false sameValue = false
break break
@ -376,10 +400,10 @@ func (spp *statsPipeProcessor) writeBlock(workerID uint, timestamps []int64, col
if !sameValue { if !sameValue {
// Construct new key for the 'by (...)' fields // Construct new key for the 'by (...)' fields
keyBuf = keyBuf[:0] keyBuf = keyBuf[:0]
for _, idx := range columnIdxs { for _, values := range columnValues {
v := "" v := ""
if idx >= 0 { if values != nil {
v = columns[idx].Values[i] v = values[i]
} }
keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(v)) keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(v))
} }
@ -667,7 +691,7 @@ type statsFuncUniqProcessor struct {
m map[string]struct{} m map[string]struct{}
columnIdxs []int columnValues [][]string
keyBuf []byte keyBuf []byte
} }
@ -696,18 +720,18 @@ func (sfup *statsFuncUniqProcessor) updateStatsForAllRows(timestamps []int64, co
// Slow path for multiple columns. // Slow path for multiple columns.
// Pre-calculate column indexes for byFields in order to speed up building group key in the loop below. // Pre-calculate column values for byFields in order to speed up building group key in the loop below.
sfup.columnIdxs = appendBlockColumnIndexes(sfup.columnIdxs[:0], columns, fields) sfup.columnValues = appendBlockColumnValues(sfup.columnValues[:0], columns, fields)
columnIdxs := sfup.columnIdxs columnValues := sfup.columnValues
keyBuf := sfup.keyBuf keyBuf := sfup.keyBuf
for i := range timestamps { for i := range timestamps {
allEmptyValues := true allEmptyValues := true
keyBuf = keyBuf[:0] keyBuf = keyBuf[:0]
for _, idx := range columnIdxs { for _, values := range columnValues {
v := "" v := ""
if idx >= 0 { if values != nil {
v = columns[idx].Values[i] v = values[i]
} }
if v != "" { if v != "" {
allEmptyValues = false allEmptyValues = false
@ -1042,10 +1066,14 @@ func getFieldsIgnoreStar(fields []string) []string {
return result return result
} }
func appendBlockColumnIndexes(dst []int, columns []BlockColumn, fields []string) []int { func appendBlockColumnValues(dst [][]string, columns []BlockColumn, fields []string) [][]string {
for _, f := range fields { for _, f := range fields {
idx := getBlockColumnIndex(columns, f) idx := getBlockColumnIndex(columns, f)
dst = append(dst, idx) var values []string
if idx >= 0 {
values = columns[idx].Values
}
dst = append(dst, values)
} }
return dst return dst
} }