wip

2025-01-30 15:22:07 +00:00 · 2024-04-28 23:19:40 +02:00 · 2024-04-28 23:19:40 +02:00 · 24f07dfdc3
commit 24f07dfdc3
parent 0850e13eb3
1 changed files with 52 additions and 24 deletions
--- a/lib/logstorage/pipes.go
+++ b/lib/logstorage/pipes.go
@ -298,8 +298,8 @@ type statsPipeProcessorShardNopad struct {
 	m     map[string]*statsPipeGroup
 	funcs []statsFunc
-	columnIdxs []int
+	columnValues [][]string
-	keyBuf     []byte
+	keyBuf       []byte
 	stateSizeBudget int
 }
@ -343,8 +343,9 @@ func (spp *statsPipeProcessor) writeBlock(workerID uint, timestamps []int64, col
 		shard.stateSizeBudget += stateSizeBudgetChunk
 	}
-	if len(spp.sp.byFields) == 0 {
+	byFields := spp.sp.byFields
-		// Fast path - pass all the rows to a single group
+	if len(byFields) == 0 || len(byFields) == 1 && getBlockColumnIndex(columns, byFields[0]) < 0 {
 		// Fast path - pass all the rows to a single group with empty key.
 		spg := shard.getStatsPipeGroup(nil)
 		for _, sfp := range spg.sfps {
 			shard.stateSizeBudget -= sfp.updateStatsForAllRows(timestamps, columns)
@ -354,20 +355,43 @@ func (spp *statsPipeProcessor) writeBlock(workerID uint, timestamps []int64, col
 	// Slow path - update per-row stats
-	// Pre-calculate column indexes for byFields in order to speed up building group key in the loop below.
+	if len(byFields) == 1 {
-	shard.columnIdxs = appendBlockColumnIndexes(shard.columnIdxs[:0], columns, spp.sp.byFields)
+		// Special case for grouping by a single column.
-	columnIdxs := shard.columnIdxs
+		idx := getBlockColumnIndex(columns, byFields[0])
 		if idx < 0 {
 			logger.Panicf("BUG: columnIdx must be positive")
 		}
 		values := columns[idx].Values
 		var spg *statsPipeGroup
 		keyBuf := shard.keyBuf
 		for i := range timestamps {
 			if i <= 0 || values[i-1] != values[i] {
 				keyBuf = encoding.MarshalBytes(keyBuf[:0], bytesutil.ToUnsafeBytes(values[i]))
 				spg = shard.getStatsPipeGroup(keyBuf)
 			}
 			for _, sfp := range spg.sfps {
 				shard.stateSizeBudget -= sfp.updateStatsForRow(timestamps, columns, i)
 			}
 		}
 		shard.keyBuf = keyBuf
 		return
 	}
 	// The slowest path - group by multiple columns.
 	// Pre-calculate column values for byFields in order to speed up building group key in the loop below.
 	shard.columnValues = appendBlockColumnValues(shard.columnValues[:0], columns, spp.sp.byFields)
 	columnValues := shard.columnValues
 	keyBuf := shard.keyBuf
 	var spg *statsPipeGroup
 	keyBuf := shard.keyBuf
 	for i := range timestamps {
 		// verify whether the key for 'by (...)' fields equals the previous key
 		sameValue := spg != nil
-		for _, idx := range columnIdxs {
+		for _, values := range columnValues {
-			if idx < 0 {
+			if values == nil {
 				continue
 			}
 			values := columns[idx].Values
 			if i <= 0 || values[i-1] != values[i] {
 				sameValue = false
 				break
@ -376,10 +400,10 @@ func (spp *statsPipeProcessor) writeBlock(workerID uint, timestamps []int64, col
 		if !sameValue {
 			// Construct new key for the 'by (...)' fields
 			keyBuf = keyBuf[:0]
-			for _, idx := range columnIdxs {
+			for _, values := range columnValues {
 				v := ""
-				if idx >= 0 {
+				if values != nil {
-					v = columns[idx].Values[i]
+					v = values[i]
 				}
 				keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(v))
 			}
@ -667,8 +691,8 @@ type statsFuncUniqProcessor struct {
 	m map[string]struct{}
-	columnIdxs []int
+	columnValues [][]string
-	keyBuf     []byte
+	keyBuf       []byte
 }
 func (sfup *statsFuncUniqProcessor) updateStatsForAllRows(timestamps []int64, columns []BlockColumn) int {
@ -696,18 +720,18 @@ func (sfup *statsFuncUniqProcessor) updateStatsForAllRows(timestamps []int64, co
 	// Slow path for multiple columns.
-	// Pre-calculate column indexes for byFields in order to speed up building group key in the loop below.
+	// Pre-calculate column values for byFields in order to speed up building group key in the loop below.
-	sfup.columnIdxs = appendBlockColumnIndexes(sfup.columnIdxs[:0], columns, fields)
+	sfup.columnValues = appendBlockColumnValues(sfup.columnValues[:0], columns, fields)
-	columnIdxs := sfup.columnIdxs
+	columnValues := sfup.columnValues
 	keyBuf := sfup.keyBuf
 	for i := range timestamps {
 		allEmptyValues := true
 		keyBuf = keyBuf[:0]
-		for _, idx := range columnIdxs {
+		for _, values := range columnValues {
 			v := ""
-			if idx >= 0 {
+			if values != nil {
-				v = columns[idx].Values[i]
+				v = values[i]
 			}
 			if v != "" {
 				allEmptyValues = false
@ -1042,10 +1066,14 @@ func getFieldsIgnoreStar(fields []string) []string {
 	return result
 }
-func appendBlockColumnIndexes(dst []int, columns []BlockColumn, fields []string) []int {
+func appendBlockColumnValues(dst [][]string, columns []BlockColumn, fields []string) [][]string {
 	for _, f := range fields {
 		idx := getBlockColumnIndex(columns, f)
-		dst = append(dst, idx)
+		var values []string
 		if idx >= 0 {
 			values = columns[idx].Values
 		}
 		dst = append(dst, values)
 	}
 	return dst
 }