This commit is contained in:
Aliaksandr Valialkin 2024-05-19 13:23:27 +02:00
parent 5e20724e1a
commit 5175d99d49
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
3 changed files with 71 additions and 46 deletions

View file

@ -208,9 +208,22 @@ func (br *blockResult) sizeBytes() int {
return n return n
} }
// addResultColumns adds the given rcs to br.
//
// The br is valid only until rcs are modified.
func (br *blockResult) addResultColumns(rcs []resultColumn) {
if len(rcs) == 0 || len(rcs[0].values) == 0 {
return
}
for i := range rcs {
br.addResultColumn(&rcs[i])
}
}
// setResultColumns sets the given rcs as br columns. // setResultColumns sets the given rcs as br columns.
// //
// The returned result is valid only until rcs are modified. // The br is valid only until rcs are modified.
func (br *blockResult) setResultColumns(rcs []resultColumn) { func (br *blockResult) setResultColumns(rcs []resultColumn) {
br.reset() br.reset()
@ -220,24 +233,29 @@ func (br *blockResult) setResultColumns(rcs []resultColumn) {
br.timestamps = fastnum.AppendInt64Zeros(br.timestamps[:0], len(rcs[0].values)) br.timestamps = fastnum.AppendInt64Zeros(br.timestamps[:0], len(rcs[0].values))
csBuf := br.csBuf for i := range rcs {
for _, rc := range rcs { br.addResultColumn(&rcs[i])
}
}
func (br *blockResult) addResultColumn(rc *resultColumn) {
if len(rc.values) != len(br.timestamps) {
logger.Panicf("BUG: column %q must contain %d rows, but it contains %d rows", rc.name, len(br.timestamps), len(rc.values))
}
if areConstValues(rc.values) { if areConstValues(rc.values) {
// This optimization allows reducing memory usage after br cloning // This optimization allows reducing memory usage after br cloning
csBuf = append(csBuf, blockResultColumn{ br.csBuf = append(br.csBuf, blockResultColumn{
name: br.a.copyString(rc.name), name: rc.name,
isConst: true, isConst: true,
valuesEncoded: rc.values[:1], valuesEncoded: rc.values[:1],
}) })
} else { } else {
csBuf = append(csBuf, blockResultColumn{ br.csBuf = append(br.csBuf, blockResultColumn{
name: br.a.copyString(rc.name), name: rc.name,
valueType: valueTypeString, valueType: valueTypeString,
valuesEncoded: rc.values, valuesEncoded: rc.values,
}) })
} }
}
br.csBuf = csBuf
br.csInitialized = false br.csInitialized = false
} }
@ -1765,37 +1783,25 @@ func (c *blockResultColumn) sumValues(br *blockResult) (float64, int) {
} }
} }
// resultColumn represents a column with result values // resultColumn represents a column with result values.
//
// It doesn't own the result values.
type resultColumn struct { type resultColumn struct {
// name is column name. // name is column name.
name string name string
// a contains values data. // values is the result values.
a arena
// values is the result values. They are backed by data inside a.
values []string values []string
} }
func (rc *resultColumn) sizeBytes() int {
return len(rc.name) + rc.a.sizeBytes() + len(rc.values)*int(unsafe.Sizeof(rc.values[0]))
}
func (rc *resultColumn) resetKeepName() { func (rc *resultColumn) resetKeepName() {
rc.a.reset()
clear(rc.values) clear(rc.values)
rc.values = rc.values[:0] rc.values = rc.values[:0]
} }
// addValue adds the given values v to rc. // addValue adds the given values v to rc.
func (rc *resultColumn) addValue(v string) { func (rc *resultColumn) addValue(v string) {
values := rc.values rc.values = append(rc.values, v)
if len(values) > 0 && string(v) == values[len(values)-1] {
v = values[len(values)-1]
} else {
v = rc.a.copyString(v)
}
rc.values = append(values, v)
} }
func truncateTimestampToMonth(timestamp int64) int64 { func truncateTimestampToMonth(timestamp int64) int64 {

View file

@ -37,9 +37,15 @@ func (pe *pipeExtract) updateNeededFields(neededFields, unneededFields fieldsSet
func (pe *pipeExtract) newPipeProcessor(workersCount int, stopCh <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor { func (pe *pipeExtract) newPipeProcessor(workersCount int, stopCh <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
shards := make([]pipeExtractProcessorShard, workersCount) shards := make([]pipeExtractProcessorShard, workersCount)
for i := range shards { for i := range shards {
ef := newExtractFormat(pe.steps)
rcs := make([]resultColumn, len(ef.fields))
for j := range rcs {
rcs[j].name = ef.fields[j].name
}
shards[i] = pipeExtractProcessorShard{ shards[i] = pipeExtractProcessorShard{
pipeExtractProcessorShardNopad: pipeExtractProcessorShardNopad{ pipeExtractProcessorShardNopad: pipeExtractProcessorShardNopad{
ef: newExtractFormat(pe.steps), ef: ef,
rcs: rcs,
}, },
} }
} }
@ -71,6 +77,8 @@ type pipeExtractProcessorShard struct {
type pipeExtractProcessorShardNopad struct { type pipeExtractProcessorShardNopad struct {
ef *extractFormat ef *extractFormat
rcs []resultColumn
} }
func (pep *pipeExtractProcessor) writeBlock(workerID uint, br *blockResult) { func (pep *pipeExtractProcessor) writeBlock(workerID uint, br *blockResult) {
@ -83,12 +91,18 @@ func (pep *pipeExtractProcessor) writeBlock(workerID uint, br *blockResult) {
values := c.getValues(br) values := c.getValues(br)
ef := shard.ef ef := shard.ef
rcs := shard.rcs
for _, v := range values { for _, v := range values {
ef.apply(v) ef.apply(v)
/* for i, result := range ef.results { for i, f := range ef.fields {
rcs[i].addValue(result) rcs[i].addValue(*f.value)
} }
*/ }
br.addResultColumns(rcs)
pep.ppBase.writeBlock(workerID, br)
for i := range rcs {
rcs[i].resetKeepName()
} }
} }

View file

@ -11,7 +11,6 @@ import (
"sync/atomic" "sync/atomic"
"unsafe" "unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory" "github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil"
) )
@ -211,7 +210,8 @@ func (shard *pipeSortProcessorShard) writeBlock(br *blockResult) {
shard.columnValues = columnValues shard.columnValues = columnValues
// Generate byColumns // Generate byColumns
var rc resultColumn valuesEncoded := make([]string, len(br.timestamps))
shard.stateSizeBudget -= len(valuesEncoded) * int(unsafe.Sizeof(valuesEncoded[0]))
bb := bbPool.Get() bb := bbPool.Get()
for rowIdx := range br.timestamps { for rowIdx := range br.timestamps {
@ -223,7 +223,12 @@ func (shard *pipeSortProcessorShard) writeBlock(br *blockResult) {
bb.B = marshalJSONKeyValue(bb.B, cs[i].name, v) bb.B = marshalJSONKeyValue(bb.B, cs[i].name, v)
bb.B = append(bb.B, ',') bb.B = append(bb.B, ',')
} }
rc.addValue(bytesutil.ToUnsafeString(bb.B)) if rowIdx > 0 && valuesEncoded[rowIdx-1] == string(bb.B) {
valuesEncoded[rowIdx] = valuesEncoded[rowIdx-1]
} else {
valuesEncoded[rowIdx] = string(bb.B)
shard.stateSizeBudget -= len(bb.B)
}
} }
bbPool.Put(bb) bbPool.Put(bb)
@ -236,13 +241,13 @@ func (shard *pipeSortProcessorShard) writeBlock(br *blockResult) {
{ {
c: &blockResultColumn{ c: &blockResultColumn{
valueType: valueTypeString, valueType: valueTypeString,
valuesEncoded: rc.values, valuesEncoded: valuesEncoded,
}, },
i64Values: i64Values, i64Values: i64Values,
f64Values: f64Values, f64Values: f64Values,
}, },
} }
shard.stateSizeBudget -= rc.sizeBytes() + int(unsafe.Sizeof(byColumns[0])+unsafe.Sizeof(*byColumns[0].c)) shard.stateSizeBudget -= int(unsafe.Sizeof(byColumns[0]) + unsafe.Sizeof(*byColumns[0].c))
// Append br to shard.blocks. // Append br to shard.blocks.
shard.blocks = append(shard.blocks, sortBlock{ shard.blocks = append(shard.blocks, sortBlock{