lib/streamaggr: flush dedup state and aggregation state in parallel on all the available CPU cores

This should reduce the time needed for aggregation state flush on systems with many CPU cores.
Aliaksandr Valialkin 2024-03-04 01:21:39 +02:00
parent 3c06b3af92
commit 0422ae01ba
16 changed files with 151 additions and 78 deletions
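
The speed-up comes from a simple bounded-concurrency pattern: every dedup shard and every aggregation state is now flushed on its own goroutine, while a buffered channel sized to the number of available CPU cores (flushConcurrencyCh) caps how many flushes run at once. A minimal, self-contained sketch of that pattern is shown below; the helper names and the use of runtime.NumCPU are illustrative only, while the actual code sizes the semaphore with cgroup.AvailableCPUs() and flushes dedupAggrShard and aggrState values rather than plain ints.

package main

import (
	"fmt"
	"runtime"
	"sync"
)

// Semaphore limiting flush concurrency to the number of CPU cores.
// The real code sizes it with cgroup.AvailableCPUs() instead of runtime.NumCPU().
var flushConcurrencyCh = make(chan struct{}, runtime.NumCPU())

// flushShards flushes every shard on its own goroutine, but never runs
// more flushes at once than there are CPU cores.
func flushShards(shards []int, flushShard func(shard int)) {
	var wg sync.WaitGroup
	for i := range shards {
		flushConcurrencyCh <- struct{}{} // acquire a CPU slot
		wg.Add(1)
		go func(shard int) {
			defer func() {
				<-flushConcurrencyCh // release the slot
				wg.Done()
			}()
			flushShard(shard)
		}(shards[i])
	}
	wg.Wait()
}

func main() {
	shards := []int{0, 1, 2, 3, 4, 5, 6, 7}
	flushShards(shards, func(shard int) {
		fmt.Println("flushing shard", shard)
	})
}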

@@ -54,14 +54,14 @@ func (as *avgAggrState) pushSamples(samples []pushSample) {
 		}
 		sv.mu.Unlock()
 		if deleted {
-			// The entry has been deleted by the concurrent call to appendSeriesForFlush
+			// The entry has been deleted by the concurrent call to flushState
 			// Try obtaining and updating the entry again.
 			goto again
 		}
 	}
 }
 
-func (as *avgAggrState) appendSeriesForFlush(ctx *flushCtx) {
+func (as *avgAggrState) flushState(ctx *flushCtx) {
 	currentTimeMsec := int64(fasttime.UnixTimestamp()) * 1000
 	m := &as.m
 	m.Range(func(k, v interface{}) bool {

@@ -51,14 +51,14 @@ func (as *countSamplesAggrState) pushSamples(samples []pushSample) {
 		}
 		sv.mu.Unlock()
 		if deleted {
-			// The entry has been deleted by the concurrent call to appendSeriesForFlush
+			// The entry has been deleted by the concurrent call to flushState
 			// Try obtaining and updating the entry again.
 			goto again
 		}
 	}
 }
 
-func (as *countSamplesAggrState) appendSeriesForFlush(ctx *flushCtx) {
+func (as *countSamplesAggrState) flushState(ctx *flushCtx) {
 	currentTimeMsec := int64(fasttime.UnixTimestamp()) * 1000
 	m := &as.m
 	m.Range(func(k, v interface{}) bool {

@@ -61,14 +61,14 @@ func (as *countSeriesAggrState) pushSamples(samples []pushSample) {
 		}
 		sv.mu.Unlock()
 		if deleted {
-			// The entry has been deleted by the concurrent call to appendSeriesForFlush
+			// The entry has been deleted by the concurrent call to flushState
 			// Try obtaining and updating the entry again.
 			goto again
 		}
 	}
 }
 
-func (as *countSeriesAggrState) appendSeriesForFlush(ctx *flushCtx) {
+func (as *countSeriesAggrState) flushState(ctx *flushCtx) {
 	currentTimeMsec := int64(fasttime.UnixTimestamp()) * 1000
 	m := &as.m
 	m.Range(func(k, v interface{}) bool {

@@ -89,6 +89,21 @@ func (da *dedupAggr) pushSamples(samples []pushSample) {
 	putPerShardSamples(pss)
 }
 
+func getDedupFlushCtx() *dedupFlushCtx {
+	v := dedupFlushCtxPool.Get()
+	if v == nil {
+		return &dedupFlushCtx{}
+	}
+	return v.(*dedupFlushCtx)
+}
+
+func putDedupFlushCtx(ctx *dedupFlushCtx) {
+	ctx.reset()
+	dedupFlushCtxPool.Put(ctx)
+}
+
+var dedupFlushCtxPool sync.Pool
+
 type dedupFlushCtx struct {
 	samples []pushSample
 }
@@ -99,12 +114,22 @@ func (ctx *dedupFlushCtx) reset() {
 }
 
 func (da *dedupAggr) flush(f func(samples []pushSample)) {
-	ctx := &dedupFlushCtx{}
-	shards := da.shards
-	for i := range shards {
-		ctx.reset()
-		shards[i].flush(ctx, f)
+	var wg sync.WaitGroup
+	for i := range da.shards {
+		flushConcurrencyCh <- struct{}{}
+		wg.Add(1)
+		go func(shard *dedupAggrShard) {
+			defer func() {
+				<-flushConcurrencyCh
+				wg.Done()
+			}()
+
+			ctx := getDedupFlushCtx()
+			shard.flush(ctx, f)
+			putDedupFlushCtx(ctx)
+		}(&da.shards[i])
 	}
+	wg.Wait()
 }
 
 type perShardSamples struct {
@@ -178,8 +203,14 @@ func (das *dedupAggrShard) flush(ctx *dedupFlushCtx, f func(samples []pushSample
 			key:   key,
 			value: s.value,
 		})
-	}
-	ctx.samples = dstSamples
 
-	f(dstSamples)
+		// Limit the number of samples per each flush in order to limit memory usage.
+		if len(dstSamples) >= 100_000 {
+			f(dstSamples)
+			clear(dstSamples)
+			dstSamples = dstSamples[:0]
+		}
+	}
+	f(dstSamples)
+	ctx.samples = dstSamples
 }

@@ -33,11 +33,14 @@ func TestDedupAggrSerial(t *testing.T) {
 	}
 
 	flushedSamplesMap := make(map[string]pushSample)
+	var mu sync.Mutex
 	flushSamples := func(samples []pushSample) {
+		mu.Lock()
 		for _, sample := range samples {
 			sample.key = strings.Clone(sample.key)
 			flushedSamplesMap[sample.key] = sample
 		}
+		mu.Unlock()
 	}
 
 	da.flush(flushSamples)

@@ -59,7 +59,7 @@ func (as *histogramBucketAggrState) pushSamples(samples []pushSample) {
 		}
 		sv.mu.Unlock()
 		if deleted {
-			// The entry has been deleted by the concurrent call to appendSeriesForFlush
+			// The entry has been deleted by the concurrent call to flushState
 			// Try obtaining and updating the entry again.
 			goto again
 		}
@@ -86,7 +86,7 @@ func (as *histogramBucketAggrState) removeOldEntries(currentTime uint64) {
 	})
 }
 
-func (as *histogramBucketAggrState) appendSeriesForFlush(ctx *flushCtx) {
+func (as *histogramBucketAggrState) flushState(ctx *flushCtx) {
 	currentTime := fasttime.UnixTimestamp()
 	currentTimeMsec := int64(currentTime) * 1000

@@ -51,14 +51,14 @@ func (as *lastAggrState) pushSamples(samples []pushSample) {
 		}
 		sv.mu.Unlock()
 		if deleted {
-			// The entry has been deleted by the concurrent call to appendSeriesForFlush
+			// The entry has been deleted by the concurrent call to flushState
 			// Try obtaining and updating the entry again.
 			goto again
 		}
 	}
 }
 
-func (as *lastAggrState) appendSeriesForFlush(ctx *flushCtx) {
+func (as *lastAggrState) flushState(ctx *flushCtx) {
 	currentTimeMsec := int64(fasttime.UnixTimestamp()) * 1000
 	m := &as.m
 	m.Range(func(k, v interface{}) bool {

@@ -53,14 +53,14 @@ func (as *maxAggrState) pushSamples(samples []pushSample) {
 		}
 		sv.mu.Unlock()
 		if deleted {
-			// The entry has been deleted by the concurrent call to appendSeriesForFlush
+			// The entry has been deleted by the concurrent call to flushState
 			// Try obtaining and updating the entry again.
 			goto again
 		}
 	}
 }
 
-func (as *maxAggrState) appendSeriesForFlush(ctx *flushCtx) {
+func (as *maxAggrState) flushState(ctx *flushCtx) {
 	currentTimeMsec := int64(fasttime.UnixTimestamp()) * 1000
 	m := &as.m
 	m.Range(func(k, v interface{}) bool {

@@ -53,14 +53,14 @@ func (as *minAggrState) pushSamples(samples []pushSample) {
 		}
 		sv.mu.Unlock()
 		if deleted {
-			// The entry has been deleted by the concurrent call to appendSeriesForFlush
+			// The entry has been deleted by the concurrent call to flushState
 			// Try obtaining and updating the entry again.
 			goto again
 		}
 	}
 }
 
-func (as *minAggrState) appendSeriesForFlush(ctx *flushCtx) {
+func (as *minAggrState) flushState(ctx *flushCtx) {
 	currentTimeMsec := int64(fasttime.UnixTimestamp()) * 1000
 	m := &as.m
 	m.Range(func(k, v interface{}) bool {

@@ -58,14 +58,14 @@ func (as *quantilesAggrState) pushSamples(samples []pushSample) {
 		}
 		sv.mu.Unlock()
 		if deleted {
-			// The entry has been deleted by the concurrent call to appendSeriesForFlush
+			// The entry has been deleted by the concurrent call to flushState
 			// Try obtaining and updating the entry again.
 			goto again
 		}
 	}
 }
 
-func (as *quantilesAggrState) appendSeriesForFlush(ctx *flushCtx) {
+func (as *quantilesAggrState) flushState(ctx *flushCtx) {
 	currentTimeMsec := int64(fasttime.UnixTimestamp()) * 1000
 	m := &as.m
 	phis := as.phis

@@ -54,14 +54,14 @@ func (as *stddevAggrState) pushSamples(samples []pushSample) {
 		}
 		sv.mu.Unlock()
 		if deleted {
-			// The entry has been deleted by the concurrent call to appendSeriesForFlush
+			// The entry has been deleted by the concurrent call to flushState
 			// Try obtaining and updating the entry again.
 			goto again
 		}
 	}
 }
 
-func (as *stddevAggrState) appendSeriesForFlush(ctx *flushCtx) {
+func (as *stddevAggrState) flushState(ctx *flushCtx) {
 	currentTimeMsec := int64(fasttime.UnixTimestamp()) * 1000
 	m := &as.m
 	m.Range(func(k, v interface{}) bool {

@@ -53,14 +53,14 @@ func (as *stdvarAggrState) pushSamples(samples []pushSample) {
 		}
 		sv.mu.Unlock()
 		if deleted {
-			// The entry has been deleted by the concurrent call to appendSeriesForFlush
+			// The entry has been deleted by the concurrent call to flushState
 			// Try obtaining and updating the entry again.
 			goto again
 		}
 	}
 }
 
-func (as *stdvarAggrState) appendSeriesForFlush(ctx *flushCtx) {
+func (as *stdvarAggrState) flushState(ctx *flushCtx) {
 	currentTimeMsec := int64(fasttime.UnixTimestamp()) * 1000
 	m := &as.m
 	m.Range(func(k, v interface{}) bool {

@@ -304,6 +304,7 @@ type aggregator struct {
 	// lc is used for compressing series keys before passing them to dedupAggr and aggrState.
 	lc promutils.LabelsCompressor
 
+	// pushFunc is the callback, which is called by aggrState when flushing its state.
 	pushFunc PushFunc
 
 	// suffix contains a suffix, which should be added to aggregate metric names
@@ -329,7 +330,7 @@ type aggregator struct {
 
 type aggrState interface {
 	pushSamples(samples []pushSample)
-	appendSeriesForFlush(ctx *flushCtx)
+	flushState(ctx *flushCtx)
 }
 
 // PushFunc is called by Aggregators when it needs to push its state to metrics storage
@@ -544,11 +545,7 @@ func (a *aggregator) runFlusher(interval, dedupInterval time.Duration) {
 			return
 		case <-tickerFlush.C:
 			startTime := time.Now()
-
-			flushConcurrencyCh <- struct{}{}
 			a.flush()
-			<-flushConcurrencyCh
-
 			d := time.Since(startTime)
 			a.flushDuration.Update(d.Seconds())
 			if d > interval {
@@ -559,11 +556,7 @@ func (a *aggregator) runFlusher(interval, dedupInterval time.Duration) {
 			}
 		case <-dedupTickerCh:
 			startTime := time.Now()
-
-			flushConcurrencyCh <- struct{}{}
 			a.dedupFlush()
-			<-flushConcurrencyCh
-
 			d := time.Since(startTime)
 			a.dedupFlushDuration.Update(d.Seconds())
 			if d > dedupInterval {
@@ -576,49 +569,32 @@ func (a *aggregator) runFlusher(interval, dedupInterval time.Duration) {
 	}
 }
 
-var flushConcurrencyCh = make(chan struct{}, cgroup.AvailableCPUs())
-
 func (a *aggregator) dedupFlush() {
 	a.da.flush(a.pushSamples)
 }
 
 func (a *aggregator) flush() {
-	ctx := &flushCtx{
-		a: a,
-	}
+	var wg sync.WaitGroup
 	for _, as := range a.aggrStates {
-		ctx.reset()
-		as.appendSeriesForFlush(ctx)
-
-		tss := ctx.tss
-
-		if a.outputRelabeling == nil {
-			// Fast path - push the output metrics.
-			a.pushFunc(tss)
-			continue
-		}
-
-		// Slower path - apply output relabeling and then push the output metrics.
-		auxLabels := promutils.GetLabels()
-		dstLabels := auxLabels.Labels[:0]
-		dst := tss[:0]
-		for _, ts := range tss {
-			dstLabelsLen := len(dstLabels)
-			dstLabels = append(dstLabels, ts.Labels...)
-			dstLabels = a.outputRelabeling.Apply(dstLabels, dstLabelsLen)
-			if len(dstLabels) == dstLabelsLen {
-				// The metric has been deleted by the relabeling
-				continue
-			}
-			ts.Labels = dstLabels[dstLabelsLen:]
-			dst = append(dst, ts)
-		}
-		a.pushFunc(dst)
-		auxLabels.Labels = dstLabels
-		promutils.PutLabels(auxLabels)
+		flushConcurrencyCh <- struct{}{}
+		wg.Add(1)
+		go func(as aggrState) {
+			defer func() {
+				<-flushConcurrencyCh
+				wg.Done()
+			}()
+
+			ctx := getFlushCtx(a)
+			as.flushState(ctx)
+			ctx.flushSeries()
+			putFlushCtx(ctx)
+		}(as)
 	}
+	wg.Wait()
 }
 
+var flushConcurrencyCh = make(chan struct{}, cgroup.AvailableCPUs())
+
 // MustStop stops the aggregator.
 //
 // The aggregator stops pushing the aggregated metrics after this call.
@@ -631,12 +607,10 @@ func (a *aggregator) MustStop() {
 	}
 
 	// Flush the remaining data from the last interval if needed.
-	flushConcurrencyCh <- struct{}{}
 	if a.da != nil {
 		a.dedupFlush()
 	}
 	a.flush()
-	<-flushConcurrencyCh
 }
 
 // Push pushes tss to a.
@@ -796,6 +770,23 @@ func getInputOutputLabels(dstInput, dstOutput, labels []prompbmarshal.Label, by,
 	return dstInput, dstOutput
 }
 
+func getFlushCtx(a *aggregator) *flushCtx {
+	v := flushCtxPool.Get()
+	if v == nil {
+		v = &flushCtx{}
+	}
+	ctx := v.(*flushCtx)
+	ctx.a = a
+	return ctx
+}
+
+func putFlushCtx(ctx *flushCtx) {
+	ctx.reset()
+	flushCtxPool.Put(ctx)
+}
+
+var flushCtxPool sync.Pool
+
 type flushCtx struct {
 	a *aggregator
@@ -805,12 +796,55 @@ type flushCtx struct {
 }
 
 func (ctx *flushCtx) reset() {
+	ctx.a = nil
+	ctx.resetSeries()
+}
+
+func (ctx *flushCtx) resetSeries() {
 	ctx.tss = prompbmarshal.ResetTimeSeries(ctx.tss)
-	promrelabel.CleanLabels(ctx.labels)
+	clear(ctx.labels)
 	ctx.labels = ctx.labels[:0]
 	ctx.samples = ctx.samples[:0]
 }
 
+func (ctx *flushCtx) flushSeries() {
+	tss := ctx.tss
+	if len(tss) == 0 {
+		// nothing to flush
+		return
+	}
+
+	outputRelabeling := ctx.a.outputRelabeling
+	if outputRelabeling == nil {
+		// Fast path - push the output metrics.
+		ctx.a.pushFunc(tss)
+		return
+	}
+
+	// Slow path - apply output relabeling and then push the output metrics.
+	auxLabels := promutils.GetLabels()
+	dstLabels := auxLabels.Labels[:0]
+	dst := tss[:0]
+	for _, ts := range tss {
+		dstLabelsLen := len(dstLabels)
+		dstLabels = append(dstLabels, ts.Labels...)
+		dstLabels = outputRelabeling.Apply(dstLabels, dstLabelsLen)
+		if len(dstLabels) == dstLabelsLen {
+			// The metric has been deleted by the relabeling
+			continue
+		}
+		ts.Labels = dstLabels[dstLabelsLen:]
+		dst = append(dst, ts)
+	}
+	ctx.a.pushFunc(dst)
+	auxLabels.Labels = dstLabels
+	promutils.PutLabels(auxLabels)
+
+	ctx.resetSeries()
+}
+
 func (ctx *flushCtx) appendSeries(key, suffix string, timestamp int64, value float64) {
 	labelsLen := len(ctx.labels)
 	samplesLen := len(ctx.samples)
@@ -826,6 +860,11 @@ func (ctx *flushCtx) appendSeries(key, suffix string, timestamp int64, value flo
 		Labels:  ctx.labels[labelsLen:],
 		Samples: ctx.samples[samplesLen:],
 	})
+
+	// Limit the maximum length of ctx.tss in order to limit memory usage.
+	if len(ctx.tss) >= 10_000 {
+		ctx.flushSeries()
+	}
 }
 
 func (ctx *flushCtx) appendSeriesWithExtraLabel(key, suffix string, timestamp int64, value float64, extraName, extraValue string) {

@@ -51,14 +51,14 @@ func (as *sumSamplesAggrState) pushSamples(samples []pushSample) {
 		}
 		sv.mu.Unlock()
 		if deleted {
-			// The entry has been deleted by the concurrent call to appendSeriesForFlush
+			// The entry has been deleted by the concurrent call to flushState
 			// Try obtaining and updating the entry again.
 			goto again
 		}
 	}
 }
 
-func (as *sumSamplesAggrState) appendSeriesForFlush(ctx *flushCtx) {
+func (as *sumSamplesAggrState) flushState(ctx *flushCtx) {
 	currentTimeMsec := int64(fasttime.UnixTimestamp()) * 1000
 	m := &as.m
 	m.Range(func(k, v interface{}) bool {

@@ -90,7 +90,7 @@ func (as *totalAggrState) pushSamples(samples []pushSample) {
 		}
 		sv.mu.Unlock()
 		if deleted {
-			// The entry has been deleted by the concurrent call to appendSeriesForFlush
+			// The entry has been deleted by the concurrent call to flushState
 			// Try obtaining and updating the entry again.
 			goto again
 		}
@@ -125,7 +125,7 @@ func (as *totalAggrState) removeOldEntries(currentTime uint64) {
 	})
 }
 
-func (as *totalAggrState) appendSeriesForFlush(ctx *flushCtx) {
+func (as *totalAggrState) flushState(ctx *flushCtx) {
 	currentTime := fasttime.UnixTimestamp()
 	currentTimeMsec := int64(currentTime) * 1000

@@ -55,14 +55,14 @@ func (as *uniqueSamplesAggrState) pushSamples(samples []pushSample) {
 		}
 		sv.mu.Unlock()
 		if deleted {
-			// The entry has been deleted by the concurrent call to appendSeriesForFlush
+			// The entry has been deleted by the concurrent call to flushState
 			// Try obtaining and updating the entry again.
 			goto again
 		}
 	}
 }
 
-func (as *uniqueSamplesAggrState) appendSeriesForFlush(ctx *flushCtx) {
+func (as *uniqueSamplesAggrState) flushState(ctx *flushCtx) {
 	currentTimeMsec := int64(fasttime.UnixTimestamp()) * 1000
 	m := &as.m
 	m.Range(func(k, v interface{}) bool {
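
Taken together, the flushCtx changes combine two standard Go techniques: a sync.Pool, so flush contexts can be reused across the now-concurrent flushes, and bounded batching, so a single flush never holds an unbounded number of series in memory. A condensed, hypothetical sketch of that combination follows; flushBatcher and its push callback are made-up names, while the diff applies the same idea to flushCtx with prompbmarshal.TimeSeries values and a 10_000-series threshold.

package main

import (
	"fmt"
	"sync"
)

// flushBatcher accumulates items and pushes them downstream in bounded batches,
// mirroring how flushCtx caps len(ctx.tss) before calling flushSeries().
type flushBatcher struct {
	items []string
	push  func(items []string)
}

func (b *flushBatcher) reset() {
	b.items = b.items[:0]
	b.push = nil
}

// append adds one item and flushes early once the batch grows too large,
// limiting memory usage the same way appendSeries() does in the diff.
func (b *flushBatcher) append(item string) {
	b.items = append(b.items, item)
	if len(b.items) >= 10_000 {
		b.flush()
	}
}

func (b *flushBatcher) flush() {
	if len(b.items) == 0 {
		return
	}
	b.push(b.items)
	b.items = b.items[:0]
}

// batcherPool reuses flushBatcher objects across flush cycles,
// in the same way flushCtxPool reuses flushCtx objects.
var batcherPool sync.Pool

func getFlushBatcher(push func(items []string)) *flushBatcher {
	v := batcherPool.Get()
	if v == nil {
		v = &flushBatcher{}
	}
	b := v.(*flushBatcher)
	b.push = push
	return b
}

func putFlushBatcher(b *flushBatcher) {
	b.reset()
	batcherPool.Put(b)
}

func main() {
	b := getFlushBatcher(func(items []string) {
		fmt.Println("pushing", len(items), "items")
	})
	for i := 0; i < 25_000; i++ {
		b.append(fmt.Sprintf("series_%d", i))
	}
	b.flush() // push the remaining tail
	putFlushBatcher(b)
}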