lib/streamaggr: do not flush dedup shards in parallel

Flushing dedup shards in parallel significantly increases CPU usage on systems with many CPU cores, while not reducing flush latency by much.
Aliaksandr Valialkin 2024-03-04 16:59:19 +02:00
parent 946814afee
commit eb40395a1c
2 changed files with 8 additions and 14 deletions
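
The commit keeps the existing flushConcurrencyCh limiter, but a single slot is now acquired around the whole dedup flush (see the second hunk below) instead of one slot per shard goroutine. The following self-contained Go sketch illustrates the buffered-channel-as-semaphore pattern behind flushConcurrencyCh; the channel capacity and the helper function name are illustrative assumptions, not the repository's actual definitions.

package main

import (
	"fmt"
	"runtime"
)

// flushConcurrencyCh acts as a counting semaphore: the buffer size caps how many
// flushes may run concurrently. Sizing it to the number of CPUs is an assumption
// made for this sketch.
var flushConcurrencyCh = make(chan struct{}, runtime.GOMAXPROCS(-1))

// flushWithConcurrencyLimit is a hypothetical helper: it acquires one slot for the
// whole flush, runs it, and releases the slot afterwards.
func flushWithConcurrencyLimit(flush func()) {
	flushConcurrencyCh <- struct{}{} // acquire a slot; blocks while all slots are busy
	defer func() {
		<-flushConcurrencyCh // release the slot
	}()
	flush()
}

func main() {
	flushWithConcurrencyLimit(func() {
		fmt.Println("flushing dedup shards sequentially under a single concurrency slot")
	})
}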


@@ -113,22 +113,14 @@ func (ctx *dedupFlushCtx) reset() {
 }
 
 func (da *dedupAggr) flush(f func(samples []pushSample)) {
-	var wg sync.WaitGroup
+	// Do not flush shards in parallel, since this significantly increases CPU usage
+	// on systems with many CPU cores, while doesn't improve flush latency too much.
+	ctx := getDedupFlushCtx()
 	for i := range da.shards {
-		flushConcurrencyCh <- struct{}{}
-		wg.Add(1)
-		go func(shard *dedupAggrShard) {
-			defer func() {
-				<-flushConcurrencyCh
-				wg.Done()
-			}()
-
-			ctx := getDedupFlushCtx()
-			shard.flush(ctx, f)
-			putDedupFlushCtx(ctx)
-		}(&da.shards[i])
+		ctx.reset()
+		da.shards[i].flush(ctx, f)
 	}
-	wg.Wait()
+	putDedupFlushCtx(ctx)
 }
 
 type perShardSamples struct {
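
The new sequential loop reuses a single flush context across all shards via ctx.reset(). The sketch below shows one plausible way getDedupFlushCtx / putDedupFlushCtx could be backed by a sync.Pool; the pushSample fields and the pool itself are assumptions for illustration, not the repository's actual definitions.

package main

import (
	"fmt"
	"sync"
)

// pushSample mirrors the sample type passed to the flush callback; its fields
// here are assumed for illustration only.
type pushSample struct {
	key   string
	value float64
}

// dedupFlushCtx holds a reusable buffer for one flush pass over a shard.
type dedupFlushCtx struct {
	samples []pushSample
}

// reset clears the buffer so the same context can be reused for the next shard.
func (ctx *dedupFlushCtx) reset() {
	ctx.samples = ctx.samples[:0]
}

// dedupFlushCtxPool recycles contexts between flush cycles to avoid allocations.
var dedupFlushCtxPool = sync.Pool{
	New: func() any { return &dedupFlushCtx{} },
}

func getDedupFlushCtx() *dedupFlushCtx { return dedupFlushCtxPool.Get().(*dedupFlushCtx) }

func putDedupFlushCtx(ctx *dedupFlushCtx) {
	ctx.reset()
	dedupFlushCtxPool.Put(ctx)
}

func main() {
	// One context serves every shard; reset() is called before each shard flush.
	ctx := getDedupFlushCtx()
	for shard := 0; shard < 3; shard++ {
		ctx.reset()
		ctx.samples = append(ctx.samples, pushSample{key: fmt.Sprintf("shard-%d", shard), value: 1})
		fmt.Printf("flushed %d sample(s) from %s\n", len(ctx.samples), ctx.samples[0].key)
	}
	putDedupFlushCtx(ctx)
}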


@@ -670,7 +670,9 @@ func (a *aggregator) dedupFlush(dedupInterval time.Duration) {
 	startTime := time.Now()
 
+	flushConcurrencyCh <- struct{}{}
 	a.da.flush(a.pushSamples)
+	<-flushConcurrencyCh
 
 	d := time.Since(startTime)
 	a.dedupFlushDuration.Update(d.Seconds())
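
For context, a minimal sketch of the plumbing around dedupFlush, assuming the aggregator drives it from a time.Ticker loop; the loop, the stop channel, and the printed duration are assumed stand-ins for the real scheduler and the dedupFlushDuration metric shown above.

package main

import (
	"fmt"
	"time"
)

// runDedupFlusher periodically triggers a dedup flush and records how long each
// flush took. This is assumed plumbing for illustration, not the actual aggregator loop.
func runDedupFlusher(dedupInterval time.Duration, stopCh <-chan struct{}, flush func()) {
	t := time.NewTicker(dedupInterval)
	defer t.Stop()
	for {
		select {
		case <-stopCh:
			return
		case <-t.C:
			startTime := time.Now()
			flush()
			d := time.Since(startTime)
			// The real code feeds d.Seconds() into the dedupFlushDuration metric.
			fmt.Printf("dedup flush took %s\n", d)
		}
	}
}

func main() {
	stopCh := make(chan struct{})
	go runDedupFlusher(10*time.Millisecond, stopCh, func() {})
	time.Sleep(35 * time.Millisecond)
	close(stopCh)
}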