lib/streamaggr: do not flush dedup shards in parallel

Flushing dedup shards in parallel significantly increases CPU usage on systems with many CPU cores, while it doesn't reduce flush latency by much.
This commit is contained in:
Aliaksandr Valialkin 2024-03-04 16:59:19 +02:00
parent 946814afee
commit eb40395a1c
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
2 changed files with 8 additions and 14 deletions

View file

@ -113,22 +113,14 @@ func (ctx *dedupFlushCtx) reset() {
} }
func (da *dedupAggr) flush(f func(samples []pushSample)) { func (da *dedupAggr) flush(f func(samples []pushSample)) {
var wg sync.WaitGroup // Do not flush shards in parallel, since this significantly increases CPU usage
// on systems with many CPU cores, while it doesn't improve flush latency by much.
ctx := getDedupFlushCtx()
for i := range da.shards { for i := range da.shards {
flushConcurrencyCh <- struct{}{} ctx.reset()
wg.Add(1) da.shards[i].flush(ctx, f)
go func(shard *dedupAggrShard) {
defer func() {
<-flushConcurrencyCh
wg.Done()
}()
ctx := getDedupFlushCtx()
shard.flush(ctx, f)
putDedupFlushCtx(ctx)
}(&da.shards[i])
} }
wg.Wait() putDedupFlushCtx(ctx)
} }
type perShardSamples struct { type perShardSamples struct {

View file

@ -670,7 +670,9 @@ func (a *aggregator) dedupFlush(dedupInterval time.Duration) {
startTime := time.Now() startTime := time.Now()
flushConcurrencyCh <- struct{}{}
a.da.flush(a.pushSamples) a.da.flush(a.pushSamples)
<-flushConcurrencyCh
d := time.Since(startTime) d := time.Since(startTime)
a.dedupFlushDuration.Update(d.Seconds()) a.dedupFlushDuration.Update(d.Seconds())