Fix Date metricid cache consistency under concurrent use (#6534)

### Describe Your Changes

Fix Date metricid cache consistency under concurrent use.
When one goroutine calls Has() and does not find the cache entry in the
immutable map it will acquire a lock and check the mutable map. And it
is possible that before that lock is acquired, the entry is moved from
the mutable map to the immutable map by another goroutine causing a
cache miss.

The fix is to check the immutable map again once the lock is acquired. 

### Checklist

The following checks are **mandatory**:

- [x ] My change adheres [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/contributing/).

---------

Signed-off-by: Artem Fetishev <wwctrsrx@gmail.com>
Co-authored-by: Nikolay <nik@victoriametrics.com>
This commit is contained in:
rtm0 2024-06-26 17:33:38 +02:00 committed by GitHub
parent 3ccce17b94
commit a42bd59ee4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 46 additions and 7 deletions

1
.gitignore vendored
View file

@ -23,3 +23,4 @@ Gemfile.lock
_site
*.tmp
/docs/.jekyll-metadata
coverage.txt

View file

@ -32,6 +32,9 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
* FEATURE: [dashboards](https://grafana.com/orgs/victoriametrics): add [Grafana dashboard](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/dashboards/vmauth.json) and [alerting rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vmauth.yml) for [vmauth](https://docs.victoriametrics.com/vmauth/) dashboard. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4313) for details.
* BUGFIX: [docker-compose](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#docker-compose-environment-for-victoriametrics): fix incorrect link to vmui from [VictoriaMetrics plugin in Grafana](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#grafana).
* BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): Fix the dateMetricIDCache consistency issue that leads to duplicate per-day index entries when new time series are inserted concurrently. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6534) for details.
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix input cursor position reset in modal settings. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6530).
## [v1.102.0-rc2](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.102.0-rc2)

View file

@ -2328,16 +2328,28 @@ func (dmc *dateMetricIDCache) SizeBytes() uint64 {
}
func (dmc *dateMetricIDCache) Has(generation, date, metricID uint64) bool {
if byDate := dmc.byDate.Load(); byDate.get(generation, date).Has(metricID) {
// Fast path. The majority of calls must go here.
return true
}
// Slow path. Acquire the lock and search the immutable map again and then
// also search the mutable map.
return dmc.hasSlow(generation, date, metricID)
}
func (dmc *dateMetricIDCache) hasSlow(generation, date, metricID uint64) bool {
dmc.mu.Lock()
defer dmc.mu.Unlock()
// First, check immutable map again because the entry may have been moved to
// the immutable map by the time the caller acquires the lock.
byDate := dmc.byDate.Load()
v := byDate.get(generation, date)
if v.Has(metricID) {
// Fast path.
// The majority of calls must go here.
return true
}
// Slow path. Check mutable map.
dmc.mu.Lock()
// Then check immutable map.
vMutable := dmc.byDateMutable.get(generation, date)
ok := vMutable.Has(metricID)
if ok {
@ -2348,8 +2360,6 @@ func (dmc *dateMetricIDCache) Has(generation, date, metricID uint64) bool {
dmc.slowHits = 0
}
}
dmc.mu.Unlock()
return ok
}

View file

@ -8,6 +8,7 @@ import (
"reflect"
"sort"
"strings"
"sync"
"testing"
"testing/quick"
"time"
@ -156,6 +157,30 @@ func testDateMetricIDCache(c *dateMetricIDCache, concurrent bool) error {
return nil
}
func TestDateMetricIDCacheIsConsistent(_ *testing.T) {
const (
generation = 1
date = 1
concurrency = 2
numMetrics = 100000
)
dmc := newDateMetricIDCache()
var wg sync.WaitGroup
for i := range concurrency {
wg.Add(1)
go func() {
defer wg.Done()
for id := uint64(i * numMetrics); id < uint64((i+1)*numMetrics); id++ {
dmc.Set(generation, date, id)
if !dmc.Has(generation, date, id) {
panic(fmt.Errorf("dmc.Has(metricID=%d): unexpected cache miss after adding the entry to cache", id))
}
}
}()
}
wg.Wait()
}
func TestUpdateCurrHourMetricIDs(t *testing.T) {
newStorage := func() *Storage {
var s Storage