mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-10 15:14:09 +00:00
app/vmauth: properly check for backend health before leastLoaded policy (#7489)
Previously, vmauth could have pick `buMin` as least loaded backend without checking its status. In result, vmauth could have respond to the user with an error even if there were healthy backends. That could happen if healthy backends already had non-zero amount of concurrent requests executing at the moment of least-loaded backend choosing logic. Steps to reproduce: 1. Setup vmauth with two backends: healthy and non-healthy 2. Execute a bunch of concurrent requests against vmauth (i.e. Grafana dash reload) 3. Observe that some requests will fail with message that all backends are unavailable Addresses https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3061 --- Signed-off-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
parent
b399f1c656
commit
a0a154511a
3 changed files with 28 additions and 2 deletions
|
@ -462,13 +462,17 @@ func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *
|
|||
|
||||
// Slow path - select other backend urls.
|
||||
n := atomicCounter.Add(1) - 1
|
||||
|
||||
buMin := bus[n%uint32(len(bus))]
|
||||
for i := uint32(0); i < uint32(len(bus)); i++ {
|
||||
idx := (n + i) % uint32(len(bus))
|
||||
bu := bus[idx]
|
||||
if bu.isBroken() {
|
||||
continue
|
||||
}
|
||||
if buMin.isBroken() {
|
||||
// verify that buMin isn't set as broken
|
||||
buMin = bu
|
||||
}
|
||||
if bu.concurrentRequests.Load() == 0 {
|
||||
// Fast path - return the backend with zero concurrently executed requests.
|
||||
// Do not use CompareAndSwap() instead of Load(), since it is much slower on systems with many CPU cores.
|
||||
|
@ -478,7 +482,6 @@ func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *
|
|||
}
|
||||
|
||||
// Slow path - return the backend with the minimum number of concurrently executed requests.
|
||||
buMin := bus[n%uint32(len(bus))]
|
||||
minRequests := buMin.concurrentRequests.Load()
|
||||
for _, bu := range bus {
|
||||
if bu.isBroken() {
|
||||
|
|
|
@ -777,6 +777,28 @@ func TestGetLeastLoadedBackendURL(t *testing.T) {
|
|||
fn(7, 7, 7)
|
||||
}
|
||||
|
||||
func TestBrokenBackend(t *testing.T) {
|
||||
up := mustParseURLs([]string{
|
||||
"http://node1:343",
|
||||
"http://node2:343",
|
||||
"http://node3:343",
|
||||
})
|
||||
up.loadBalancingPolicy = "least_loaded"
|
||||
pbus := up.bus.Load()
|
||||
bus := *pbus
|
||||
|
||||
// explicitly mark one of the backends as broken
|
||||
bus[1].setBroken()
|
||||
|
||||
// broken backend should never return while there are healthy backends
|
||||
for i := 0; i < 1e3; i++ {
|
||||
b := up.getBackendURL()
|
||||
if b.isBroken() {
|
||||
t.Fatalf("unexpected broken backend %q", b.url)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func getRegexs(paths []string) []*Regex {
|
||||
var sps []*Regex
|
||||
for _, path := range paths {
|
||||
|
|
|
@ -21,6 +21,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
|
|||
* BUGFIX: [vmctl](https://docs.victoriametrics.com/vmctl/): drop rows that do not belong to the current series during import. The dropped rows should belong to another series whose tags are a superset of the current series. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7301) and [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/7330). Thanks to @dpedu for reporting and cooperating with the test.
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/single-server-victoriametrics/), `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): keep the order of resulting time series when `limit_offset` is applied. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7068).
|
||||
* BUGFIX: [graphite](https://docs.victoriametrics.com/#graphite-render-api-usage): properly handle xFilesFactor=0 for `transformRemoveEmptySeries` function. See [this PR](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/7337) for details.
|
||||
* BUGFIX: [vmauth](https://docs.victoriametrics.com/vmauth): properly check availability of all the backends before giving up when proxying requests. Previously, vmauth could return an error even if there were healthy backends available. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3061) for details.
|
||||
|
||||
## [v1.106.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.106.0)
|
||||
|
||||
|
|
Loading…
Reference in a new issue