From 08de733924b640fd036478e1fdc7bf7744d2cca6 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Mon, 27 Jun 2022 12:21:23 +0300 Subject: [PATCH] app/vmselect/netstorage: assume the response is full if up to -replicationFactor-1 vmstorage nodes are unavailable This is a follow-up for ee5c50244617f62469b2db5049e6658fb7a2fce7 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1767 --- app/vmselect/netstorage/netstorage.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/app/vmselect/netstorage/netstorage.go b/app/vmselect/netstorage/netstorage.go index 8f6f5698f..23bdb9a1f 100644 --- a/app/vmselect/netstorage/netstorage.go +++ b/app/vmselect/netstorage/netstorage.go @@ -1407,7 +1407,9 @@ func (snr *storageNodesRequest) collectResults(partialResultsCounter *metrics.Co return false, nil } } - if len(errsPartial) == 0 { + if len(errsPartial) < *replicationFactor { + // Assume that the result is full if the number of failing vmstorage nodes + // is smaller than the -replicationFactor. return false, nil } if len(errsPartial) == len(storageNodes) { @@ -1417,13 +1419,16 @@ func (snr *storageNodesRequest) collectResults(partialResultsCounter *metrics.Co } // Return partial results. // This allows gracefully degrade vmselect in the case - // if a part of storageNodes are temporarily unavailable. + // if a part of vmstorage nodes are temporarily unavailable. + partialResultsCounter.Inc() // Do not return the error, since it may spam logs on busy vmselect // serving high amounts of requests. - partialResultsCounter.Inc() + partialErrorsLogger.Warnf("%d out of %d vmstorage nodes were unavailable during the query; a sample error: %s", len(errsPartial), len(storageNodes), errsPartial[0]) return true, nil } +var partialErrorsLogger = logger.WithThrottler("partialErrors", 10*time.Second) + type storageNode struct { connPool *netutil.ConnPool