app/vmselect/netstorage: assume the response is full if up to -replicationFactor-1 vmstorage nodes are unavailable

This is a follow-up for ee5c502446

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1767
This commit is contained in:
Aliaksandr Valialkin 2022-06-27 12:21:23 +03:00
parent 572db17857
commit 08de733924
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1

View file

@ -1407,7 +1407,9 @@ func (snr *storageNodesRequest) collectResults(partialResultsCounter *metrics.Co
return false, nil
}
}
if len(errsPartial) == 0 {
if len(errsPartial) < *replicationFactor {
// Assume that the result is full if the number of failing vmstorage nodes
// is smaller than the -replicationFactor.
return false, nil
}
if len(errsPartial) == len(storageNodes) {
@ -1417,13 +1419,16 @@ func (snr *storageNodesRequest) collectResults(partialResultsCounter *metrics.Co
}
// Return partial results.
// This allows vmselect to degrade gracefully in the case
// if a part of storageNodes are temporarily unavailable.
// if a part of vmstorage nodes are temporarily unavailable.
partialResultsCounter.Inc()
// Do not return the error, since it may spam logs on busy vmselect
// serving high amounts of requests.
partialResultsCounter.Inc()
partialErrorsLogger.Warnf("%d out of %d vmstorage nodes were unavailable during the query; a sample error: %s", len(errsPartial), len(storageNodes), errsPartial[0])
return true, nil
}
// partialErrorsLogger logs warnings about vmstorage nodes that were unavailable during a query.
// It is created via logger.WithThrottler with a 10-second interval — presumably so that these
// warnings are rate-limited and do not spam logs on busy vmselect; confirm against the logger package.
var partialErrorsLogger = logger.WithThrottler("partialErrors", 10*time.Second)
type storageNode struct {
connPool *netutil.ConnPool