app/vminsert: fix uneven distribution of time series among storage nodes

Use distinct seed for distribution hash calculations on the second level of vminsert nodes.

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1672
This commit is contained in:
Aliaksandr Valialkin 2021-10-07 12:21:42 +03:00
parent cea79d2013
commit 64b6f3f1c8
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
4 changed files with 19 additions and 2 deletions

View file

@@ -79,7 +79,14 @@ func main() {
if len(*storageNodes) == 0 {
logger.Fatalf("missing -storageNode arg")
}
netstorage.InitStorageNodes(*storageNodes)
hashSeed := byte(0)
if *clusternativeListenAddr != "" {
// Use different hash seed for the second level of vminsert nodes in multi-level cluster setup.
// This should fix uneven distribution of time series among storage nodes.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1672
hashSeed = 42
}
netstorage.InitStorageNodes(*storageNodes, hashSeed)
logger.Infof("successfully initialized netstorage in %.3f seconds", time.Since(startTime).Seconds())
relabel.Init()

View file

@@ -162,6 +162,9 @@ func (ctx *InsertCtx) GetStorageNodeIdx(at *auth.Token, labels []prompb.Label) i
}
buf := ctx.labelsBuf[:0]
if hashSeed != 0 {
buf = append(buf, hashSeed)
}
buf = encoding.MarshalUint32(buf, at.AccountID)
buf = encoding.MarshalUint32(buf, at.ProjectID)
for i := range labels {

View file

@@ -400,11 +400,17 @@ var storageNodesWG sync.WaitGroup
var storageNodesStopCh = make(chan struct{})
// hashSeed is a seed for distributing time series among storage nodes.
var hashSeed byte
// InitStorageNodes initializes vmstorage nodes' connections to the given addrs.
func InitStorageNodes(addrs []string) {
//
// seed is used for changing the distribution of input time series among addrs.
func InitStorageNodes(addrs []string, seed byte) {
if len(addrs) == 0 {
logger.Panicf("BUG: addrs must be non-empty")
}
hashSeed = seed
// Sort addrs in order to guarantee identical series->vmstorage mapping across all the vminsert nodes.
addrsCopy := append([]string{}, addrs...)

View file

@@ -16,6 +16,7 @@ sort: 15
* BUGFIX: align behavior of the queries `a or on (labels) b`, `a and on (labels) b` and `a unless on (labels) b` where `b` has multiple time series with the given `labels` to Prometheus behavior. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1643).
* BUGFIX: vmagent: fix `openstack_sd_config` service discovery when both `domain_name` and `project_id` config options are set. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1655).
* BUGFIX: return proper values (zeroes) from [stddev_over_time](https://docs.victoriametrics.com/MetricsQL.html#stddev_over_time) and [stdvar_over_time](https://docs.victoriametrics.com/MetricsQL.html#stdvar_over_time) functions when the lookbehind window in square brackets contains only a single sample. Previously the sample value was incorrectly returned in this case.
* BUGFIX: vminsert: fix uneven distribution of time series among storage nodes in [multi-level cluster setup](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multi-level-cluster-setup). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1672).
## [v1.66.2](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.66.2)