mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-03-21 15:45:01 +00:00
app/vminsert: add vm_rpc_send_duration_seconds_total metric for each vminsert->vmstorage
link
This metric is useful for determining high link saturation with the following alerting rule: rate(vm_rpc_send_duration_seconds_total) > 0.9s
This commit is contained in:
parent
3df6550153
commit
9eb828b2c2
2 changed files with 9 additions and 0 deletions
|
@ -245,7 +245,10 @@ func (sn *storageNode) sendBufRowsNonblocking(br *bufRows) bool {
|
||||||
// sn.dial() should be called by sn.checkHealth() on unsuccessful call to sendBufToReplicasNonblocking().
|
// sn.dial() should be called by sn.checkHealth() on unsuccessful call to sendBufToReplicasNonblocking().
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
startTime := time.Now()
|
||||||
err := sendToConn(sn.bc, br.buf)
|
err := sendToConn(sn.bc, br.buf)
|
||||||
|
duration := time.Since(startTime)
|
||||||
|
sn.sendDurationSeconds.Add(duration.Seconds())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
// Successfully sent buf to bc.
|
// Successfully sent buf to bc.
|
||||||
sn.rowsSent.Add(br.rows)
|
sn.rowsSent.Add(br.rows)
|
||||||
|
@ -383,6 +386,10 @@ type storageNode struct {
|
||||||
// The number of rows rerouted to the given vmstorage node
|
// The number of rows rerouted to the given vmstorage node
|
||||||
// from other nodes when they were unhealthy.
|
// from other nodes when they were unhealthy.
|
||||||
rowsReroutedToHere *metrics.Counter
|
rowsReroutedToHere *metrics.Counter
|
||||||
|
|
||||||
|
// The total duration spent for sending data to vmstorage node.
|
||||||
|
// This metric is useful for determining the saturation of vminsert->vmstorage link.
|
||||||
|
sendDurationSeconds *metrics.FloatCounter
|
||||||
}
|
}
|
||||||
|
|
||||||
// storageNodes contains a list of vmstorage node clients.
|
// storageNodes contains a list of vmstorage node clients.
|
||||||
|
@ -418,6 +425,7 @@ func InitStorageNodes(addrs []string) {
|
||||||
rowsSent: metrics.NewCounter(fmt.Sprintf(`vm_rpc_rows_sent_total{name="vminsert", addr=%q}`, addr)),
|
rowsSent: metrics.NewCounter(fmt.Sprintf(`vm_rpc_rows_sent_total{name="vminsert", addr=%q}`, addr)),
|
||||||
rowsReroutedFromHere: metrics.NewCounter(fmt.Sprintf(`vm_rpc_rows_rerouted_from_here_total{name="vminsert", addr=%q}`, addr)),
|
rowsReroutedFromHere: metrics.NewCounter(fmt.Sprintf(`vm_rpc_rows_rerouted_from_here_total{name="vminsert", addr=%q}`, addr)),
|
||||||
rowsReroutedToHere: metrics.NewCounter(fmt.Sprintf(`vm_rpc_rows_rerouted_to_here_total{name="vminsert", addr=%q}`, addr)),
|
rowsReroutedToHere: metrics.NewCounter(fmt.Sprintf(`vm_rpc_rows_rerouted_to_here_total{name="vminsert", addr=%q}`, addr)),
|
||||||
|
sendDurationSeconds: metrics.NewFloatCounter(fmt.Sprintf(`vm_rpc_send_duration_seconds_total{name="vminsert", addr=%q}`, addr)),
|
||||||
}
|
}
|
||||||
sn.brCond = sync.NewCond(&sn.brLock)
|
sn.brCond = sync.NewCond(&sn.brLock)
|
||||||
_ = metrics.NewGauge(fmt.Sprintf(`vm_rpc_rows_pending{name="vminsert", addr=%q}`, addr), func() float64 {
|
_ = metrics.NewGauge(fmt.Sprintf(`vm_rpc_rows_pending{name="vminsert", addr=%q}`, addr), func() float64 {
|
||||||
|
|
|
@ -14,6 +14,7 @@ sort: 15
|
||||||
* FEATURE: add `-search.maxSamplesPerQuery` command-line flag for limiting the number of raw samples a single query can process across all the time series. This option can protect from heavy queries, which select too many raw samples. Thanks to @jiangxinlingdu for [the initial pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1478).
|
* FEATURE: add `-search.maxSamplesPerQuery` command-line flag for limiting the number of raw samples a single query can process across all the time series. This option can protect from heavy queries, which select too many raw samples. Thanks to @jiangxinlingdu for [the initial pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1478).
|
||||||
* FEATURE: improve performance for queries that process a large number of time series and/or samples on systems with a large number of CPU cores.
|
* FEATURE: improve performance for queries that process a large number of time series and/or samples on systems with a large number of CPU cores.
|
||||||
* FEATURE: vmalert: expose `vmalert_alerting_rules_last_evaluation_samples` and `vmalert_recording_rules_last_evaluation_samples` metrics. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1494).
|
* FEATURE: vmalert: expose `vmalert_alerting_rules_last_evaluation_samples` and `vmalert_recording_rules_last_evaluation_samples` metrics. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1494).
|
||||||
|
* FEATURE: vminsert: expose `vm_rpc_send_duration_seconds_total` counter, which can be used for determining high saturation of every `vminsert -> vmstorage` link with an alerting query `rate(vm_rpc_send_duration_seconds_total) > 0.9s`. This query triggers when the link is saturated by more than 90%.
|
||||||
|
|
||||||
* BUGFIX: fix corner cases for queries on time ranges exceeding 40 days. Previously some series could be missing in query results. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1486).
|
* BUGFIX: fix corner cases for queries on time ranges exceeding 40 days. Previously some series could be missing in query results. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1486).
|
||||||
* BUGFIX: vmselect: return dummy response at `/rules` page in the same way as for `/api/v1/rules` page. The `/rules` page is requested by Grafana 8. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1493) for details.
|
* BUGFIX: vmselect: return dummy response at `/rules` page in the same way as for `/api/v1/rules` page. The `/rules` page is requested by Grafana 8. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1493) for details.
|
||||||
|
|
Loading…
Reference in a new issue