app/vmagent: properly increase vmagent_remotewrite_samples_dropped_total when scraped samples cannot be sent to the remote storage and -remoteWrite.dropSamplesOnOverload is set

This is a follow-up for 5034aa0773
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2110
This commit is contained in:
Aliaksandr Valialkin 2023-11-25 14:42:37 +02:00
parent e36f29080d
commit 5ccc22d66d
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
3 changed files with 21 additions and 6 deletions

View file

@ -37,7 +37,6 @@ import (
opentsdbhttpserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/opentsdbhttp"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
@ -140,9 +139,7 @@ func main() {
opentsdbhttpServer = opentsdbhttpserver.MustStart(*opentsdbHTTPListenAddr, *opentsdbHTTPUseProxyProtocol, httpInsertHandler)
}
promscrape.Init(func(at *auth.Token, wr *prompbmarshal.WriteRequest) {
_ = remotewrite.TryPush(at, wr)
})
promscrape.Init(remotewrite.PushDropSamplesOnFailure)
if len(*httpListenAddr) > 0 {
go httpserver.Serve(*httpListenAddr, *useProxyProtocol, requestHandler)

View file

@ -360,6 +360,16 @@ func Stop() {
}
}
// PushDropSamplesOnFailure pushes wr to the configured remote storage systems set via -remoteWrite.url and -remoteWrite.multitenantURL
//
// If at is nil, then the data is pushed to the configured -remoteWrite.url.
// If at isn't nil, the data is pushed to the configured -remoteWrite.multitenantURL.
//
// PushDropSamplesOnFailure can modify wr contents.
func PushDropSamplesOnFailure(at *auth.Token, wr *prompbmarshal.WriteRequest) {
_ = tryPush(at, wr, true)
}
// TryPush tries sending wr to the configured remote storage systems set via -remoteWrite.url and -remoteWrite.multitenantURL
//
// If at is nil, then the data is pushed to the configured -remoteWrite.url.
@ -370,6 +380,10 @@ func Stop() {
//
// The caller must return ErrQueueFullHTTPRetry to the client, which sends wr, if TryPush returns false.
func TryPush(at *auth.Token, wr *prompbmarshal.WriteRequest) bool {
return tryPush(at, wr, *dropSamplesOnOverload)
}
func tryPush(at *auth.Token, wr *prompbmarshal.WriteRequest, dropSamplesOnFailure bool) bool {
if at == nil && len(*remoteWriteMultitenantURLs) > 0 {
// Write data to default tenant if at isn't set while -remoteWrite.multitenantURL is set.
at = defaultAuthToken
@ -404,7 +418,7 @@ func TryPush(at *auth.Token, wr *prompbmarshal.WriteRequest) bool {
for _, rwctx := range rwctxs {
if rwctx.fq.IsWriteBlocked() {
pushFailures.Inc()
if *dropSamplesOnOverload {
if dropSamplesOnFailure {
// Just drop samples
samplesDropped.Add(rowsCount)
return true
@ -462,7 +476,7 @@ func TryPush(at *auth.Token, wr *prompbmarshal.WriteRequest) bool {
logger.Panicf("BUG: tryPushBlockToRemoteStorages must return true if -remoteWrite.disableOnDiskQueue isn't set")
}
pushFailures.Inc()
if *dropSamplesOnOverload {
if dropSamplesOnFailure {
samplesDropped.Add(rowsCount)
return true
}

View file

@ -891,6 +891,7 @@ When this flag is specified, `vmagent` works in the following way if the configu
You can specify `-remoteWrite.dropSamplesOnOverload` command-line flag in order to drop the ingested samples instead of returning the error to clients in this case.
- It suspends consuming data from [Kafka side](#reading-metrics-from-kafka) or [Google PubSub side](#pubsub-integration) until the remote storage becomes available.
You can specify `-remoteWrite.dropSamplesOnOverload` command-line flag in order to drop the fetched samples instead of suspending data consumption from Kafka or Google PubSub.
- It drops samples pushed to `vmagent` via non-HTTP protocols and logs the error. Pass `-remoteWrite.dropSamplesOnOverload` on order to suppress error messages in this case.
- It drops samples [scraped from Prometheus-compatible targets](#how-to-collect-metrics-in-prometheus-format), because it is better to drop samples
instead of blocking the scrape process.
- It drops [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html) output samples, because it is better to drop output samples
@ -899,6 +900,9 @@ When this flag is specified, `vmagent` works in the following way if the configu
The number of dropped samples because of overloaded remote storage can be [monitored](#monitoring) via `vmagent_remotewrite_samples_dropped_total` metric.
The number of unsuccessful attempts to send data to overloaded remote storage can be [monitored](#monitoring) via `vmagent_remotewrite_push_failures_total` metric.
Running `vmagent` on hosts with more RAM or increasing the value for `-memory.allowedPercent` may reduce the number of unsuccessful attempts or dropped samples
on spiky workloads, since `vmagent` may buffer more data in memory before returning the error or dropping data.
`vmagent` still may write pending in-memory data to `-remoteWrite.tmpDataPath` on graceful shutdown
if `-remoteWrite.disableOnDiskQueue` command-line flag is specified. It may also read buffered data from `-remoteWrite.tmpDataPath`
on startup.