mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-03-11 15:34:56 +00:00
app/vmagent: properly increase vmagent_remotewrite_samples_dropped_total when scraped samples cannot be sent to the remote storage and -remoteWrite.dropSamplesOnOverload is set
This is a follow-up for 5034aa0773
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2110
This commit is contained in:
parent
e36f29080d
commit
5ccc22d66d
3 changed files with 21 additions and 6 deletions
|
@ -37,7 +37,6 @@ import (
|
|||
opentsdbhttpserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/opentsdbhttp"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
|
||||
|
@ -140,9 +139,7 @@ func main() {
|
|||
opentsdbhttpServer = opentsdbhttpserver.MustStart(*opentsdbHTTPListenAddr, *opentsdbHTTPUseProxyProtocol, httpInsertHandler)
|
||||
}
|
||||
|
||||
promscrape.Init(func(at *auth.Token, wr *prompbmarshal.WriteRequest) {
|
||||
_ = remotewrite.TryPush(at, wr)
|
||||
})
|
||||
promscrape.Init(remotewrite.PushDropSamplesOnFailure)
|
||||
|
||||
if len(*httpListenAddr) > 0 {
|
||||
go httpserver.Serve(*httpListenAddr, *useProxyProtocol, requestHandler)
|
||||
|
|
|
@ -360,6 +360,16 @@ func Stop() {
|
|||
}
|
||||
}
|
||||
|
||||
// PushDropSamplesOnFailure pushes wr to the configured remote storage systems set via -remoteWrite.url and -remoteWrite.multitenantURL
|
||||
//
|
||||
// If at is nil, then the data is pushed to the configured -remoteWrite.url.
|
||||
// If at isn't nil, the data is pushed to the configured -remoteWrite.multitenantURL.
|
||||
//
|
||||
// PushDropSamplesOnFailure can modify wr contents.
|
||||
func PushDropSamplesOnFailure(at *auth.Token, wr *prompbmarshal.WriteRequest) {
|
||||
_ = tryPush(at, wr, true)
|
||||
}
|
||||
|
||||
// TryPush tries sending wr to the configured remote storage systems set via -remoteWrite.url and -remoteWrite.multitenantURL
|
||||
//
|
||||
// If at is nil, then the data is pushed to the configured -remoteWrite.url.
|
||||
|
@ -370,6 +380,10 @@ func Stop() {
|
|||
//
|
||||
// The caller must return ErrQueueFullHTTPRetry to the client, which sends wr, if TryPush returns false.
|
||||
func TryPush(at *auth.Token, wr *prompbmarshal.WriteRequest) bool {
|
||||
return tryPush(at, wr, *dropSamplesOnOverload)
|
||||
}
|
||||
|
||||
func tryPush(at *auth.Token, wr *prompbmarshal.WriteRequest, dropSamplesOnFailure bool) bool {
|
||||
if at == nil && len(*remoteWriteMultitenantURLs) > 0 {
|
||||
// Write data to default tenant if at isn't set while -remoteWrite.multitenantURL is set.
|
||||
at = defaultAuthToken
|
||||
|
@ -404,7 +418,7 @@ func TryPush(at *auth.Token, wr *prompbmarshal.WriteRequest) bool {
|
|||
for _, rwctx := range rwctxs {
|
||||
if rwctx.fq.IsWriteBlocked() {
|
||||
pushFailures.Inc()
|
||||
if *dropSamplesOnOverload {
|
||||
if dropSamplesOnFailure {
|
||||
// Just drop samples
|
||||
samplesDropped.Add(rowsCount)
|
||||
return true
|
||||
|
@ -462,7 +476,7 @@ func TryPush(at *auth.Token, wr *prompbmarshal.WriteRequest) bool {
|
|||
logger.Panicf("BUG: tryPushBlockToRemoteStorages must return true if -remoteWrite.disableOnDiskQueue isn't set")
|
||||
}
|
||||
pushFailures.Inc()
|
||||
if *dropSamplesOnOverload {
|
||||
if dropSamplesOnFailure {
|
||||
samplesDropped.Add(rowsCount)
|
||||
return true
|
||||
}
|
||||
|
|
|
@ -891,6 +891,7 @@ When this flag is specified, `vmagent` works in the following way if the configu
|
|||
You can specify `-remoteWrite.dropSamplesOnOverload` command-line flag in order to drop the ingested samples instead of returning the error to clients in this case.
|
||||
- It suspends consuming data from [Kafka side](#reading-metrics-from-kafka) or [Google PubSub side](#pubsub-integration) until the remote storage becomes available.
|
||||
You can specify `-remoteWrite.dropSamplesOnOverload` command-line flag in order to drop the fetched samples instead of suspending data consumption from Kafka or Google PubSub.
|
||||
- It drops samples pushed to `vmagent` via non-HTTP protocols and logs the error. Pass `-remoteWrite.dropSamplesOnOverload` on order to suppress error messages in this case.
|
||||
- It drops samples [scraped from Prometheus-compatible targets](#how-to-collect-metrics-in-prometheus-format), because it is better to drop samples
|
||||
instead of blocking the scrape process.
|
||||
- It drops [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html) output samples, because it is better to drop output samples
|
||||
|
@ -899,6 +900,9 @@ When this flag is specified, `vmagent` works in the following way if the configu
|
|||
The number of dropped samples because of overloaded remote storage can be [monitored](#monitoring) via `vmagent_remotewrite_samples_dropped_total` metric.
|
||||
The number of unsuccessful attempts to send data to overloaded remote storage can be [monitored](#monitoring) via `vmagent_remotewrite_push_failures_total` metric.
|
||||
|
||||
Running `vmagent` on hosts with more RAM or increasing the value for `-memory.allowedPercent` may reduce the number of unsuccessful attempts or dropped samples
|
||||
on spiky workloads, since `vmagent` may buffer more data in memory before returning the error or dropping data.
|
||||
|
||||
`vmagent` still may write pending in-memory data to `-remoteWrite.tmpDataPath` on graceful shutdown
|
||||
if `-remoteWrite.disableOnDiskQueue` command-line flag is specified. It may also read buffered data from `-remoteWrite.tmpDataPath`
|
||||
on startup.
|
||||
|
|
Loading…
Reference in a new issue