mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-02-19 15:30:17 +00:00
app/vmselect: properly cancel long running requests on client connection close
At this time `bufferedwriter` [silently ignores connection close errors](78eaa056c0/lib/bufferedwriter/bufferedwriter.go (L67)
). It may be very convenient in some situations (to not log such unimportant errors), but it's too implicit and unsafe for the others. For example, if you close [export API](https://docs.victoriametrics.com/#how-to-export-time-series) client connection in the middle of communication, VictoriaMetrics won't notice it and will start to hog CPU by exporting all the data into nowhere until it process all of them. If you'll make a few retries, it will be effectively a DoS on the server. This commit replaces this implicit error suppressing with explicit error handling which fixes the issue with export API. Issue was introduced ate78f3ac8ac
This commit is contained in:
parent
34204cc597
commit
17fd53bd37
3 changed files with 40 additions and 17 deletions
|
@ -29,6 +29,7 @@ import (
|
|||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
)
|
||||
|
@ -142,10 +143,13 @@ func FederateHandler(startTime time.Time, w http.ResponseWriter, r *http.Request
|
|||
WriteFederate(bb, rs)
|
||||
return sw.maybeFlushBuffer(bb)
|
||||
})
|
||||
if err != nil {
|
||||
if err == nil {
|
||||
err = sw.flush()
|
||||
}
|
||||
if err != nil && !netutil.IsTrivialNetworkError(err) {
|
||||
return fmt.Errorf("error during sending data to remote client: %w", err)
|
||||
}
|
||||
return sw.flush()
|
||||
return nil
|
||||
}
|
||||
|
||||
var federateDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/federate"}`)
|
||||
|
@ -226,10 +230,13 @@ func ExportCSVHandler(startTime time.Time, w http.ResponseWriter, r *http.Reques
|
|||
}()
|
||||
}
|
||||
err = <-doneCh
|
||||
if err != nil {
|
||||
if err == nil {
|
||||
err = sw.flush()
|
||||
}
|
||||
if err != nil && !netutil.IsTrivialNetworkError(err) {
|
||||
return fmt.Errorf("error during sending the exported csv data to remote client: %w", err)
|
||||
}
|
||||
return sw.flush()
|
||||
return nil
|
||||
}
|
||||
|
||||
var exportCSVDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/export/csv"}`)
|
||||
|
@ -281,10 +288,13 @@ func ExportNativeHandler(startTime time.Time, w http.ResponseWriter, r *http.Req
|
|||
bb.B = dst
|
||||
return sw.maybeFlushBuffer(bb)
|
||||
})
|
||||
if err != nil {
|
||||
if err == nil {
|
||||
err = sw.flush()
|
||||
}
|
||||
if err != nil && !netutil.IsTrivialNetworkError(err) {
|
||||
return fmt.Errorf("error during sending native data to remote client: %w", err)
|
||||
}
|
||||
return sw.flush()
|
||||
return nil
|
||||
}
|
||||
|
||||
var exportNativeDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/export/native"}`)
|
||||
|
@ -441,16 +451,19 @@ func exportHandler(qt *querytracer.Tracer, w http.ResponseWriter, cp *commonPara
|
|||
}()
|
||||
}
|
||||
err := <-doneCh
|
||||
if err != nil {
|
||||
if err == nil {
|
||||
err = sw.flush()
|
||||
}
|
||||
if err == nil {
|
||||
if format == "promapi" {
|
||||
WriteExportPromAPIFooter(bw, qt)
|
||||
}
|
||||
err = bw.Flush()
|
||||
}
|
||||
if err != nil && !netutil.IsTrivialNetworkError(err) {
|
||||
return fmt.Errorf("cannot send data to remote client: %w", err)
|
||||
}
|
||||
if err := sw.flush(); err != nil {
|
||||
return fmt.Errorf("cannot send data to remote client: %w", err)
|
||||
}
|
||||
if format == "promapi" {
|
||||
WriteExportPromAPIFooter(bw, qt)
|
||||
}
|
||||
return bw.Flush()
|
||||
return nil
|
||||
}
|
||||
|
||||
type exportBlock struct {
|
||||
|
|
|
@ -51,7 +51,7 @@ Released at 2025-01-24
|
|||
|
||||
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/metricsql/): allow executing queries with `$__interval` and `$__rate_interval` - these placeholders are automatically replaced with `1i` (e.g. `step` arg value at [`/api/v1/query_range`](https://docs.victoriametrics.com/keyconcepts/#range-query)) during query execution. This simplifies copying queries from Grafana dashboards.
|
||||
* FEATURE: [vmsingle](https://docs.victoriametrics.com/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): add command-line flag `-search.maxDeleteDuration(default 5m)` to limit the duration of the `/api/v1/admin/tsdb/delete_series` call. Previously, the call is limited by `-search.maxQueryDuration`.
|
||||
* FEATURE: [dashboards](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/dashboards): all dashboards that use [VictoriaMetrics Grafana datasource](https://github.com/VictoriaMetrics/victoriametrics-datasource) were updated to use a [new datasource ID](https://github.com/VictoriaMetrics/victoriametrics-datasource/releases/tag/v0.12.0).
|
||||
* FEATURE: [dashboards](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/dashboards): all dashboards that use [VictoriaMetrics Grafana datasource](https://github.com/VictoriaMetrics/victoriametrics-datasource) were updated to use a [new datasource ID](https://github.com/VictoriaMetrics/victoriametrics-datasource/releases/tag/v0.12.0).
|
||||
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): reflect column settings for the table view in URL, so the table view can be shared via link. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7662).
|
||||
|
||||
* BUGFIX: [vmauth](https://docs.victoriametrics.com/vmauth/): fix possible runtime panic during requests processing under heavy load. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8051) for details.
|
||||
|
@ -61,6 +61,7 @@ Released at 2025-01-24
|
|||
* BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and [vmselect](https://docs.victoriametrics.com/cluster-victoriametrics/): respect staleness detection in increase, increase_pure and delta functions when time series has gaps and `-search.maxStalenessInterval` is set. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8072) for details.
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/single-server-victoriametrics/), `vminsert` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/) and [vmagent](https://docs.victoriametrics.com/vmagent/): allow ingesting histograms with missing `_sum` metric via [OpenTelemetry ingestion protocol](https://docs.victoriametrics.com/#sending-data-via-opentelemetry) in the same way as Prometheus does.
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix an issue where pressing the "Enter" key in the query editor did not execute the query. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8058).
|
||||
* BUGFIX: [export API](https://docs.victoriametrics.com/#how-to-export-time-series): cancel export process on client connection close. Previously client connection close was ignored and VictoriaMetrics started to hog CPU by exporting metrics to nowhere until it export all of them.
|
||||
|
||||
## [v1.102.11](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.102.11)
|
||||
|
||||
|
|
|
@ -64,21 +64,30 @@ func (bw *Writer) Write(p []byte) (int, error) {
|
|||
return 0, bw.err
|
||||
}
|
||||
n, err := bw.bw.Write(p)
|
||||
if err != nil && !netutil.IsTrivialNetworkError(err) {
|
||||
if err != nil {
|
||||
bw.err = fmt.Errorf("cannot send %d bytes to client: %w", len(p), err)
|
||||
}
|
||||
return n, bw.err
|
||||
}
|
||||
|
||||
// Flush flushes bw to the underlying writer.
|
||||
//
|
||||
// Connection close errors are ignored to not trigger on them and to not write to logs, but Write method doesn't ignore
|
||||
// them since it may lead to an unexpected behaviour (see https://github.com/VictoriaMetrics/VictoriaMetrics/pull/8157)
|
||||
func (bw *Writer) Flush() error {
|
||||
bw.lock.Lock()
|
||||
defer bw.lock.Unlock()
|
||||
if bw.err != nil {
|
||||
if netutil.IsTrivialNetworkError(bw.err) {
|
||||
return nil
|
||||
}
|
||||
return bw.err
|
||||
}
|
||||
if err := bw.bw.Flush(); err != nil && !netutil.IsTrivialNetworkError(err) {
|
||||
if err := bw.bw.Flush(); err != nil {
|
||||
bw.err = fmt.Errorf("cannot flush data to client: %w", err)
|
||||
if netutil.IsTrivialNetworkError(err) {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return bw.err
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue