app/vmselect: stop /api/v1/export/* execution if client disconnects

Aliaksandr Valialkin 2020-09-27 23:17:14 +03:00
parent aadbd014ff
commit db14f22fc0
5 changed files with 235 additions and 74 deletions
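In short, the export handlers now write through a bufferedwriter.Writer that remembers the first write error, and netstorage.Results.RunParallel callbacks can return an error to abort the remaining work. Below is a minimal sketch of the resulting handler-side pattern; writeSeries and its output format are hypothetical, only the bufferedwriter and RunParallel calls mirror this commit.

package example // illustrative sketch, not part of this commit

import (
	"fmt"
	"io"

	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/bufferedwriter"
	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
)

// writeSeries buffers writes through bufferedwriter.Writer and checks
// bw.Error() inside the RunParallel callback, so the remaining per-series
// work is skipped once a write to the client fails (e.g. on disconnect).
func writeSeries(w io.Writer, rss *netstorage.Results) error {
	bw := bufferedwriter.Get(w)
	defer bufferedwriter.Put(bw)
	err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) error {
		if err := bw.Error(); err != nil {
			// A previous write to the client already failed; stop the work.
			return err
		}
		// The first failed Write is remembered by bw and reported by Error().
		bw.Write([]byte(fmt.Sprintf("%d samples\n", len(rs.Values))))
		return nil
	})
	if err != nil {
		return err
	}
	return bw.Flush()
}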


@ -0,0 +1,86 @@
package bufferedwriter
import (
"bufio"
"fmt"
"io"
"sync"
)
// Get returns a buffered writer for the given w.
//
// The writer must be returned to the pool after use by calling Put().
func Get(w io.Writer) *Writer {
v := writerPool.Get()
if v == nil {
v = &Writer{
// By default net/http.Server uses 4KB buffers, which are flushed to the client with chunked responses.
// These buffers may result in visible overhead for responses exceeding a few megabytes.
// So allocate 64KB buffers.
bw: bufio.NewWriterSize(w, 64*1024),
}
}
bw := v.(*Writer)
bw.bw.Reset(w)
return bw
}
// Put returns bw back to the pool.
//
// bw cannot be used after returning to the pool.
func Put(bw *Writer) {
bw.reset()
writerPool.Put(bw)
}
var writerPool sync.Pool
// Writer is a buffered writer, which may be used to reduce overhead
// when sending moderately big responses from an HTTP server.
//
// Writer methods can be called from concurrently running goroutines.
// The writer remembers the first error that occurred; it can be inspected with the Error method.
type Writer struct {
lock sync.Mutex
bw *bufio.Writer
err error
}
func (bw *Writer) reset() {
bw.bw.Reset(nil)
bw.err = nil
}
// Write writes p to bw.
func (bw *Writer) Write(p []byte) (int, error) {
bw.lock.Lock()
defer bw.lock.Unlock()
if bw.err != nil {
return 0, bw.err
}
n, err := bw.bw.Write(p)
if err != nil {
bw.err = fmt.Errorf("cannot send %d bytes to client: %w", len(p), err)
}
return n, bw.err
}
// Flush flushes bw to the underlying writer.
func (bw *Writer) Flush() error {
bw.lock.Lock()
defer bw.lock.Unlock()
if bw.err != nil {
return bw.err
}
if err := bw.bw.Flush(); err != nil {
bw.err = fmt.Errorf("cannot flush data to client: %w", err)
}
return bw.err
}
// Error returns the first error that occurred in bw.
func (bw *Writer) Error() error {
bw.lock.Lock()
defer bw.lock.Unlock()
return bw.err
}
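
For illustration, a minimal usage sketch of this package; the handler and its output are hypothetical, and only Get, Put, Write, Flush and Error come from the code above.

package example // illustrative sketch, not part of this commit

import (
	"fmt"
	"net/http"
	"sync"

	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/bufferedwriter"
)

// handler demonstrates the intended lifecycle: Get -> Write (possibly from
// several goroutines) -> Flush -> Put. Write is safe for concurrent use and
// the writer remembers the first error, which Flush and Error report.
func handler(w http.ResponseWriter, r *http.Request) {
	bw := bufferedwriter.Get(w)
	defer bufferedwriter.Put(bw)
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			bw.Write([]byte(fmt.Sprintf("chunk %d\n", i)))
		}(i)
	}
	wg.Wait()
	if err := bw.Flush(); err != nil {
		// The client has likely disconnected; there is nothing left to send.
		return
	}
}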


@ -9,6 +9,7 @@ import (
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/bufferedwriter"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/searchutils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
@ -92,7 +93,12 @@ func MetricsFindHandler(startTime time.Time, at *auth.Token, w http.ResponseWrit
contentType = "text/javascript"
}
w.Header().Set("Content-Type", contentType)
WriteMetricsFindResponse(w, paths, delimiter, format, wildcards, jsonp)
bw := bufferedwriter.Get(w)
defer bufferedwriter.Put(bw)
WriteMetricsFindResponse(bw, paths, delimiter, format, wildcards, jsonp)
if err := bw.Flush(); err != nil {
return err
}
metricsFindDuration.UpdateDuration(startTime)
return nil
}
@ -188,7 +194,12 @@ func MetricsExpandHandler(startTime time.Time, at *auth.Token, w http.ResponseWr
}
}
sortPaths(paths, delimiter)
WriteMetricsExpandResponseFlat(w, paths, jsonp)
bw := bufferedwriter.Get(w)
defer bufferedwriter.Put(bw)
WriteMetricsExpandResponseFlat(bw, paths, jsonp)
if err := bw.Flush(); err != nil {
return err
}
metricsExpandDuration.UpdateDuration(startTime)
return nil
}
@ -214,7 +225,12 @@ func MetricsIndexHandler(startTime time.Time, at *auth.Token, w http.ResponseWri
contentType = "text/javascript"
}
w.Header().Set("Content-Type", contentType)
WriteMetricsIndexResponse(w, metricNames, jsonp)
bw := bufferedwriter.Get(w)
defer bufferedwriter.Put(bw)
WriteMetricsIndexResponse(bw, metricNames, jsonp)
if err := bw.Flush(); err != nil {
return err
}
metricsIndexDuration.UpdateDuration(startTime)
return nil
}


@ -10,6 +10,7 @@ import (
"sort"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/searchutils"
@ -74,10 +75,11 @@ func (rss *Results) Cancel() {
var timeseriesWorkCh = make(chan *timeseriesWork, gomaxprocs*16)
type timeseriesWork struct {
rss *Results
pts *packedTimeseries
f func(rs *Result, workerID uint)
doneCh chan error
mustStop uint64
rss *Results
pts *packedTimeseries
f func(rs *Result, workerID uint) error
doneCh chan error
rowsProcessed int
}
@ -97,12 +99,19 @@ func timeseriesWorker(workerID uint) {
tsw.doneCh <- fmt.Errorf("timeout exceeded during query execution: %s", rss.deadline.String())
continue
}
if atomic.LoadUint64(&tsw.mustStop) != 0 {
tsw.doneCh <- nil
continue
}
if err := tsw.pts.Unpack(rss.tbf, &rs, rss.tr, rss.fetchData, rss.at); err != nil {
tsw.doneCh <- fmt.Errorf("error during time series unpacking: %w", err)
continue
}
if len(rs.Timestamps) > 0 || !rss.fetchData {
tsw.f(&rs, workerID)
if err := tsw.f(&rs, workerID); err != nil {
tsw.doneCh <- err
continue
}
}
tsw.rowsProcessed = len(rs.Values)
tsw.doneCh <- nil
@ -119,9 +128,10 @@ func timeseriesWorker(workerID uint) {
//
// f shouldn't hold references to rs after returning.
// workerID is the id of the worker goroutine that calls f.
// Data processing is immediately stopped if f returns non-nil error.
//
// rss becomes unusable after the call to RunParallel.
func (rss *Results) RunParallel(f func(rs *Result, workerID uint)) error {
func (rss *Results) RunParallel(f func(rs *Result, workerID uint) error) error {
defer func() {
putTmpBlocksFile(rss.tbf)
rss.tbf = nil
@ -150,6 +160,10 @@ func (rss *Results) RunParallel(f func(rs *Result, workerID uint)) error {
// Return just the first error, since other errors
// are likely duplicates of the first error.
firstErr = err
// Notify all the tsws that they shouldn't be executed.
for _, tsw := range tsws {
atomic.StoreUint64(&tsw.mustStop, 1)
}
}
rowsProcessedTotal += tsw.rowsProcessed
}
@ -1003,7 +1017,7 @@ var metricNamePool = &sync.Pool{
// ExportBlocks searches for time series matching sq and calls f for each found block.
//
// f is called in parallel from multiple goroutines.
// the process is stopped if f return non-nil error.
// Data processing is immediately stopped if f returns non-nil error.
// It is the responsibility of f to call b.UnmarshalData before reading timestamps and values from the block.
// It is the responsibility of f to filter blocks according to the given tr.
func ExportBlocks(at *auth.Token, sq *storage.SearchQuery, deadline searchutils.Deadline, f func(mn *storage.MetricName, b *storage.Block, tr storage.TimeRange) error) (bool, error) {
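
As a usage note for the new signature: returning a non-nil error from the callback makes RunParallel return that error and lets the remaining queued timeseriesWork items be skipped via mustStop. A hypothetical example that aborts once a sample budget is exceeded (processWithBudget and maxSamples are illustrative, not from this commit):

package example // illustrative sketch, not part of this commit

import (
	"fmt"
	"sync/atomic"

	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
)

// processWithBudget aborts the parallel processing as soon as more than
// maxSamples values have been seen across all series.
func processWithBudget(rss *netstorage.Results, maxSamples uint64) error {
	var samples uint64
	return rss.RunParallel(func(rs *netstorage.Result, workerID uint) error {
		if atomic.AddUint64(&samples, uint64(len(rs.Values))) > maxSamples {
			return fmt.Errorf("more than %d samples found; narrow down the query", maxSamples)
		}
		// ... per-series processing would go here ...
		return nil
	})
}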


@ -1,7 +1,6 @@
package prometheus
import (
"bufio"
"flag"
"fmt"
"math"
@ -12,6 +11,7 @@ import (
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/bufferedwriter"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/searchutils"
@ -46,11 +46,6 @@ var (
// Default step used if not set.
const defaultStep = 5 * 60 * 1000
// Buffer size for big responses (i.e. /federate and /api/v1/export/* )
// By default net/http.Server uses 4KB buffers, which are flushed to client with chunked responses.
// These buffers may result in visible overhead for responses exceeding tens of megabytes.
const bigResponseBufferSize = 128 * 1024
// FederateHandler implements /federate . See https://prometheus.io/docs/prometheus/latest/federation/
func FederateHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter, r *http.Request) error {
ct := startTime.UnixNano() / 1e6
@ -100,30 +95,25 @@ func FederateHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter,
return fmt.Errorf("cannot return full response, since some of vmstorage nodes are unavailable")
}
resultsCh := make(chan *quicktemplate.ByteBuffer)
doneCh := make(chan error)
go func() {
err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) {
bb := quicktemplate.AcquireByteBuffer()
WriteFederate(bb, rs)
resultsCh <- bb
})
close(resultsCh)
doneCh <- err
}()
w.Header().Set("Content-Type", "text/plain")
bw := bufio.NewWriterSize(w, bigResponseBufferSize)
for bb := range resultsCh {
bw := bufferedwriter.Get(w)
defer bufferedwriter.Put(bw)
err = rss.RunParallel(func(rs *netstorage.Result, workerID uint) error {
if err := bw.Error(); err != nil {
return err
}
bb := quicktemplate.AcquireByteBuffer()
WriteFederate(bb, rs)
bw.Write(bb.B)
quicktemplate.ReleaseByteBuffer(bb)
}
_ = bw.Flush()
err = <-doneCh
return nil
})
if err != nil {
return fmt.Errorf("error during data fetching: %w", err)
}
if err := bw.Flush(); err != nil {
return err
}
federateDuration.UpdateDuration(startTime)
return nil
}
@ -166,7 +156,8 @@ func ExportNativeHandler(startTime time.Time, at *auth.Token, w http.ResponseWri
TagFilterss: tagFilterss,
}
w.Header().Set("Content-Type", "VictoriaMetrics/native")
bw := bufio.NewWriterSize(w, bigResponseBufferSize)
bw := bufferedwriter.Get(w)
defer bufferedwriter.Put(bw)
// Marshal tr
trBuf := make([]byte, 0, 16)
@ -174,8 +165,11 @@ func ExportNativeHandler(startTime time.Time, at *auth.Token, w http.ResponseWri
trBuf = encoding.MarshalInt64(trBuf, end)
bw.Write(trBuf)
var bwLock sync.Mutex
// Marshal native blocks.
isPartial, err := netstorage.ExportBlocks(at, sq, deadline, func(mn *storage.MetricName, b *storage.Block, tr storage.TimeRange) error {
if err := bw.Error(); err != nil {
return err
}
dstBuf := bbPool.Get()
tmpBuf := bbPool.Get()
dst := dstBuf.B
@ -194,24 +188,27 @@ func ExportNativeHandler(startTime time.Time, at *auth.Token, w http.ResponseWri
tmpBuf.B = tmp
bbPool.Put(tmpBuf)
bwLock.Lock()
_, err := bw.Write(dst)
bwLock.Unlock()
if err != nil {
return fmt.Errorf("cannot write data to client: %w", err)
}
bw.Write(dst)
dstBuf.B = dst
bbPool.Put(dstBuf)
return nil
})
_ = bw.Flush()
if err == nil && isPartial && searchutils.GetDenyPartialResponse(r) {
err = fmt.Errorf("cannot return full response, since some of vmstorage nodes are unavailable")
}
return err
if err != nil {
return err
}
if err := bw.Flush(); err != nil {
return err
}
exportNativeDuration.UpdateDuration(startTime)
return nil
}
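Note that the explicit bwLock mutex that previously guarded writes in ExportNativeHandler is gone: bufferedwriter.Writer serializes Write calls internally and remembers the first error, so concurrent ExportBlocks callbacks can call bw.Write directly and check bw.Error() to stop early.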
var exportNativeDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/export/native"}`)
var bbPool bytesutil.ByteBufferPool
// ExportHandler exports data in raw format from /api/v1/export.
@ -318,6 +315,10 @@ func exportHandler(at *auth.Token, w http.ResponseWriter, r *http.Request, match
MaxTimestamp: end,
TagFilterss: tagFilterss,
}
w.Header().Set("Content-Type", contentType)
bw := bufferedwriter.Get(w)
defer bufferedwriter.Put(bw)
resultsCh := make(chan *quicktemplate.ByteBuffer, runtime.GOMAXPROCS(-1))
doneCh := make(chan error)
if !reduceMemUsage {
@ -330,7 +331,10 @@ func exportHandler(at *auth.Token, w http.ResponseWriter, r *http.Request, match
return fmt.Errorf("cannot return full response, since some of vmstorage nodes are unavailable")
}
go func() {
err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) {
err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) error {
if err := bw.Error(); err != nil {
return err
}
xb := exportBlockPool.Get().(*exportBlock)
xb.mn = &rs.MetricName
xb.timestamps = rs.Timestamps
@ -338,6 +342,7 @@ func exportHandler(at *auth.Token, w http.ResponseWriter, r *http.Request, match
writeLineFunc(xb, resultsCh)
xb.reset()
exportBlockPool.Put(xb)
return nil
})
close(resultsCh)
doneCh <- err
@ -345,6 +350,9 @@ func exportHandler(at *auth.Token, w http.ResponseWriter, r *http.Request, match
} else {
go func() {
isPartial, err := netstorage.ExportBlocks(at, sq, deadline, func(mn *storage.MetricName, b *storage.Block, tr storage.TimeRange) error {
if err := bw.Error(); err != nil {
return err
}
if err := b.UnmarshalData(); err != nil {
return fmt.Errorf("cannot unmarshal block during export: %s", err)
}
@ -366,15 +374,10 @@ func exportHandler(at *auth.Token, w http.ResponseWriter, r *http.Request, match
}()
}
w.Header().Set("Content-Type", contentType)
bw := bufio.NewWriterSize(w, bigResponseBufferSize)
// writeResponseFunc must consume all the data from resultsCh.
writeResponseFunc(bw, resultsCh)
_ = bw.Flush()
// Consume all the data from resultsCh in the event writeResponseFunc
// fails to consume all the data.
for bb := range resultsCh {
quicktemplate.ReleaseByteBuffer(bb)
if err := bw.Flush(); err != nil {
return err
}
err = <-doneCh
if err != nil {
@ -517,7 +520,12 @@ func LabelValuesHandler(startTime time.Time, at *auth.Token, labelName string, w
}
w.Header().Set("Content-Type", "application/json")
WriteLabelValuesResponse(w, labelValues)
bw := bufferedwriter.Get(w)
defer bufferedwriter.Put(bw)
WriteLabelValuesResponse(bw, labelValues)
if err := bw.Flush(); err != nil {
return err
}
labelValuesDuration.UpdateDuration(startTime)
return nil
}
@ -560,14 +568,15 @@ func labelValuesWithMatches(at *auth.Token, labelName string, matches []string,
m := make(map[string]struct{})
var mLock sync.Mutex
err = rss.RunParallel(func(rs *netstorage.Result, workerID uint) {
err = rss.RunParallel(func(rs *netstorage.Result, workerID uint) error {
labelValue := rs.MetricName.GetTagValue(labelName)
if len(labelValue) == 0 {
return
return nil
}
mLock.Lock()
m[string(labelValue)] = struct{}{}
mLock.Unlock()
return nil
})
if err != nil {
return nil, false, fmt.Errorf("error when data fetching: %w", err)
@ -594,7 +603,12 @@ func LabelsCountHandler(startTime time.Time, at *auth.Token, w http.ResponseWrit
return fmt.Errorf("cannot return full response, since some of vmstorage nodes are unavailable")
}
w.Header().Set("Content-Type", "application/json")
WriteLabelsCountResponse(w, labelEntries)
bw := bufferedwriter.Get(w)
defer bufferedwriter.Put(bw)
WriteLabelsCountResponse(bw, labelEntries)
if err := bw.Flush(); err != nil {
return err
}
labelsCountDuration.UpdateDuration(startTime)
return nil
}
@ -643,7 +657,12 @@ func TSDBStatusHandler(startTime time.Time, at *auth.Token, w http.ResponseWrite
return fmt.Errorf("cannot return full response, since some of vmstorage nodes are unavailable")
}
w.Header().Set("Content-Type", "application/json")
WriteTSDBStatusResponse(w, status)
bw := bufferedwriter.Get(w)
defer bufferedwriter.Put(bw)
WriteTSDBStatusResponse(bw, status)
if err := bw.Flush(); err != nil {
return err
}
tsdbStatusDuration.UpdateDuration(startTime)
return nil
}
@ -692,7 +711,12 @@ func LabelsHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter, r
}
w.Header().Set("Content-Type", "application/json")
WriteLabelsResponse(w, labels)
bw := bufferedwriter.Get(w)
defer bufferedwriter.Put(bw)
WriteLabelsResponse(bw, labels)
if err := bw.Flush(); err != nil {
return err
}
labelsDuration.UpdateDuration(startTime)
return nil
}
@ -722,7 +746,7 @@ func labelsWithMatches(at *auth.Token, matches []string, start, end int64, deadl
m := make(map[string]struct{})
var mLock sync.Mutex
err = rss.RunParallel(func(rs *netstorage.Result, workerID uint) {
err = rss.RunParallel(func(rs *netstorage.Result, workerID uint) error {
mLock.Lock()
tags := rs.MetricName.Tags
for i := range tags {
@ -731,6 +755,7 @@ func labelsWithMatches(at *auth.Token, matches []string, start, end int64, deadl
}
m["__name__"] = struct{}{}
mLock.Unlock()
return nil
})
if err != nil {
return nil, false, fmt.Errorf("error when data fetching: %w", err)
@ -758,7 +783,12 @@ func SeriesCountHandler(startTime time.Time, at *auth.Token, w http.ResponseWrit
}
w.Header().Set("Content-Type", "application/json")
WriteSeriesCountResponse(w, n)
bw := bufferedwriter.Get(w)
defer bufferedwriter.Put(bw)
WriteSeriesCountResponse(bw, n)
if err := bw.Flush(); err != nil {
return err
}
seriesCountDuration.UpdateDuration(startTime)
return nil
}
@ -815,25 +845,28 @@ func SeriesHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter, r
return fmt.Errorf("cannot return full response, since some of vmstorage nodes are unavailable")
}
w.Header().Set("Content-Type", "application/json")
bw := bufferedwriter.Get(w)
defer bufferedwriter.Put(bw)
resultsCh := make(chan *quicktemplate.ByteBuffer)
doneCh := make(chan error)
go func() {
err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) {
err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) error {
if err := bw.Error(); err != nil {
return err
}
bb := quicktemplate.AcquireByteBuffer()
writemetricNameObject(bb, &rs.MetricName)
resultsCh <- bb
return nil
})
close(resultsCh)
doneCh <- err
}()
w.Header().Set("Content-Type", "application/json")
WriteSeriesResponse(w, resultsCh)
// Consume all the data from resultsCh in the event WriteSeriesResponse
// fails to consume all the data.
for bb := range resultsCh {
quicktemplate.ReleaseByteBuffer(bb)
// WriteSeriesResponse must consume all the data from resultsCh.
WriteSeriesResponse(bw, resultsCh)
if err := bw.Flush(); err != nil {
return err
}
err = <-doneCh
if err != nil {
@ -953,7 +986,12 @@ func QueryHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter, r
}
w.Header().Set("Content-Type", "application/json")
WriteQueryResponse(w, result)
bw := bufferedwriter.Get(w)
defer bufferedwriter.Put(bw)
WriteQueryResponse(bw, result)
if err := bw.Flush(); err != nil {
return err
}
queryDuration.UpdateDuration(startTime)
return nil
}
@ -1050,7 +1088,12 @@ func queryRangeHandler(startTime time.Time, at *auth.Token, w http.ResponseWrite
result = removeEmptyValuesAndTimeseries(result)
w.Header().Set("Content-Type", "application/json")
WriteQueryRangeResponse(w, result)
bw := bufferedwriter.Get(w)
defer bufferedwriter.Put(bw)
WriteQueryRangeResponse(bw, result)
if err := bw.Flush(); err != nil {
return err
}
return nil
}


@ -755,7 +755,7 @@ func getRollupMemoryLimiter() *memoryLimiter {
func evalRollupWithIncrementalAggregate(name string, iafc *incrementalAggrFuncContext, rss *netstorage.Results, rcs []*rollupConfig,
preFunc func(values []float64, timestamps []int64), sharedTimestamps []int64, removeMetricGroup bool) ([]*timeseries, error) {
err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) {
err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) error {
preFunc(rs.Values, rs.Timestamps)
ts := getTimeseries()
defer putTimeseries(ts)
@ -775,6 +775,7 @@ func evalRollupWithIncrementalAggregate(name string, iafc *incrementalAggrFuncCo
ts.Timestamps = nil
ts.denyReuse = false
}
return nil
})
if err != nil {
return nil, err
@ -787,7 +788,7 @@ func evalRollupNoIncrementalAggregate(name string, rss *netstorage.Results, rcs
preFunc func(values []float64, timestamps []int64), sharedTimestamps []int64, removeMetricGroup bool) ([]*timeseries, error) {
tss := make([]*timeseries, 0, rss.Len()*len(rcs))
var tssLock sync.Mutex
err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) {
err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) error {
preFunc(rs.Values, rs.Timestamps)
for _, rc := range rcs {
if tsm := newTimeseriesMap(name, sharedTimestamps, &rs.MetricName); tsm != nil {
@ -803,6 +804,7 @@ func evalRollupNoIncrementalAggregate(name string, rss *netstorage.Results, rcs
tss = append(tss, &ts)
tssLock.Unlock()
}
return nil
})
if err != nil {
return nil, err