From 63e0f16062c32a601a197d27ca6006077ba9bd8c Mon Sep 17 00:00:00 2001 From: Roman Khavronenko Date: Mon, 15 Aug 2022 12:38:47 +0200 Subject: [PATCH] vmselect: introduce UserReadableError type of error (#2894) When read query fails, VM returns rich error message with all the details. While these details might be useful for debugging specific cases, they're usually too verbose for users. Introducing a new error type `UserReadableError` is supposed to allow to return to user only the most important parts of the error trace. This supposed to improve error readability in web interfaces such as VMUI or Grafana. The full error trace is still logged with the full context and can be found in vmselect logs. Signed-off-by: hagen1778 Signed-off-by: hagen1778 --- app/vmselect/main.go | 7 +++++++ app/vmselect/prometheus/prometheus.go | 2 +- app/vmselect/promql/eval.go | 18 +++++++++--------- app/vmselect/promql/exec.go | 13 ++++++++++++- 4 files changed, 29 insertions(+), 11 deletions(-) diff --git a/app/vmselect/main.go b/app/vmselect/main.go index 96c347f774..0af87b18f1 100644 --- a/app/vmselect/main.go +++ b/app/vmselect/main.go @@ -500,6 +500,13 @@ func sendPrometheusError(w http.ResponseWriter, r *http.Request, err error) { statusCode = esc.StatusCode } w.WriteHeader(statusCode) + + var ure promql.UserReadableError + if errors.As(err, &ure) { + prometheus.WriteErrorResponse(w, statusCode, ure.Err) + return + } + prometheus.WriteErrorResponse(w, statusCode, err) } diff --git a/app/vmselect/prometheus/prometheus.go b/app/vmselect/prometheus/prometheus.go index 46255c7255..e85a094c49 100644 --- a/app/vmselect/prometheus/prometheus.go +++ b/app/vmselect/prometheus/prometheus.go @@ -839,7 +839,7 @@ func queryRangeHandler(qt *querytracer.Tracer, startTime time.Time, w http.Respo } result, err := promql.Exec(qt, &ec, query, false) if err != nil { - return fmt.Errorf("cannot execute query: %w", err) + return err } if step < maxStepForPointsAdjustment.Milliseconds() { queryOffset := getLatencyOffsetMilliseconds() diff --git a/app/vmselect/promql/eval.go b/app/vmselect/promql/eval.go index 4650d8c7a1..f21b404307 100644 --- a/app/vmselect/promql/eval.go +++ b/app/vmselect/promql/eval.go @@ -294,7 +294,7 @@ func evalExprInternal(qt *querytracer.Tracer, ec *EvalConfig, e metricsql.Expr) func evalTransformFunc(qt *querytracer.Tracer, ec *EvalConfig, fe *metricsql.FuncExpr) ([]*timeseries, error) { tf := getTransformFunc(fe.Name) if tf == nil { - return nil, fmt.Errorf(`unknown func %q`, fe.Name) + return nil, UserReadableError{Err: fmt.Errorf(`unknown func %q`, fe.Name)} } args, err := evalExprs(qt, ec, fe.Args) if err != nil { @@ -336,7 +336,7 @@ func evalAggrFunc(qt *querytracer.Tracer, ec *EvalConfig, ae *metricsql.AggrFunc } af := getAggrFunc(ae.Name) if af == nil { - return nil, fmt.Errorf(`unknown func %q`, ae.Name) + return nil, UserReadableError{Err: fmt.Errorf(`unknown func %q`, ae.Name)} } afa := &aggrFuncArg{ ae: ae, @@ -679,10 +679,10 @@ func evalRollupFunc(qt *querytracer.Tracer, ec *EvalConfig, funcName string, rf } tssAt, err := evalExpr(qt, ec, re.At) if err != nil { - return nil, fmt.Errorf("cannot evaluate `@` modifier: %w", err) + return nil, UserReadableError{Err: fmt.Errorf("cannot evaluate `@` modifier: %w", err)} } if len(tssAt) != 1 { - return nil, fmt.Errorf("`@` modifier must return a single series; it returns %d series instead", len(tssAt)) + return nil, UserReadableError{Err: fmt.Errorf("`@` modifier must return a single series; it returns %d series instead", len(tssAt))} } atTimestamp := int64(tssAt[0].Values[0] * 1000) ecNew := copyEvalConfig(ec) @@ -742,7 +742,7 @@ func evalRollupFuncWithoutAt(qt *querytracer.Tracer, ec *EvalConfig, funcName st rvs, err = evalRollupFuncWithSubquery(qt, ecNew, funcName, rf, expr, re) } if err != nil { - return nil, err + return nil, UserReadableError{Err: err} } if funcName == "absent_over_time" { rvs = aggregateAbsentOverTime(ec, re.Expr, rvs) @@ -964,7 +964,7 @@ func evalRollupFuncWithMetricExpr(qt *querytracer.Tracer, ec *EvalConfig, funcNa sq := storage.NewSearchQuery(minTimestamp, ec.End, tfss, ec.MaxSeries) rss, err := netstorage.ProcessSearchQuery(qt, sq, ec.Deadline) if err != nil { - return nil, err + return nil, UserReadableError{Err: err} } rssLen := rss.Len() if rssLen == 0 { @@ -1000,12 +1000,12 @@ func evalRollupFuncWithMetricExpr(qt *querytracer.Tracer, ec *EvalConfig, funcNa rml := getRollupMemoryLimiter() if !rml.Get(uint64(rollupMemorySize)) { rss.Cancel() - return nil, fmt.Errorf("not enough memory for processing %d data points across %d time series with %d points in each time series; "+ + return nil, UserReadableError{Err: fmt.Errorf("not enough memory for processing %d data points across %d time series with %d points in each time series; "+ "total available memory for concurrent requests: %d bytes; "+ "requested memory: %d bytes; "+ "possible solutions are: reducing the number of matching time series; switching to node with more RAM; "+ "increasing -memory.allowedPercent; increasing `step` query arg (%gs)", - rollupPoints, timeseriesLen*len(rcs), pointsPerTimeseries, rml.MaxSize, uint64(rollupMemorySize), float64(ec.Step)/1e3) + rollupPoints, timeseriesLen*len(rcs), pointsPerTimeseries, rml.MaxSize, uint64(rollupMemorySize), float64(ec.Step)/1e3)} } defer rml.Put(uint64(rollupMemorySize)) @@ -1018,7 +1018,7 @@ func evalRollupFuncWithMetricExpr(qt *querytracer.Tracer, ec *EvalConfig, funcNa tss, err = evalRollupNoIncrementalAggregate(qt, funcName, keepMetricNames, rss, rcs, preFunc, sharedTimestamps) } if err != nil { - return nil, err + return nil, UserReadableError{Err: err} } tss = mergeTimeseries(tssCached, tss, start, ec) rollupResultCacheV.Put(qt, ec, expr, window, tss) diff --git a/app/vmselect/promql/exec.go b/app/vmselect/promql/exec.go index 3a460ab8e8..d8d2fe1c99 100644 --- a/app/vmselect/promql/exec.go +++ b/app/vmselect/promql/exec.go @@ -26,6 +26,17 @@ var ( `This option is DEPRECATED in favor of {__graphite__="a.*.c"} syntax for selecting metrics matching the given Graphite metrics filter`) ) +// UserReadableError is a type of error which supposed +// to be returned to the user without additional context. +type UserReadableError struct { + Err error +} + +// Error satisfies Error interface +func (ure UserReadableError) Error() string { + return ure.Err.Error() +} + // Exec executes q for the given ec. func Exec(qt *querytracer.Tracer, ec *EvalConfig, q string, isFirstPointOnly bool) ([]netstorage.Result, error) { if querystats.Enabled() { @@ -73,7 +84,7 @@ func Exec(qt *querytracer.Tracer, ec *EvalConfig, q string, isFirstPointOnly boo } qt.Printf("round series values to %d decimal digits after the point", n) } - return result, err + return result, nil } func maySortResults(e metricsql.Expr, tss []*timeseries) bool {