vmselect: introduce UserReadableError type of error (#2894)

When read query fails, VM returns rich error message with
all the details. While these details might be useful
for debugging specific cases, they're usually too verbose
for users.
Introducing a new error type `UserReadableError` is supposed
to allow to return to user only the most important parts
of the error trace. This supposed to improve error readability
in web interfaces such as VMUI or Grafana.

The full error trace is still logged with the full context
and can be found in vmselect logs.

Signed-off-by: hagen1778 <roman@victoriametrics.com>

Signed-off-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
Roman Khavronenko 2022-08-15 12:38:47 +02:00 committed by Aliaksandr Valialkin
parent 79e396dac9
commit 8a26ec435d
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
4 changed files with 29 additions and 11 deletions

View file

@ -626,6 +626,13 @@ func sendPrometheusError(w http.ResponseWriter, r *http.Request, err error) {
statusCode = esc.StatusCode
}
w.WriteHeader(statusCode)
var ure promql.UserReadableError
if errors.As(err, &ure) {
prometheus.WriteErrorResponse(w, statusCode, ure.Err)
return
}
prometheus.WriteErrorResponse(w, statusCode, err)
}

View file

@ -910,7 +910,7 @@ func queryRangeHandler(qt *querytracer.Tracer, startTime time.Time, at *auth.Tok
}
result, err := promql.Exec(qt, &ec, query, false)
if err != nil {
return fmt.Errorf("cannot execute query: %w", err)
return err
}
if step < maxStepForPointsAdjustment.Milliseconds() {
queryOffset := getLatencyOffsetMilliseconds()

View file

@ -311,7 +311,7 @@ func evalExprInternal(qt *querytracer.Tracer, ec *EvalConfig, e metricsql.Expr)
func evalTransformFunc(qt *querytracer.Tracer, ec *EvalConfig, fe *metricsql.FuncExpr) ([]*timeseries, error) {
tf := getTransformFunc(fe.Name)
if tf == nil {
return nil, fmt.Errorf(`unknown func %q`, fe.Name)
return nil, UserReadableError{Err: fmt.Errorf(`unknown func %q`, fe.Name)}
}
args, err := evalExprs(qt, ec, fe.Args)
if err != nil {
@ -353,7 +353,7 @@ func evalAggrFunc(qt *querytracer.Tracer, ec *EvalConfig, ae *metricsql.AggrFunc
}
af := getAggrFunc(ae.Name)
if af == nil {
return nil, fmt.Errorf(`unknown func %q`, ae.Name)
return nil, UserReadableError{Err: fmt.Errorf(`unknown func %q`, ae.Name)}
}
afa := &aggrFuncArg{
ae: ae,
@ -696,10 +696,10 @@ func evalRollupFunc(qt *querytracer.Tracer, ec *EvalConfig, funcName string, rf
}
tssAt, err := evalExpr(qt, ec, re.At)
if err != nil {
return nil, fmt.Errorf("cannot evaluate `@` modifier: %w", err)
return nil, UserReadableError{Err: fmt.Errorf("cannot evaluate `@` modifier: %w", err)}
}
if len(tssAt) != 1 {
return nil, fmt.Errorf("`@` modifier must return a single series; it returns %d series instead", len(tssAt))
return nil, UserReadableError{Err: fmt.Errorf("`@` modifier must return a single series; it returns %d series instead", len(tssAt))}
}
atTimestamp := int64(tssAt[0].Values[0] * 1000)
ecNew := copyEvalConfig(ec)
@ -759,7 +759,7 @@ func evalRollupFuncWithoutAt(qt *querytracer.Tracer, ec *EvalConfig, funcName st
rvs, err = evalRollupFuncWithSubquery(qt, ecNew, funcName, rf, expr, re)
}
if err != nil {
return nil, err
return nil, UserReadableError{Err: err}
}
if funcName == "absent_over_time" {
rvs = aggregateAbsentOverTime(ec, re.Expr, rvs)
@ -983,7 +983,7 @@ func evalRollupFuncWithMetricExpr(qt *querytracer.Tracer, ec *EvalConfig, funcNa
sq := storage.NewSearchQuery(ec.AuthToken.AccountID, ec.AuthToken.ProjectID, minTimestamp, ec.End, tfss, ec.MaxSeries)
rss, isPartial, err := netstorage.ProcessSearchQuery(qt, ec.DenyPartialResponse, sq, ec.Deadline)
if err != nil {
return nil, err
return nil, UserReadableError{Err: err}
}
ec.updateIsPartialResponse(isPartial)
rssLen := rss.Len()
@ -1020,12 +1020,12 @@ func evalRollupFuncWithMetricExpr(qt *querytracer.Tracer, ec *EvalConfig, funcNa
rml := getRollupMemoryLimiter()
if !rml.Get(uint64(rollupMemorySize)) {
rss.Cancel()
return nil, fmt.Errorf("not enough memory for processing %d data points across %d time series with %d points in each time series; "+
return nil, UserReadableError{Err: fmt.Errorf("not enough memory for processing %d data points across %d time series with %d points in each time series; "+
"total available memory for concurrent requests: %d bytes; "+
"requested memory: %d bytes; "+
"possible solutions are: reducing the number of matching time series; switching to node with more RAM; "+
"increasing -memory.allowedPercent; increasing `step` query arg (%gs)",
rollupPoints, timeseriesLen*len(rcs), pointsPerTimeseries, rml.MaxSize, uint64(rollupMemorySize), float64(ec.Step)/1e3)
rollupPoints, timeseriesLen*len(rcs), pointsPerTimeseries, rml.MaxSize, uint64(rollupMemorySize), float64(ec.Step)/1e3)}
}
defer rml.Put(uint64(rollupMemorySize))
@ -1038,7 +1038,7 @@ func evalRollupFuncWithMetricExpr(qt *querytracer.Tracer, ec *EvalConfig, funcNa
tss, err = evalRollupNoIncrementalAggregate(qt, funcName, keepMetricNames, rss, rcs, preFunc, sharedTimestamps)
}
if err != nil {
return nil, err
return nil, UserReadableError{Err: err}
}
tss = mergeTimeseries(tssCached, tss, start, ec)
if !isPartial {

View file

@ -26,6 +26,17 @@ var (
`This option is DEPRECATED in favor of {__graphite__="a.*.c"} syntax for selecting metrics matching the given Graphite metrics filter`)
)
// UserReadableError is a type of error which supposed
// to be returned to the user without additional context.
type UserReadableError struct {
Err error
}
// Error satisfies Error interface
func (ure UserReadableError) Error() string {
return ure.Err.Error()
}
// Exec executes q for the given ec.
func Exec(qt *querytracer.Tracer, ec *EvalConfig, q string, isFirstPointOnly bool) ([]netstorage.Result, error) {
if querystats.Enabled() {
@ -74,7 +85,7 @@ func Exec(qt *querytracer.Tracer, ec *EvalConfig, q string, isFirstPointOnly boo
}
qt.Printf("round series values to %d decimal digits after the point", n)
}
return result, err
return result, nil
}
func maySortResults(e metricsql.Expr, tss []*timeseries) bool {