mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-03-11 15:34:56 +00:00
vmalert: add evalAlignment
for rule group and fix evalutaion timstamp (#5066)
* vmalert: add `query_time_alignment` for rule group 1. add `eval_alignment` attribute for group which by default is true. So group rule query stamp will be aligned with interval and propagated to ALERT metrics and the messages for alertmanager; 2. deprecate `datasource.queryTimeAlignment` flag. https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5049
This commit is contained in:
parent
244c887825
commit
2aa0f5fc41
16 changed files with 149 additions and 152 deletions
|
@ -131,6 +131,15 @@ name: <string>
|
|||
# By default, "prometheus" type is used.
|
||||
[ type: <string> ]
|
||||
|
||||
# Optional
|
||||
# The evaluation timestamp will be aligned with group's interval,
|
||||
# instead of using the actual timestamp that evaluation happens at.
|
||||
# By default, it's enabled to get more predictable results
|
||||
# and to visually align with results plotted via Grafana or vmui.
|
||||
# See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5049
|
||||
# Available starting from v1.95
|
||||
[ eval_alignment: <bool> | default true]
|
||||
|
||||
# Optional list of HTTP URL parameters
|
||||
# applied for all rules requests within a group
|
||||
# For example:
|
||||
|
@ -527,10 +536,6 @@ Alertmanagers.
|
|||
To avoid recording rules results and alerts state duplication in VictoriaMetrics server
|
||||
don't forget to configure [deduplication](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#deduplication).
|
||||
The recommended value for `-dedup.minScrapeInterval` must be multiple of vmalert's `-evaluationInterval`.
|
||||
If you observe inconsistent or "jumping" values in series produced by vmalert, try disabling `-datasource.queryTimeAlignment`
|
||||
command line flag. Because of alignment, two or more vmalert HA pairs will produce results with the same timestamps.
|
||||
But due of backfilling (data delivered to the datasource with some delay) values of such results may differ,
|
||||
which would affect deduplication logic and result into "jumping" datapoints.
|
||||
|
||||
Alertmanager will automatically deduplicate alerts with identical labels, so ensure that
|
||||
all `vmalert`s are having the same config.
|
||||
|
@ -975,6 +980,7 @@ The shortlist of configuration flags is the following:
|
|||
-datasource.queryStep duration
|
||||
How far a value can fallback to when evaluating queries. For example, if -datasource.queryStep=15s then param "step" with value "15s" will be added to every query. If set to 0, rule's evaluation interval will be used instead. (default 5m0s)
|
||||
-datasource.queryTimeAlignment
|
||||
Flag is deprecated and will be removed in next releases, please use `eval_alignment` in rule group instead.
|
||||
Whether to align "time" parameter with evaluation interval.Alignment supposed to produce deterministic results despite number of vmalert replicas or time they were started. See more details here https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1257 (default true)
|
||||
-datasource.roundDigits int
|
||||
Adds "round_digits" GET param to datasource requests. In VM "round_digits" limits the number of digits after the decimal point in response values.
|
||||
|
|
|
@ -72,7 +72,6 @@ func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
|
|||
q: qb.BuildWithParams(datasource.QuerierParams{
|
||||
DataSourceType: group.Type.String(),
|
||||
EvaluationInterval: group.Interval,
|
||||
EvalOffset: group.EvalOffset,
|
||||
QueryParams: group.Params,
|
||||
Headers: group.Headers,
|
||||
Debug: cfg.Debug,
|
||||
|
|
|
@ -39,6 +39,8 @@ type Group struct {
|
|||
Headers []Header `yaml:"headers,omitempty"`
|
||||
// NotifierHeaders contains optional HTTP headers sent to notifiers for generated notifications
|
||||
NotifierHeaders []Header `yaml:"notifier_headers,omitempty"`
|
||||
// EvalAlignment will make the timestamp of group query requests be aligned with interval
|
||||
EvalAlignment *bool `yaml:"eval_alignment,omitempty"`
|
||||
// Catches all undefined fields and must be empty after parsing.
|
||||
XXX map[string]interface{} `yaml:",inline"`
|
||||
}
|
||||
|
|
|
@ -44,7 +44,6 @@ type QuerierBuilder interface {
|
|||
type QuerierParams struct {
|
||||
DataSourceType string
|
||||
EvaluationInterval time.Duration
|
||||
EvalOffset *time.Duration
|
||||
QueryParams url.Values
|
||||
Headers map[string]string
|
||||
Debug bool
|
||||
|
|
|
@ -10,6 +10,7 @@ import (
|
|||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -46,7 +47,8 @@ var (
|
|||
queryStep = flag.Duration("datasource.queryStep", 5*time.Minute, "How far a value can fallback to when evaluating queries. "+
|
||||
"For example, if -datasource.queryStep=15s then param \"step\" with value \"15s\" will be added to every query. "+
|
||||
"If set to 0, rule's evaluation interval will be used instead.")
|
||||
queryTimeAlignment = flag.Bool("datasource.queryTimeAlignment", true, `Whether to align "time" parameter with evaluation interval.`+
|
||||
queryTimeAlignment = flag.Bool("datasource.queryTimeAlignment", true, "Flag is deprecated and will be removed in next releases, please use `eval_alignment` in rule group instead."+
|
||||
`Whether to align "time" parameter with evaluation interval.`+
|
||||
"Alignment supposed to produce deterministic results despite number of vmalert replicas or time they were started. See more details here https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1257")
|
||||
maxIdleConnections = flag.Int("datasource.maxIdleConnections", 100, `Defines the number of idle (keep-alive connections) to each configured datasource. Consider setting this value equal to the value: groups_total * group.concurrency. Too low a value may result in a high number of sockets in TIME_WAIT state.`)
|
||||
disableKeepAlive = flag.Bool("datasource.disableKeepAlive", false, `Whether to disable long-lived connections to the datasource. `+
|
||||
|
@ -79,6 +81,9 @@ func Init(extraParams url.Values) (QuerierBuilder, error) {
|
|||
if *addr == "" {
|
||||
return nil, fmt.Errorf("datasource.url is empty")
|
||||
}
|
||||
if !*queryTimeAlignment {
|
||||
logger.Warnf("flag `datasource.queryTimeAlignment` is deprecated and will be removed in next releases, please use `eval_alignment` in rule group instead")
|
||||
}
|
||||
|
||||
tr, err := utils.Transport(*addr, *tlsCertFile, *tlsKeyFile, *tlsCAFile, *tlsServerName, *tlsInsecureSkipVerify)
|
||||
if err != nil {
|
||||
|
|
|
@ -39,14 +39,8 @@ type VMStorage struct {
|
|||
queryStep time.Duration
|
||||
dataSourceType datasourceType
|
||||
|
||||
// evaluationInterval will align the request's timestamp
|
||||
// if `datasource.queryTimeAlignment` is enabled,
|
||||
// will set request's `step` param as well.
|
||||
// evaluationInterval will help setting request's `step` param.
|
||||
evaluationInterval time.Duration
|
||||
// evaluationOffset shifts the request's timestamp, will be equal
|
||||
// to the offset specified evaluationInterval.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4693
|
||||
evaluationOffset *time.Duration
|
||||
// extraParams contains params to be attached to each HTTP request
|
||||
extraParams url.Values
|
||||
// extraHeaders are headers to be attached to each HTTP request
|
||||
|
@ -95,7 +89,6 @@ func (s *VMStorage) Clone() *VMStorage {
|
|||
func (s *VMStorage) ApplyParams(params QuerierParams) *VMStorage {
|
||||
s.dataSourceType = toDatasourceType(params.DataSourceType)
|
||||
s.evaluationInterval = params.EvaluationInterval
|
||||
s.evaluationOffset = params.EvalOffset
|
||||
if params.QueryParams != nil {
|
||||
if s.extraParams == nil {
|
||||
s.extraParams = url.Values{}
|
||||
|
|
|
@ -161,8 +161,9 @@ func (s *VMStorage) setPrometheusInstantReqParams(r *http.Request, query string,
|
|||
r.URL.Path += "/api/v1/query"
|
||||
}
|
||||
q := r.URL.Query()
|
||||
|
||||
timestamp = s.adjustReqTimestamp(timestamp)
|
||||
if s.lookBack > 0 {
|
||||
timestamp = timestamp.Add(-s.lookBack)
|
||||
}
|
||||
q.Set("time", timestamp.Format(time.RFC3339))
|
||||
if !*disableStepParam && s.evaluationInterval > 0 { // set step as evaluationInterval by default
|
||||
// always convert to seconds to keep compatibility with older
|
||||
|
@ -186,9 +187,6 @@ func (s *VMStorage) setPrometheusRangeReqParams(r *http.Request, query string, s
|
|||
r.URL.Path += "/api/v1/query_range"
|
||||
}
|
||||
q := r.URL.Query()
|
||||
if s.evaluationOffset != nil {
|
||||
start = start.Truncate(s.evaluationInterval).Add(*s.evaluationOffset)
|
||||
}
|
||||
q.Add("start", start.Format(time.RFC3339))
|
||||
q.Add("end", end.Format(time.RFC3339))
|
||||
if s.evaluationInterval > 0 { // set step as evaluationInterval by default
|
||||
|
@ -213,30 +211,3 @@ func (s *VMStorage) setPrometheusReqParams(r *http.Request, query string) {
|
|||
q.Set("query", query)
|
||||
r.URL.RawQuery = q.Encode()
|
||||
}
|
||||
|
||||
func (s *VMStorage) adjustReqTimestamp(timestamp time.Time) time.Time {
|
||||
if s.evaluationOffset != nil {
|
||||
// calculate the min timestamp on the evaluationInterval
|
||||
intervalStart := timestamp.Truncate(s.evaluationInterval)
|
||||
ts := intervalStart.Add(*s.evaluationOffset)
|
||||
if timestamp.Before(ts) {
|
||||
// if passed timestamp is before the expected evaluation offset,
|
||||
// then we should adjust it to the previous evaluation round.
|
||||
// E.g. request with evaluationInterval=1h and evaluationOffset=30m
|
||||
// was evaluated at 11:20. Then the timestamp should be adjusted
|
||||
// to 10:30, to the previous evaluationInterval.
|
||||
return ts.Add(-s.evaluationInterval)
|
||||
}
|
||||
// evaluationOffset shouldn't interfere with queryTimeAlignment or lookBack,
|
||||
// so we return it immediately
|
||||
return ts
|
||||
}
|
||||
if *queryTimeAlignment {
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1232
|
||||
timestamp = timestamp.Truncate(s.evaluationInterval)
|
||||
}
|
||||
if s.lookBack > 0 {
|
||||
timestamp = timestamp.Add(-s.lookBack)
|
||||
}
|
||||
return timestamp
|
||||
}
|
||||
|
|
|
@ -3,7 +3,6 @@ package datasource
|
|||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func BenchmarkMetrics(b *testing.B) {
|
||||
|
@ -19,74 +18,3 @@ func BenchmarkMetrics(b *testing.B) {
|
|||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestGetPrometheusReqTimestamp(t *testing.T) {
|
||||
offset := 30 * time.Minute
|
||||
testCases := []struct {
|
||||
name string
|
||||
s *VMStorage
|
||||
queryTimeAlignment bool
|
||||
originTS, expTS string
|
||||
}{
|
||||
{
|
||||
"with eval_offset, find previous offset point",
|
||||
&VMStorage{
|
||||
evaluationOffset: &offset,
|
||||
evaluationInterval: time.Hour,
|
||||
lookBack: 1 * time.Minute,
|
||||
},
|
||||
false,
|
||||
"2023-08-28T11:11:00+00:00",
|
||||
"2023-08-28T10:30:00+00:00",
|
||||
},
|
||||
{
|
||||
"with eval_offset",
|
||||
&VMStorage{
|
||||
evaluationOffset: &offset,
|
||||
evaluationInterval: time.Hour,
|
||||
},
|
||||
true,
|
||||
"2023-08-28T11:41:00+00:00",
|
||||
"2023-08-28T11:30:00+00:00",
|
||||
},
|
||||
{
|
||||
"with query align",
|
||||
&VMStorage{
|
||||
evaluationInterval: time.Hour,
|
||||
},
|
||||
true,
|
||||
"2023-08-28T11:11:00+00:00",
|
||||
"2023-08-28T11:00:00+00:00",
|
||||
},
|
||||
{
|
||||
"with query align and lookback",
|
||||
&VMStorage{
|
||||
evaluationInterval: time.Hour,
|
||||
lookBack: 1 * time.Minute,
|
||||
},
|
||||
true,
|
||||
"2023-08-28T11:11:00+00:00",
|
||||
"2023-08-28T10:59:00+00:00",
|
||||
},
|
||||
{
|
||||
"without query align",
|
||||
&VMStorage{
|
||||
evaluationInterval: time.Hour,
|
||||
},
|
||||
false,
|
||||
"2023-08-28T11:11:00+00:00",
|
||||
"2023-08-28T11:11:00+00:00",
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
oldAlignPara := *queryTimeAlignment
|
||||
*queryTimeAlignment = tc.queryTimeAlignment
|
||||
originT, _ := time.Parse(time.RFC3339, tc.originTS)
|
||||
expT, _ := time.Parse(time.RFC3339, tc.expTS)
|
||||
gotTS := tc.s.adjustReqTimestamp(originT)
|
||||
if !gotTS.Equal(expT) {
|
||||
t.Fatalf("get wrong prometheus request timestamp, expect %s, got %s", expT, gotTS)
|
||||
}
|
||||
*queryTimeAlignment = oldAlignPara
|
||||
}
|
||||
}
|
||||
|
|
|
@ -506,8 +506,7 @@ func TestRequestParams(t *testing.T) {
|
|||
},
|
||||
func(t *testing.T, r *http.Request) {
|
||||
evalInterval := 15 * time.Second
|
||||
tt := timestamp.Truncate(evalInterval)
|
||||
exp := url.Values{"query": {query}, "step": {evalInterval.String()}, "time": {tt.Format(time.RFC3339)}}
|
||||
exp := url.Values{"query": {query}, "step": {evalInterval.String()}, "time": {timestamp.Format(time.RFC3339)}}
|
||||
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
|
||||
},
|
||||
},
|
||||
|
@ -521,7 +520,6 @@ func TestRequestParams(t *testing.T) {
|
|||
func(t *testing.T, r *http.Request) {
|
||||
evalInterval := 15 * time.Second
|
||||
tt := timestamp.Add(-time.Minute)
|
||||
tt = tt.Truncate(evalInterval)
|
||||
exp := url.Values{"query": {query}, "step": {evalInterval.String()}, "time": {tt.Format(time.RFC3339)}}
|
||||
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
|
||||
},
|
||||
|
@ -549,8 +547,7 @@ func TestRequestParams(t *testing.T) {
|
|||
},
|
||||
func(t *testing.T, r *http.Request) {
|
||||
evalInterval := 3 * time.Hour
|
||||
tt := timestamp.Truncate(evalInterval)
|
||||
exp := url.Values{"query": {query}, "step": {fmt.Sprintf("%ds", int(evalInterval.Seconds()))}, "time": {tt.Format(time.RFC3339)}}
|
||||
exp := url.Values{"query": {query}, "step": {fmt.Sprintf("%ds", int(evalInterval.Seconds()))}, "time": {timestamp.Format(time.RFC3339)}}
|
||||
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
|
||||
},
|
||||
},
|
||||
|
|
|
@ -52,6 +52,9 @@ type Group struct {
|
|||
evalCancel context.CancelFunc
|
||||
|
||||
metrics *groupMetrics
|
||||
// evalAlignment will make the timestamp of group query
|
||||
// requests be aligned with interval
|
||||
evalAlignment *bool
|
||||
}
|
||||
|
||||
type groupMetrics struct {
|
||||
|
@ -106,6 +109,7 @@ func newGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
|
|||
Headers: make(map[string]string),
|
||||
NotifierHeaders: make(map[string]string),
|
||||
Labels: cfg.Labels,
|
||||
evalAlignment: cfg.EvalAlignment,
|
||||
|
||||
doneCh: make(chan struct{}),
|
||||
finishedCh: make(chan struct{}),
|
||||
|
@ -285,10 +289,11 @@ var skipRandSleepOnGroupStart bool
|
|||
func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *remotewrite.Client, rr datasource.QuerierBuilder) {
|
||||
defer func() { close(g.finishedCh) }()
|
||||
|
||||
evalTS := time.Now()
|
||||
// sleep random duration to spread group rules evaluation
|
||||
// over time in order to reduce load on datasource.
|
||||
if !skipRandSleepOnGroupStart {
|
||||
sleepBeforeStart := delayBeforeStart(time.Now(), g.ID(), g.Interval, g.EvalOffset)
|
||||
sleepBeforeStart := delayBeforeStart(evalTS, g.ID(), g.Interval, g.EvalOffset)
|
||||
g.infof("will start in %v", sleepBeforeStart)
|
||||
|
||||
sleepTimer := time.NewTimer(sleepBeforeStart)
|
||||
|
@ -301,10 +306,9 @@ func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *r
|
|||
return
|
||||
case <-sleepTimer.C:
|
||||
}
|
||||
evalTS = evalTS.Add(sleepBeforeStart)
|
||||
}
|
||||
|
||||
evalTS := time.Now()
|
||||
|
||||
e := &executor{
|
||||
rw: rw,
|
||||
notifiers: nts,
|
||||
|
@ -326,6 +330,7 @@ func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *r
|
|||
}
|
||||
|
||||
resolveDuration := getResolveDuration(g.Interval, *resendDelay, *maxResolveDuration)
|
||||
ts = g.adjustReqTimestamp(ts)
|
||||
errs := e.execConcurrently(ctx, g.Rules, ts, g.Concurrency, resolveDuration, g.Limit)
|
||||
for err := range errs {
|
||||
if err != nil {
|
||||
|
@ -424,7 +429,7 @@ func delayBeforeStart(ts time.Time, key uint64, interval time.Duration, offset *
|
|||
randSleep += *offset
|
||||
}
|
||||
}
|
||||
return randSleep.Truncate(time.Second)
|
||||
return randSleep
|
||||
}
|
||||
|
||||
func (g *Group) infof(format string, args ...interface{}) {
|
||||
|
@ -446,6 +451,32 @@ func getResolveDuration(groupInterval, delta, maxDuration time.Duration) time.Du
|
|||
return resolveDuration
|
||||
}
|
||||
|
||||
func (g *Group) adjustReqTimestamp(timestamp time.Time) time.Time {
|
||||
if g.EvalOffset != nil {
|
||||
// calculate the min timestamp on the evaluationInterval
|
||||
intervalStart := timestamp.Truncate(g.Interval)
|
||||
ts := intervalStart.Add(*g.EvalOffset)
|
||||
if timestamp.Before(ts) {
|
||||
// if passed timestamp is before the expected evaluation offset,
|
||||
// then we should adjust it to the previous evaluation round.
|
||||
// E.g. request with evaluationInterval=1h and evaluationOffset=30m
|
||||
// was evaluated at 11:20. Then the timestamp should be adjusted
|
||||
// to 10:30, to the previous evaluationInterval.
|
||||
return ts.Add(-g.Interval)
|
||||
}
|
||||
// EvalOffset shouldn't interfere with evalAlignment,
|
||||
// so we return it immediately
|
||||
return ts
|
||||
}
|
||||
if g.evalAlignment == nil || *g.evalAlignment {
|
||||
// align query time with interval to get similar result with grafana when plotting time series.
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5049
|
||||
// and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1232
|
||||
return timestamp.Truncate(g.Interval)
|
||||
}
|
||||
return timestamp
|
||||
}
|
||||
|
||||
type executor struct {
|
||||
notifiers func() []notifier.Notifier
|
||||
notifierHeaders map[string]string
|
||||
|
|
|
@ -533,11 +533,11 @@ func TestGroupStartDelay(t *testing.T) {
|
|||
|
||||
f := func(atS, expS string) {
|
||||
t.Helper()
|
||||
at, err := time.Parse(time.DateTime, atS)
|
||||
at, err := time.Parse(time.RFC3339Nano, atS)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
expTS, err := time.Parse(time.DateTime, expS)
|
||||
expTS, err := time.Parse(time.RFC3339Nano, expS)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -549,37 +549,91 @@ func TestGroupStartDelay(t *testing.T) {
|
|||
}
|
||||
|
||||
// test group without offset
|
||||
f("2023-01-01 00:00:00", "2023-01-01 00:00:30")
|
||||
f("2023-01-01 00:00:29", "2023-01-01 00:00:30")
|
||||
f("2023-01-01 00:00:31", "2023-01-01 00:05:30")
|
||||
f("2023-01-01T00:00:00.000+00:00", "2023-01-01T00:00:30.000+00:00")
|
||||
f("2023-01-01T00:00:00.999+00:00", "2023-01-01T00:00:30.000+00:00")
|
||||
f("2023-01-01T00:00:29.000+00:00", "2023-01-01T00:00:30.000+00:00")
|
||||
f("2023-01-01T00:00:31.000+00:00", "2023-01-01T00:05:30.000+00:00")
|
||||
|
||||
// test group with offset smaller than above fixed randSleep,
|
||||
// this way randSleep will always be enough
|
||||
offset := 20 * time.Second
|
||||
g.EvalOffset = &offset
|
||||
|
||||
f("2023-01-01 00:00:00", "2023-01-01 00:00:30")
|
||||
f("2023-01-01 00:00:29", "2023-01-01 00:00:30")
|
||||
f("2023-01-01 00:00:31", "2023-01-01 00:05:30")
|
||||
f("2023-01-01T00:00:00.000+00:00", "2023-01-01T00:00:30.000+00:00")
|
||||
f("2023-01-01T00:00:29.000+00:00", "2023-01-01T00:00:30.000+00:00")
|
||||
f("2023-01-01T00:00:31.000+00:00", "2023-01-01T00:05:30.000+00:00")
|
||||
|
||||
// test group with offset bigger than above fixed randSleep,
|
||||
// this way offset will be added to delay
|
||||
offset = 3 * time.Minute
|
||||
g.EvalOffset = &offset
|
||||
|
||||
f("2023-01-01 00:00:00", "2023-01-01 00:03:30")
|
||||
f("2023-01-01 00:00:29", "2023-01-01 00:03:30")
|
||||
f("2023-01-01 00:01:00", "2023-01-01 00:08:30")
|
||||
f("2023-01-01 00:03:30", "2023-01-01 00:08:30")
|
||||
f("2023-01-01 00:07:30", "2023-01-01 00:13:30")
|
||||
f("2023-01-01T00:00:00.000+00:00", "2023-01-01T00:03:30.000+00:00")
|
||||
f("2023-01-01T00:00:29.000+00:00", "2023-01-01T00:03:30.000+00:00")
|
||||
f("2023-01-01T00:01:00.000+00:00", "2023-01-01T00:08:30.000+00:00")
|
||||
f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:30.000+00:00")
|
||||
f("2023-01-01T00:07:30.000+00:00", "2023-01-01T00:13:30.000+00:00")
|
||||
|
||||
offset = 10 * time.Minute
|
||||
g.EvalOffset = &offset
|
||||
// interval of 1h and key generate a static delay of 6m
|
||||
g.Interval = time.Hour
|
||||
|
||||
f("2023-01-01 00:00:00", "2023-01-01 00:16:00")
|
||||
f("2023-01-01 00:05:00", "2023-01-01 00:16:00")
|
||||
f("2023-01-01 00:30:00", "2023-01-01 01:16:00")
|
||||
|
||||
f("2023-01-01T00:00:00.000+00:00", "2023-01-01T00:16:00.000+00:00")
|
||||
f("2023-01-01T00:05:00.000+00:00", "2023-01-01T00:16:00.000+00:00")
|
||||
f("2023-01-01T00:30:00.000+00:00", "2023-01-01T01:16:00.000+00:00")
|
||||
}
|
||||
|
||||
func TestGetPrometheusReqTimestamp(t *testing.T) {
|
||||
offset := 30 * time.Minute
|
||||
disableAlign := false
|
||||
testCases := []struct {
|
||||
name string
|
||||
g *Group
|
||||
originTS, expTS string
|
||||
}{
|
||||
{
|
||||
"with query align",
|
||||
&Group{
|
||||
Interval: time.Hour,
|
||||
},
|
||||
"2023-08-28T11:11:00+00:00",
|
||||
"2023-08-28T11:00:00+00:00",
|
||||
},
|
||||
{
|
||||
"without query align",
|
||||
&Group{
|
||||
Interval: time.Hour,
|
||||
evalAlignment: &disableAlign,
|
||||
},
|
||||
"2023-08-28T11:11:00+00:00",
|
||||
"2023-08-28T11:11:00+00:00",
|
||||
},
|
||||
{
|
||||
"with eval_offset, find previous offset point",
|
||||
&Group{
|
||||
EvalOffset: &offset,
|
||||
Interval: time.Hour,
|
||||
},
|
||||
"2023-08-28T11:11:00+00:00",
|
||||
"2023-08-28T10:30:00+00:00",
|
||||
},
|
||||
{
|
||||
"with eval_offset",
|
||||
&Group{
|
||||
EvalOffset: &offset,
|
||||
Interval: time.Hour,
|
||||
},
|
||||
"2023-08-28T11:41:00+00:00",
|
||||
"2023-08-28T11:30:00+00:00",
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
originT, _ := time.Parse(time.RFC3339, tc.originTS)
|
||||
expT, _ := time.Parse(time.RFC3339, tc.expTS)
|
||||
gotTS := tc.g.adjustReqTimestamp(originT)
|
||||
if !gotTS.Equal(expT) {
|
||||
t.Fatalf("get wrong prometheus request timestamp, expect %s, got %s", expT, gotTS)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -61,7 +61,6 @@ func newRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul
|
|||
q: qb.BuildWithParams(datasource.QuerierParams{
|
||||
DataSourceType: group.Type.String(),
|
||||
EvaluationInterval: group.Interval,
|
||||
EvalOffset: group.EvalOffset,
|
||||
QueryParams: group.Params,
|
||||
Headers: group.Headers,
|
||||
}),
|
||||
|
|
|
@ -81,13 +81,15 @@ func replay(groupsCfg []config.Group, qb datasource.QuerierBuilder, rw *remotewr
|
|||
func (g *Group) replay(start, end time.Time, rw *remotewrite.Client) int {
|
||||
var total int
|
||||
step := g.Interval * time.Duration(*replayMaxDatapoints)
|
||||
start = g.adjustReqTimestamp(start)
|
||||
ri := rangeIterator{start: start, end: end, step: step}
|
||||
iterations := int(end.Sub(start)/step) + 1
|
||||
fmt.Printf("\nGroup %q"+
|
||||
"\ninterval: \t%v"+
|
||||
"\neval_offset: \t%v"+
|
||||
"\nrequests to make: \t%d"+
|
||||
"\nmax range per request: \t%v\n",
|
||||
g.Name, g.Interval, iterations, step)
|
||||
g.Name, g.Interval, g.EvalOffset, iterations, step)
|
||||
if g.Limit > 0 {
|
||||
fmt.Printf("\nPlease note, `limit: %d` param has no effect during replay.\n",
|
||||
g.Limit)
|
||||
|
|
|
@ -42,7 +42,7 @@ type ruleState struct {
|
|||
type ruleStateEntry struct {
|
||||
// stores last moment of time rule.Exec was called
|
||||
time time.Time
|
||||
// stores the timesteamp with which rule.Exec was called
|
||||
// stores the timestamp rule.Exec was called with
|
||||
at time.Time
|
||||
// stores the duration of the last rule.Exec call
|
||||
duration time.Duration
|
||||
|
|
|
@ -28,6 +28,11 @@ The sandbox cluster installation is running under the constant load generated by
|
|||
|
||||
## tip
|
||||
|
||||
**vmalert's cmd-line flag `datasource.queryTimeAlignment` was deprecated and will have no effect anymore. It will be completely removed in next releases. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5049) and more detailed changes below.**
|
||||
|
||||
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): add `eval_alignment` attribute for [Groups](https://docs.victoriametrics.com/vmalert.html#groups), it will align group query requests timestamp with interval like `datasource.queryTimeAlignment` did.
|
||||
This also means that `datasource.queryTimeAlignment` command-line flag becomes deprecated now and will have no effect if configured. If `datasource.queryTimeAlignment` was set to `false` before, then `eval_alignment` has to be set to `false` explicitly under group.
|
||||
See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5049).
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): support data ingestion from [NewRelic infrastructure agent](https://docs.newrelic.com/docs/infrastructure/install-infrastructure-agent). See [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-send-data-from-newrelic-agent), [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3520) and [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4712).
|
||||
* FEATURE: [vmbackup](https://docs.victoriametrics.com/vmbackup.html): add `-filestream.disableFadvise` command-line flag, which can be used for disabling `fadvise` syscall during backup upload to the remote storage. By default `vmbackup` uses `fadvise` syscall in order to prevent from eviction of recently accessed data from the [OS page cache](https://en.wikipedia.org/wiki/Page_cache) when backing up large files. Sometimes the `fadvise` syscall may take significant amounts of CPU when the backup is performed with large value of `-concurrency` command-line flag on systems with big number of CPU cores. In this case it is better to manually disable `fadvise` syscall by passing `-filestream.disableFadvise` command-line flag to `vmbackup`. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5120) for details.
|
||||
* FEATURE: [Alerting rules for VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#alerts): account for `vmauth` component for alerts `ServiceDown` and `TooManyRestarts`.
|
||||
|
|
|
@ -142,6 +142,15 @@ name: <string>
|
|||
# By default, "prometheus" type is used.
|
||||
[ type: <string> ]
|
||||
|
||||
# Optional
|
||||
# The evaluation timestamp will be aligned with group's interval,
|
||||
# instead of using the actual timestamp that evaluation happens at.
|
||||
# By default, it's enabled to get more predictable results
|
||||
# and to visually align with results plotted via Grafana or vmui.
|
||||
# See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5049
|
||||
# Available starting from v1.95
|
||||
[ eval_alignment: <bool> | default true]
|
||||
|
||||
# Optional list of HTTP URL parameters
|
||||
# applied for all rules requests within a group
|
||||
# For example:
|
||||
|
@ -538,10 +547,6 @@ Alertmanagers.
|
|||
To avoid recording rules results and alerts state duplication in VictoriaMetrics server
|
||||
don't forget to configure [deduplication](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#deduplication).
|
||||
The recommended value for `-dedup.minScrapeInterval` must be multiple of vmalert's `-evaluationInterval`.
|
||||
If you observe inconsistent or "jumping" values in series produced by vmalert, try disabling `-datasource.queryTimeAlignment`
|
||||
command line flag. Because of alignment, two or more vmalert HA pairs will produce results with the same timestamps.
|
||||
But due of backfilling (data delivered to the datasource with some delay) values of such results may differ,
|
||||
which would affect deduplication logic and result into "jumping" datapoints.
|
||||
|
||||
Alertmanager will automatically deduplicate alerts with identical labels, so ensure that
|
||||
all `vmalert`s are having the same config.
|
||||
|
@ -986,6 +991,7 @@ The shortlist of configuration flags is the following:
|
|||
-datasource.queryStep duration
|
||||
How far a value can fallback to when evaluating queries. For example, if -datasource.queryStep=15s then param "step" with value "15s" will be added to every query. If set to 0, rule's evaluation interval will be used instead. (default 5m0s)
|
||||
-datasource.queryTimeAlignment
|
||||
Flag is deprecated and will be removed in next releases, please use `eval_alignment` in rule group instead.
|
||||
Whether to align "time" parameter with evaluation interval.Alignment supposed to produce deterministic results despite number of vmalert replicas or time they were started. See more details here https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1257 (default true)
|
||||
-datasource.roundDigits int
|
||||
Adds "round_digits" GET param to datasource requests. In VM "round_digits" limits the number of digits after the decimal point in response values.
|
||||
|
|
Loading…
Reference in a new issue