2024-05-13 13:39:49 +00:00
|
|
|
package streamaggr
|
|
|
|
|
|
|
|
import (
|
|
|
|
"sync"
|
|
|
|
|
2024-06-07 14:24:09 +00:00
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
2024-05-13 13:39:49 +00:00
|
|
|
)
|
|
|
|
|
2024-07-14 15:23:59 +00:00
|
|
|
// rateAggrState calculates output=rate_avg and output=rate_sum, e.g. the average
// and the summary per-second increase rate for counter metrics.
type rateAggrState struct {
	// m maps the output key (string) to the corresponding *rateStateValue.
	m sync.Map

	// isAvg is set to true if rate_avg() must be calculated instead of rate_sum().
	isAvg bool
}
|
|
|
|
|
|
|
|
type rateStateValue struct {
|
|
|
|
mu sync.Mutex
|
2024-07-03 10:42:45 +00:00
|
|
|
state map[string]rateState
|
2024-05-13 13:39:49 +00:00
|
|
|
deleted bool
|
2024-07-03 10:42:45 +00:00
|
|
|
deleteDeadline int64
|
2024-05-13 13:39:49 +00:00
|
|
|
}
|
|
|
|
|
2024-07-03 10:42:45 +00:00
|
|
|
type rateState struct {
|
|
|
|
lastValues [aggrStateSize]rateLastValueState
|
|
|
|
// prevTimestamp stores timestamp of the last registered value
|
|
|
|
// in the previous aggregation interval
|
|
|
|
prevTimestamp int64
|
2024-05-13 13:39:49 +00:00
|
|
|
|
2024-07-03 10:42:45 +00:00
|
|
|
// prevValue stores last registered value
|
|
|
|
// in the previous aggregation interval
|
|
|
|
prevValue float64
|
|
|
|
deleteDeadline int64
|
|
|
|
}
|
2024-07-14 15:23:59 +00:00
|
|
|
|
2024-07-03 10:42:45 +00:00
|
|
|
// rateLastValueState holds the values registered for a single series
// in a single aggregation state slot.
type rateLastValueState struct {
	// firstValue is the value of the first sample registered in the slot
	// for a series, which already has a value from the previous aggregation interval.
	firstValue float64

	// value is the last registered value.
	value float64

	// timestamp is the timestamp of the last registered value.
	// Zero means that no values have been registered in the slot yet.
	timestamp int64

	// total stores cumulative difference between registered values
	// in the aggregation interval
	total float64
}
|
|
|
|
|
2024-07-03 10:42:45 +00:00
|
|
|
func newRateAggrState(isAvg bool) *rateAggrState {
|
2024-05-13 13:39:49 +00:00
|
|
|
return &rateAggrState{
|
2024-07-14 15:23:59 +00:00
|
|
|
isAvg: isAvg,
|
2024-05-13 13:39:49 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-07-03 10:42:45 +00:00
|
|
|
func (as *rateAggrState) pushSamples(samples []pushSample, deleteDeadline int64, idx int) {
|
2024-05-13 13:39:49 +00:00
|
|
|
for i := range samples {
|
|
|
|
s := &samples[i]
|
|
|
|
inputKey, outputKey := getInputOutputKey(s.key)
|
|
|
|
|
|
|
|
again:
|
|
|
|
v, ok := as.m.Load(outputKey)
|
|
|
|
if !ok {
|
|
|
|
// The entry is missing in the map. Try creating it.
|
2024-07-03 10:42:45 +00:00
|
|
|
rsv := &rateStateValue{
|
|
|
|
state: make(map[string]rateState),
|
2024-05-13 13:39:49 +00:00
|
|
|
}
|
2024-07-03 10:42:45 +00:00
|
|
|
v = rsv
|
2024-06-07 14:24:09 +00:00
|
|
|
outputKey = bytesutil.InternString(outputKey)
|
2024-05-13 13:39:49 +00:00
|
|
|
vNew, loaded := as.m.LoadOrStore(outputKey, v)
|
|
|
|
if loaded {
|
|
|
|
// Use the entry created by a concurrent goroutine.
|
|
|
|
v = vNew
|
|
|
|
}
|
|
|
|
}
|
|
|
|
sv := v.(*rateStateValue)
|
|
|
|
sv.mu.Lock()
|
|
|
|
deleted := sv.deleted
|
|
|
|
if !deleted {
|
2024-07-03 10:42:45 +00:00
|
|
|
state, ok := sv.state[inputKey]
|
|
|
|
lv := state.lastValues[idx]
|
|
|
|
if ok && lv.timestamp > 0 {
|
2024-05-13 13:39:49 +00:00
|
|
|
if s.timestamp < lv.timestamp {
|
|
|
|
// Skip out of order sample
|
|
|
|
sv.mu.Unlock()
|
|
|
|
continue
|
|
|
|
}
|
2024-07-03 10:42:45 +00:00
|
|
|
if state.prevTimestamp == 0 {
|
|
|
|
state.prevTimestamp = lv.timestamp
|
|
|
|
state.prevValue = lv.value
|
|
|
|
}
|
2024-05-13 13:39:49 +00:00
|
|
|
if s.value >= lv.value {
|
2024-07-03 10:42:45 +00:00
|
|
|
lv.total += s.value - lv.value
|
2024-05-13 13:39:49 +00:00
|
|
|
} else {
|
|
|
|
// counter reset
|
2024-07-03 10:42:45 +00:00
|
|
|
lv.total += s.value
|
2024-05-13 13:39:49 +00:00
|
|
|
}
|
2024-07-03 10:42:45 +00:00
|
|
|
} else if state.prevTimestamp > 0 {
|
|
|
|
lv.firstValue = s.value
|
2024-05-13 13:39:49 +00:00
|
|
|
}
|
|
|
|
lv.value = s.value
|
|
|
|
lv.timestamp = s.timestamp
|
2024-07-03 10:42:45 +00:00
|
|
|
state.lastValues[idx] = lv
|
|
|
|
state.deleteDeadline = deleteDeadline
|
2024-06-07 14:24:09 +00:00
|
|
|
inputKey = bytesutil.InternString(inputKey)
|
2024-07-03 10:42:45 +00:00
|
|
|
sv.state[inputKey] = state
|
2024-05-13 13:39:49 +00:00
|
|
|
sv.deleteDeadline = deleteDeadline
|
|
|
|
}
|
|
|
|
sv.mu.Unlock()
|
|
|
|
if deleted {
|
|
|
|
// The entry has been deleted by the concurrent call to flushState
|
|
|
|
// Try obtaining and updating the entry again.
|
|
|
|
goto again
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
app/vmagent/remotewrite: follow-up for f153f54d11250da050aa93bc4fa9b7ba9e144691
- Move the remaining code responsible for stream aggregation initialization from remotewrite.go to streamaggr.go .
This improves code maintainability a bit.
- Properly shut down streamaggr.Aggregators initialized inside remotewrite.CheckStreamAggrConfigs().
This prevents potential resource leaks.
- Use separate functions for initializing and reloading of global stream aggregation and per-remoteWrite.url stream aggregation.
This makes the code easier to read and maintain. This also fixes INFO and ERROR logs emitted by these functions.
- Add an ability to specify `name` option in every stream aggregation config. This option is used as `name` label
in metrics exposed by stream aggregation at /metrics page. This simplifies investigation of the exposed metrics.
- Add `path` label additionally to `name`, `url` and `position` labels at metrics exposed by streaming aggregation.
This label should simplify investigation of the exposed metrics.
- Remove `match` and `group` labels from metrics exposed by streaming aggregation, since they have little practical applicability:
it is hard to use these labels in query filters and aggregation functions.
- Rename the metric `vm_streamaggr_flushed_samples_total` to less misleading `vm_streamaggr_output_samples_total` .
This metric shows the number of samples generated by the corresponding streaming aggregation rule.
This metric has been added in the commit 861852f2624895e01f93ce196607c72616ce2a94 .
See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6462
- Remove the metric `vm_streamaggr_stale_samples_total`, since it is unclear how it can be used in practice.
This metric has been added in the commit 861852f2624895e01f93ce196607c72616ce2a94 .
See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6462
- Remove Alias and aggrID fields from streamaggr.Options struct, since these fields aren't related to optional params,
which could modify the behaviour of the constructed streaming aggregator.
Convert the Alias field to regular argument passed to LoadFromFile() function, since this argument is mandatory.
- Pass Options arg to LoadFromFile() function by reference, since this structure is quite big.
This also allows passing nil instead of Options when default options are enough.
- Add `name`, `path`, `url` and `position` labels to `vm_streamaggr_dedup_state_size_bytes` and `vm_streamaggr_dedup_state_items_count` metrics,
so they have consistent set of labels comparing to the rest of streaming aggregation metrics.
- Convert aggregator.aggrStates field type from `map[string]aggrState` to `[]aggrOutput`, where `aggrOutput` contains the corresponding
`aggrState` plus all the related metrics (currently only `vm_streamaggr_output_samples_total` metric is exposed with the corresponding
`output` label per each configured output function). This simplifies and speeds up the code responsible for updating per-output
metrics. This is a follow-up for the commit 2eb1bc4f814037ae87ac6556011ae0d3caee6bc8 .
See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6604
- Added missing urls to docs ( https://docs.victoriametrics.com/stream-aggregation/ ) in error messages. These urls help users
figure out why VictoriaMetrics or vmagent generates the corresponding error messages. The urls were removed for an unknown reason
in the commit 2eb1bc4f814037ae87ac6556011ae0d3caee6bc8 .
- Fix incorrect update for `vm_streamaggr_output_samples_total` metric in flushCtx.appendSeriesWithExtraLabel() function.
While at it, reduce memory usage by limiting the maximum number of samples per flush to 10K.
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5467
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6268
2024-07-15 16:01:37 +00:00
|
|
|
func (as *rateAggrState) getSuffix() string {
|
2024-07-03 10:42:45 +00:00
|
|
|
if as.isAvg {
|
|
|
|
return "rate_avg"
|
|
|
|
}
|
|
|
|
return "rate_sum"
|
app/vmagent/remotewrite: follow-up for f153f54d11250da050aa93bc4fa9b7ba9e144691
- Move the remaining code responsible for stream aggregation initialization from remotewrite.go to streamaggr.go .
This improves code maintainability a bit.
- Properly shut down streamaggr.Aggregators initialized inside remotewrite.CheckStreamAggrConfigs().
This prevents from potential resource leaks.
- Use separate functions for initializing and reloading of global stream aggregation and per-remoteWrite.url stream aggregation.
This makes the code easier to read and maintain. This also fixes INFO and ERROR logs emitted by these functions.
- Add an ability to specify `name` option in every stream aggregation config. This option is used as `name` label
in metrics exposed by stream aggregation at /metrics page. This simplifies investigation of the exposed metrics.
- Add `path` label additionally to `name`, `url` and `position` labels at metrics exposed by streaming aggregation.
This label should simplify investigation of the exposed metrics.
- Remove `match` and `group` labels from metrics exposed by streaming aggregation, since they have little practical applicability:
it is hard to use these labels in query filters and aggregation functions.
- Rename the metric `vm_streamaggr_flushed_samples_total` to less misleading `vm_streamaggr_output_samples_total` .
This metric shows the number of samples generated by the corresponding streaming aggregation rule.
This metric has been added in the commit 861852f2624895e01f93ce196607c72616ce2a94 .
See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6462
- Remove the metric `vm_streamaggr_stale_samples_total`, since it is unclear how it can be used in practice.
This metric has been added in the commit 861852f2624895e01f93ce196607c72616ce2a94 .
See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6462
- Remove Alias and aggrID fields from streamaggr.Options struct, since these fields aren't related to optional params,
which could modify the behaviour of the constructed streaming aggregator.
Convert the Alias field to regular argument passed to LoadFromFile() function, since this argument is mandatory.
- Pass Options arg to LoadFromFile() function by reference, since this structure is quite big.
This also allows passing nil instead of Options when default options are enough.
- Add `name`, `path`, `url` and `position` labels to `vm_streamaggr_dedup_state_size_bytes` and `vm_streamaggr_dedup_state_items_count` metrics,
so they have consistent set of labels comparing to the rest of streaming aggregation metrics.
- Convert aggregator.aggrStates field type from `map[string]aggrState` to `[]aggrOutput`, where `aggrOutput` contains the corresponding
`aggrState` plus all the related metrics (currently only `vm_streamaggr_output_samples_total` metric is exposed with the corresponding
`output` label per each configured output function). This simplifies and speeds up the code responsible for updating per-output
metrics. This is a follow-up for the commit 2eb1bc4f814037ae87ac6556011ae0d3caee6bc8 .
See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6604
- Added missing urls to docs ( https://docs.victoriametrics.com/stream-aggregation/ ) in error messages. These urls help users
figuring out why VictoriaMetrics or vmagent generates the corresponding error messages. The urls were removed for unknown reason
in the commit 2eb1bc4f814037ae87ac6556011ae0d3caee6bc8 .
- Fix incorrect update for `vm_streamaggr_output_samples_total` metric in flushCtx.appendSeriesWithExtraLabel() function.
While at it, reduce memory usage by limiting the maximum number of samples per flush to 10K.
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5467
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6268
2024-07-15 16:01:37 +00:00
|
|
|
}
|
|
|
|
|
2024-07-03 10:42:45 +00:00
|
|
|
func (as *rateAggrState) flushState(ctx *flushCtx) {
|
2024-07-14 15:23:59 +00:00
|
|
|
m := &as.m
|
2024-07-03 10:42:45 +00:00
|
|
|
suffix := as.getSuffix()
|
2024-07-14 15:23:59 +00:00
|
|
|
m.Range(func(k, v any) bool {
|
|
|
|
sv := v.(*rateStateValue)
|
|
|
|
sv.mu.Lock()
|
2024-07-03 10:42:45 +00:00
|
|
|
|
|
|
|
// check for stale entries
|
|
|
|
deleted := ctx.flushTimestamp > sv.deleteDeadline
|
|
|
|
if deleted {
|
2024-05-13 13:39:49 +00:00
|
|
|
// Mark the current entry as deleted
|
2024-07-14 15:23:59 +00:00
|
|
|
sv.deleted = true
|
|
|
|
sv.mu.Unlock()
|
2024-05-13 13:39:49 +00:00
|
|
|
m.Delete(k)
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
2024-07-03 10:42:45 +00:00
|
|
|
// Delete outdated entries in state
|
2024-07-09 08:07:10 +00:00
|
|
|
rate := 0.0
|
|
|
|
countSeries := 0
|
2024-07-03 10:42:45 +00:00
|
|
|
for k1, state := range sv.state {
|
|
|
|
if ctx.flushTimestamp > state.deleteDeadline {
|
|
|
|
delete(sv.state, k1)
|
|
|
|
continue
|
2024-05-13 13:39:49 +00:00
|
|
|
}
|
2024-07-03 10:42:45 +00:00
|
|
|
v1 := state.lastValues[ctx.idx]
|
|
|
|
rateInterval := v1.timestamp - state.prevTimestamp
|
|
|
|
if rateInterval > 0 && state.prevTimestamp > 0 {
|
|
|
|
if v1.firstValue >= state.prevValue {
|
|
|
|
v1.total += v1.firstValue - state.prevValue
|
|
|
|
} else {
|
|
|
|
v1.total += v1.firstValue
|
|
|
|
}
|
|
|
|
|
|
|
|
// calculate rate only if value was seen at least twice with different timestamps
|
|
|
|
rate += (v1.total) * 1000 / float64(rateInterval)
|
|
|
|
state.prevTimestamp = v1.timestamp
|
|
|
|
state.prevValue = v1.value
|
2024-07-09 08:07:10 +00:00
|
|
|
countSeries++
|
2024-07-03 10:42:45 +00:00
|
|
|
}
|
|
|
|
state.lastValues[ctx.idx] = rateLastValueState{}
|
|
|
|
sv.state[k1] = state
|
2024-05-13 13:39:49 +00:00
|
|
|
}
|
2024-07-03 10:42:45 +00:00
|
|
|
|
2024-05-13 13:39:49 +00:00
|
|
|
sv.mu.Unlock()
|
2024-07-03 10:42:45 +00:00
|
|
|
|
2024-07-09 08:07:10 +00:00
|
|
|
if countSeries > 0 {
|
2024-07-03 10:42:45 +00:00
|
|
|
if as.isAvg {
|
2024-07-09 08:07:10 +00:00
|
|
|
rate /= float64(countSeries)
|
2024-07-03 10:42:45 +00:00
|
|
|
}
|
|
|
|
key := k.(string)
|
|
|
|
ctx.appendSeries(key, suffix, rate)
|
|
|
|
}
|
2024-05-13 13:39:49 +00:00
|
|
|
return true
|
|
|
|
})
|
|
|
|
}
|