lib/streamaggr: pick sample with bigger timestamp or value on deduplicator (#5939)
Apply the same deduplication logic as described at https://docs.victoriametrics.com/#deduplication. This requires more memory for deduplication, since a timestamp must now be tracked for every record, but it makes deduplication more consistent.

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5643

Co-authored-by: Roman Khavronenko <roman@victoriametrics.com>
parent e80b44f19d · commit 15e33d56f1

7 changed files with 31 additions and 13 deletions
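
The rule being applied is the one documented for [deduplication](https://docs.victoriametrics.com/#deduplication): among samples that share a series key within the dedup interval, the sample with the bigger timestamp wins, and when timestamps are equal the bigger value wins. A minimal standalone sketch of just that comparison (the names below are illustrative, not the library's API):

```go
package main

import "fmt"

// sample is an illustrative stand-in carrying only the two fields
// the deduplicator compares.
type sample struct {
	value     float64
	timestamp int64 // unix milliseconds
}

// wins reports whether candidate should replace current: prefer the
// bigger timestamp; on equal timestamps, prefer the bigger value.
func wins(candidate, current sample) bool {
	return candidate.timestamp > current.timestamp ||
		(candidate.timestamp == current.timestamp && candidate.value > current.value)
}

func main() {
	cur := sample{value: 433, timestamp: 1_000}
	cand := sample{value: 90984, timestamp: 1_000}
	fmt.Println(wins(cand, cur)) // true: same timestamp, bigger value
}
```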

```diff
@@ -53,6 +53,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
 * FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): support client-side TLS configuration for VictoriaMetrics destination specified via `--vm-*` cmd-line flags used in [InfluxDB](https://docs.victoriametrics.com/vmctl/#migrating-data-from-influxdb-1x), [Remote Read protocol](https://docs.victoriametrics.com/vmctl/#migrating-data-by-remote-read-protocol), [OpenTSDB](https://docs.victoriametrics.com/vmctl/#migrating-data-from-opentsdb), [Prometheus](https://docs.victoriametrics.com/vmctl/#migrating-data-from-prometheus) and [Promscale](https://docs.victoriametrics.com/vmctl/#migrating-data-from-promscale) migration modes.
 * BUGFIX: do not drop `match[]` filter at [`/api/v1/series`](https://docs.victoriametrics.com/url-examples/#apiv1series) if `-search.ignoreExtraFiltersAtLabelsAPI` command-line flag is set, since missing `match[]` filter breaks `/api/v1/series` requests.
+* BUGFIX: [stream aggregation](https://docs.victoriametrics.com/stream-aggregation/): pick samples with bigger values and timestamps on deduplication interval
 
 ## [v1.99.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.99.0)
```

```diff
@@ -28,7 +28,8 @@ type dedupAggrShardNopad struct {
 }
 
 type dedupAggrSample struct {
-	value float64
+	value     float64
+	timestamp int64
 }
 
 func newDedupAggr() *dedupAggr {
```

```diff
@@ -172,8 +173,21 @@ func (das *dedupAggrShard) pushSamples(samples []pushSample) {
 		das.m = m
 	}
 	for _, sample := range samples {
-		m[sample.key] = dedupAggrSample{
-			value: sample.value,
+		s, ok := m[sample.key]
+		if !ok {
+			m[sample.key] = dedupAggrSample{
+				value:     sample.value,
+				timestamp: sample.timestamp,
+			}
+			continue
+		}
+		// update the existing value according to logic described in
+		// https://docs.victoriametrics.com/#deduplication
+		if sample.timestamp > s.timestamp || (sample.timestamp == s.timestamp && sample.value > s.value) {
+			m[sample.key] = dedupAggrSample{
+				value:     sample.value,
+				timestamp: sample.timestamp,
+			}
 		}
 	}
 }
```

```diff
@@ -23,8 +23,8 @@ func TestDedupAggrSerial(t *testing.T) {
 		da.pushSamples(samples)
 	}
 
-	if n := da.sizeBytes(); n > 3_400_000 {
-		t.Fatalf("too big dedupAggr state before flush: %d bytes; it shouldn't exceed 3_400_000 bytes", n)
+	if n := da.sizeBytes(); n > 4_200_000 {
+		t.Fatalf("too big dedupAggr state before flush: %d bytes; it shouldn't exceed 4_200_000 bytes", n)
 	}
 	if n := da.itemsCount(); n != seriesCount {
 		t.Fatalf("unexpected itemsCount; got %d; want %d", n, seriesCount)
```
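
The raised `sizeBytes` bound tracks the extra per-series cost: each `dedupAggrSample` now stores an `int64` timestamp next to the value. A quick sketch of just the struct growth (the `4_200_000` limit itself also covers map and key overhead, which this does not model):

```go
package main

import (
	"fmt"
	"unsafe"
)

// Illustrative before/after shapes of dedupAggrSample.
type sampleBefore struct {
	value float64
}

type sampleAfter struct {
	value     float64
	timestamp int64
}

func main() {
	fmt.Println(unsafe.Sizeof(sampleBefore{})) // 8 bytes
	fmt.Println(unsafe.Sizeof(sampleAfter{}))  // 16 bytes: +8 per tracked series
}
```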

```diff
@@ -107,8 +107,9 @@ func (d *Deduplicator) Push(tss []prompbmarshal.TimeSeries) {
 		key := bytesutil.InternBytes(buf)
 		for _, s := range ts.Samples {
 			pss = append(pss, pushSample{
-				key:   key,
-				value: s.Value,
+				key:       key,
+				value:     s.Value,
+				timestamp: s.Timestamp,
 			})
 		}
 	}
```

```diff
@@ -41,7 +41,7 @@ baz_aaa_aaa_fdd{instance="x",job="aaa",pod="sdfd-dfdfdfs",node="aosijjewrerfd",n
 bar{container="ohohffd",job="aaa",namespace="asdff",pod="sdfd-dfdfdfs"} 34.54
 baz_aaa_aaa_fdd{container="ohohffd",job="aaa",namespace="asdff",pod="sdfd-dfdfdfs"} -2.3
 foo{container="ohohffd",job="aaa",namespace="asdff",pod="sdfd-dfdfdfs"} 894
-x 433
+x 90984
 `
 	if result != resultExpected {
 		t.Fatalf("unexpected result; got\n%s\nwant\n%s", result, resultExpected)
```

The expected value for series `x` changes because the surviving duplicate is now picked by timestamp and value rather than by arrival order.

```diff
@@ -776,8 +776,9 @@ func (a *aggregator) Push(tss []prompbmarshal.TimeSeries, matchIdxs []byte) {
 			continue
 		}
 		samples = append(samples, pushSample{
-			key:   key,
-			value: sample.Value,
+			key:       key,
+			value:     sample.Value,
+			timestamp: sample.Timestamp,
 		})
 	}
 }
```

```diff
@@ -851,8 +852,9 @@ func (ctx *pushCtx) reset() {
 }
 
 type pushSample struct {
-	key   string
-	value float64
+	key       string
+	value     float64
+	timestamp int64
 }
 
 func getPushCtx() *pushCtx {
```
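
Both `Push` paths above (`Deduplicator.Push` and `aggregator.Push`) now copy `Timestamp` from each incoming sample into `pushSample`. A self-contained sketch of that conversion, using simplified stand-ins for the `prompbmarshal` types (only the fields touched by this commit are modeled):

```go
package main

import "fmt"

// promSample and timeSeries are simplified stand-ins for
// prompbmarshal.Sample and prompbmarshal.TimeSeries.
type promSample struct {
	Value     float64
	Timestamp int64
}

type timeSeries struct {
	Samples []promSample
}

// pushSample mirrors the struct from lib/streamaggr after this commit.
type pushSample struct {
	key       string
	value     float64
	timestamp int64
}

// appendPushSamples follows the loop shape in Deduplicator.Push and
// aggregator.Push: one pushSample per incoming sample, timestamp included.
func appendPushSamples(dst []pushSample, key string, ts timeSeries) []pushSample {
	for _, s := range ts.Samples {
		dst = append(dst, pushSample{
			key:       key,
			value:     s.Value,
			timestamp: s.Timestamp,
		})
	}
	return dst
}

func main() {
	pss := appendPushSamples(nil, `bar{baz="qwe"}`, timeSeries{
		Samples: []promSample{{Value: 4.34, Timestamp: 1700000000000}},
	})
	fmt.Printf("%+v\n", pss)
}
```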

```diff
@@ -939,7 +939,7 @@ foo{baz="qwe"} -5
 bar{baz="qwer"} 343
 bar{baz="qwer"} 344
 foo{baz="qwe"} 10
-`, `bar:1m_sum_samples{baz="qwe"} 2
+`, `bar:1m_sum_samples{baz="qwe"} 4.34
 bar:1m_sum_samples{baz="qwer"} 344
 foo:1m_sum_samples 123
 foo:1m_sum_samples{baz="qwe"} 10
```

Here too the expected output shifts: the deduplicator now keeps the duplicate with the bigger value for `bar{baz="qwe"}` before aggregation runs.