mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
28a9e92b5e
- Reduce memory usage by up to 5x when de-duplicating samples across big number of time series. - Reduce memory usage by up to 5x when aggregating across big number of output time series. - Add lib/promutils.LabelsCompressor, which is going to be used by other VictoriaMetrics components for reducing memory usage for marshaled []prompbmarshal.Label. - Add `dedup_interval` option at aggregation config, which allows setting individual deduplication intervals per each aggregation. - Add `keep_metric_names` option at aggregation config, which allows keeping the original metric names in the output samples. - Add `unique_samples` output, which counts the number of unique sample values. - Add `increase_prometheus` and `total_prometheus` outputs, which ignore the first sample per each newly encountered time series. - Use 64-bit hashes instead of marshaled labels as map keys when calculating `count_series` output. This makes obsolete https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5579 - Expose various metrics, which may help debugging stream aggregation: - vm_streamaggr_dedup_state_size_bytes - the size of data structures responsible for deduplication - vm_streamaggr_dedup_state_items_count - the number of items in the deduplication data structures - vm_streamaggr_labels_compressor_size_bytes - the size of labels compressor data structures - vm_streamaggr_labels_compressor_items_count - the number of entries in the labels compressor - vm_streamaggr_flush_duration_seconds - a histogram, which shows the duration of stream aggregation flushes - vm_streamaggr_dedup_flush_duration_seconds - a histogram, which shows the duration of deduplication flushes - vm_streamaggr_flush_timeouts_total - counter for timed out stream aggregation flushes, which took longer than the configured interval - vm_streamaggr_dedup_flush_timeouts_total - counter for timed out deduplication flushes, which took longer than the configured dedup_interval - Actualize docs/stream-aggregation.md The memory usage reduction increases CPU usage during stream aggregation by up to 30%. This commit is based on https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5850 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5898
125 lines
3.5 KiB
Go
125 lines
3.5 KiB
Go
package promutils
|
|
|
|
import (
|
|
"sync"
|
|
"sync/atomic"
|
|
"unsafe"
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
|
)
|
|
|
|
// LabelsCompressor compresses []prompbmarshal.Label into short binary strings
|
|
type LabelsCompressor struct {
|
|
labelToIdx sync.Map
|
|
idxToLabel sync.Map
|
|
|
|
nextIdx atomic.Uint64
|
|
|
|
totalSizeBytes atomic.Uint64
|
|
}
|
|
|
|
// SizeBytes returns the size of lc data in bytes
|
|
func (lc *LabelsCompressor) SizeBytes() uint64 {
|
|
return uint64(unsafe.Sizeof(*lc)) + lc.totalSizeBytes.Load()
|
|
}
|
|
|
|
// ItemsCount returns the number of items in lc
|
|
func (lc *LabelsCompressor) ItemsCount() uint64 {
|
|
return lc.nextIdx.Load()
|
|
}
|
|
|
|
// Compress compresses labels, appends the compressed labels to dst and returns the result.
|
|
func (lc *LabelsCompressor) Compress(dst []byte, labels []prompbmarshal.Label) []byte {
|
|
if len(labels) == 0 {
|
|
// Fast path
|
|
return append(dst, 0)
|
|
}
|
|
|
|
a := encoding.GetUint64s(len(labels) + 1)
|
|
a.A[0] = uint64(len(labels))
|
|
lc.compress(a.A[1:], labels)
|
|
dst = encoding.MarshalVarUint64s(dst, a.A)
|
|
encoding.PutUint64s(a)
|
|
return dst
|
|
}
|
|
|
|
func (lc *LabelsCompressor) compress(dst []uint64, labels []prompbmarshal.Label) {
|
|
if len(labels) == 0 {
|
|
return
|
|
}
|
|
_ = dst[len(labels)-1]
|
|
for i := range labels {
|
|
label := &labels[i]
|
|
v, ok := lc.labelToIdx.Load(*label)
|
|
if !ok {
|
|
v = lc.nextIdx.Add(1)
|
|
labelCopy := cloneLabel(label)
|
|
lc.idxToLabel.Store(v, labelCopy)
|
|
lc.labelToIdx.Store(*labelCopy, v)
|
|
|
|
// Update lc.totalSizeBytes
|
|
labelSizeBytes := uint64(len(label.Name) + len(label.Value))
|
|
entrySizeBytes := labelSizeBytes + uint64(unsafe.Sizeof(label)+unsafe.Sizeof(*label)+2*unsafe.Sizeof(v))
|
|
lc.totalSizeBytes.Add(entrySizeBytes)
|
|
}
|
|
dst[i] = v.(uint64)
|
|
}
|
|
}
|
|
|
|
func cloneLabel(label *prompbmarshal.Label) *prompbmarshal.Label {
|
|
// pre-allocate memory for label name and value
|
|
n := len(label.Name) + len(label.Value)
|
|
buf := make([]byte, 0, n)
|
|
|
|
buf = append(buf, label.Name...)
|
|
labelName := bytesutil.ToUnsafeString(buf)
|
|
|
|
buf = append(buf, label.Value...)
|
|
labelValue := bytesutil.ToUnsafeString(buf[len(labelName):])
|
|
return &prompbmarshal.Label{
|
|
Name: labelName,
|
|
Value: labelValue,
|
|
}
|
|
}
|
|
|
|
// Decompress decompresses src into []prompbmarshal.Label, appends it to dst and returns the result.
|
|
func (lc *LabelsCompressor) Decompress(dst []prompbmarshal.Label, src []byte) []prompbmarshal.Label {
|
|
tail, labelsLen, err := encoding.UnmarshalVarUint64(src)
|
|
if err != nil {
|
|
logger.Panicf("BUG: cannot unmarshal labels length: %s", err)
|
|
}
|
|
if labelsLen == 0 {
|
|
// fast path - nothing to decode
|
|
if len(tail) > 0 {
|
|
logger.Panicf("BUG: unexpected non-empty tail left; len(tail)=%d; tail=%X", len(tail), tail)
|
|
}
|
|
return dst
|
|
}
|
|
|
|
a := encoding.GetUint64s(int(labelsLen))
|
|
tail, err = encoding.UnmarshalVarUint64s(a.A, tail)
|
|
if err != nil {
|
|
logger.Panicf("BUG: cannot unmarshal label indexes: %s", err)
|
|
}
|
|
if len(tail) > 0 {
|
|
logger.Panicf("BUG: unexpected non-empty tail left: len(tail)=%d; tail=%X", len(tail), tail)
|
|
}
|
|
dst = lc.decompress(dst, a.A)
|
|
encoding.PutUint64s(a)
|
|
return dst
|
|
}
|
|
|
|
func (lc *LabelsCompressor) decompress(dst []prompbmarshal.Label, src []uint64) []prompbmarshal.Label {
|
|
for _, idx := range src {
|
|
v, ok := lc.idxToLabel.Load(idx)
|
|
if !ok {
|
|
logger.Panicf("BUG: missing label for idx=%d", idx)
|
|
}
|
|
label := *(v.(*prompbmarshal.Label))
|
|
dst = append(dst, label)
|
|
}
|
|
return dst
|
|
}
|