mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-10 15:14:09 +00:00
lib/storage: deduplicate samples more thoroughly
Previously, some duplicate samples could be left on disk for time series with a high churn rate. This could result in higher disk space usage.
This commit is contained in:
parent
92070cbb67
commit
4ff647137a
10 changed files with 173 additions and 17 deletions
|
@ -20,6 +20,7 @@ sort: 15
|
|||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): restore the ability to use `$labels.alertname` in labels templating. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1921).
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): add missing `query` caption to the input field for the query. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1900).
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix navigation over query history with `Ctrl+up/down` and fix zoom relative to the cursor position. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1936).
|
||||
* BUGFIX: deduplicate samples more thoroughly if [deduplication](https://docs.victoriametrics.com/#deduplication) is enabled. Previously, some duplicate samples could be left on disk for time series with a high churn rate. This could result in bigger storage space requirements.
|
||||
|
||||
|
||||
## [v1.70.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.70.0)
|
||||
|
|
|
@ -147,12 +147,25 @@ func (b *Block) tooBig() bool {
|
|||
return false
|
||||
}
|
||||
|
||||
func (b *Block) deduplicateSamplesDuringMerge(dedupInterval int64) {
|
||||
if len(b.values) == 0 {
|
||||
// Nothing to dedup or the data is already marshaled.
|
||||
func (b *Block) deduplicateSamplesDuringMerge() {
|
||||
if !isDedupEnabled() {
|
||||
// Deduplication is disabled
|
||||
return
|
||||
}
|
||||
// Unmarshal block if it isn't unmarshaled yet in order to apply the de-duplication to unmarshaled samples.
|
||||
if err := b.UnmarshalData(); err != nil {
|
||||
logger.Panicf("FATAL: cannot unmarshal block: %s", err)
|
||||
}
|
||||
srcTimestamps := b.timestamps[b.nextIdx:]
|
||||
if len(srcTimestamps) < 2 {
|
||||
// Nothing to dedup.
|
||||
return
|
||||
}
|
||||
dedupInterval := GetDedupInterval()
|
||||
if dedupInterval <= 0 {
|
||||
// Deduplication is disabled.
|
||||
return
|
||||
}
|
||||
srcValues := b.values[b.nextIdx:]
|
||||
timestamps, values := deduplicateSamplesDuringMerge(srcTimestamps, srcValues, dedupInterval)
|
||||
dedups := len(srcTimestamps) - len(timestamps)
|
||||
|
|
|
@ -184,12 +184,9 @@ func (bsw *blockStreamWriter) MustClose() {
|
|||
}
|
||||
|
||||
// WriteExternalBlock writes b to bsw and updates ph and rowsMerged.
|
||||
func (bsw *blockStreamWriter) WriteExternalBlock(b *Block, ph *partHeader, rowsMerged *uint64, needDedup bool) {
|
||||
func (bsw *blockStreamWriter) WriteExternalBlock(b *Block, ph *partHeader, rowsMerged *uint64) {
|
||||
atomic.AddUint64(rowsMerged, uint64(b.rowsCount()))
|
||||
if needDedup {
|
||||
dedupInterval := GetDedupInterval()
|
||||
b.deduplicateSamplesDuringMerge(dedupInterval)
|
||||
}
|
||||
b.deduplicateSamplesDuringMerge()
|
||||
headerData, timestampsData, valuesData := b.MarshalData(bsw.timestampsBlockOffset, bsw.valuesBlockOffset)
|
||||
usePrevTimestamps := len(bsw.prevTimestampsData) > 0 && bytes.Equal(timestampsData, bsw.prevTimestampsData)
|
||||
if usePrevTimestamps {
|
||||
|
|
|
@ -49,7 +49,7 @@ func benchmarkBlockStreamWriter(b *testing.B, ebs []Block, rowsCount int, writeR
|
|||
|
||||
bsw.InitFromInmemoryPart(&mp)
|
||||
for i := range ebsCopy {
|
||||
bsw.WriteExternalBlock(&ebsCopy[i], &ph, &rowsMerged, false)
|
||||
bsw.WriteExternalBlock(&ebsCopy[i], &ph, &rowsMerged)
|
||||
}
|
||||
bsw.MustClose()
|
||||
mp.Reset()
|
||||
|
|
|
@ -20,6 +20,10 @@ func GetDedupInterval() int64 {
|
|||
|
||||
var globalDedupInterval int64
|
||||
|
||||
func isDedupEnabled() bool {
|
||||
return globalDedupInterval > 0
|
||||
}
|
||||
|
||||
// DeduplicateSamples removes samples from src* if they are closer to each other than dedupInterval in milliseconds.
|
||||
func DeduplicateSamples(srcTimestamps []int64, srcValues []float64, dedupInterval int64) ([]int64, []float64) {
|
||||
if !needsDedup(srcTimestamps, dedupInterval) {
|
||||
|
|
|
@ -76,14 +76,14 @@ func mergeBlockStreamsInternal(ph *partHeader, bsw *blockStreamWriter, bsm *bloc
|
|||
if bsm.Block.bh.TSID.Less(&pendingBlock.bh.TSID) {
|
||||
logger.Panicf("BUG: the next TSID=%+v is smaller than the current TSID=%+v", &bsm.Block.bh.TSID, &pendingBlock.bh.TSID)
|
||||
}
|
||||
bsw.WriteExternalBlock(pendingBlock, ph, rowsMerged, true)
|
||||
bsw.WriteExternalBlock(pendingBlock, ph, rowsMerged)
|
||||
pendingBlock.CopyFrom(bsm.Block)
|
||||
continue
|
||||
}
|
||||
if pendingBlock.tooBig() && pendingBlock.bh.MaxTimestamp <= bsm.Block.bh.MinTimestamp {
|
||||
// Fast path - pendingBlock is too big and it doesn't overlap with bsm.Block.
|
||||
// Write the pendingBlock and then deal with bsm.Block.
|
||||
bsw.WriteExternalBlock(pendingBlock, ph, rowsMerged, true)
|
||||
bsw.WriteExternalBlock(pendingBlock, ph, rowsMerged)
|
||||
pendingBlock.CopyFrom(bsm.Block)
|
||||
continue
|
||||
}
|
||||
|
@ -119,13 +119,13 @@ func mergeBlockStreamsInternal(ph *partHeader, bsw *blockStreamWriter, bsm *bloc
|
|||
tmpBlock.timestamps = tmpBlock.timestamps[:maxRowsPerBlock]
|
||||
tmpBlock.values = tmpBlock.values[:maxRowsPerBlock]
|
||||
tmpBlock.fixupTimestamps()
|
||||
bsw.WriteExternalBlock(tmpBlock, ph, rowsMerged, true)
|
||||
bsw.WriteExternalBlock(tmpBlock, ph, rowsMerged)
|
||||
}
|
||||
if err := bsm.Error(); err != nil {
|
||||
return fmt.Errorf("cannot read block to be merged: %w", err)
|
||||
}
|
||||
if !pendingBlockIsEmpty {
|
||||
bsw.WriteExternalBlock(pendingBlock, ph, rowsMerged, true)
|
||||
bsw.WriteExternalBlock(pendingBlock, ph, rowsMerged)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -1,11 +1,17 @@
|
|||
package storage
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/metricsql"
|
||||
)
|
||||
|
||||
// partHeader represents part header.
|
||||
|
@ -21,6 +27,9 @@ type partHeader struct {
|
|||
|
||||
// MaxTimestamp is the maximum timestamp in the part.
|
||||
MaxTimestamp int64
|
||||
|
||||
// MinDedupInterval is minimal dedup interval in milliseconds across all the blocks in the part.
|
||||
MinDedupInterval int64
|
||||
}
|
||||
|
||||
// String returns string representation of ph.
|
||||
|
@ -104,6 +113,10 @@ func (ph *partHeader) ParseFromPath(path string) error {
|
|||
return fmt.Errorf("blocksCount cannot be bigger than rowsCount; got blocksCount=%d, rowsCount=%d", ph.BlocksCount, ph.RowsCount)
|
||||
}
|
||||
|
||||
if err := ph.readMinDedupInterval(path); err != nil {
|
||||
return fmt.Errorf("cannot read min dedup interval: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -113,4 +126,34 @@ func (ph *partHeader) Reset() {
|
|||
ph.BlocksCount = 0
|
||||
ph.MinTimestamp = (1 << 63) - 1
|
||||
ph.MaxTimestamp = -1 << 63
|
||||
ph.MinDedupInterval = 0
|
||||
}
|
||||
|
||||
func (ph *partHeader) readMinDedupInterval(partPath string) error {
|
||||
filePath := partPath + "/min_dedup_interval"
|
||||
data, err := ioutil.ReadFile(filePath)
|
||||
if err != nil {
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
// The minimum dedup interval may not exist for old parts.
|
||||
ph.MinDedupInterval = 0
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("cannot read %q: %w", filePath, err)
|
||||
}
|
||||
dedupInterval, err := metricsql.DurationValue(string(data), 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot parse minimum dedup interval %q at %q: %w", data, filePath, err)
|
||||
}
|
||||
ph.MinDedupInterval = dedupInterval
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ph *partHeader) writeMinDedupInterval(partPath string) error {
|
||||
filePath := partPath + "/min_dedup_interval"
|
||||
dedupInterval := time.Duration(ph.MinDedupInterval) * time.Millisecond
|
||||
data := dedupInterval.String()
|
||||
if err := fs.WriteFileAtomically(filePath, []byte(data)); err != nil {
|
||||
return fmt.Errorf("cannot create %q: %w", filePath, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -835,7 +835,17 @@ func (pt *partition) ForceMergeAllParts() error {
|
|||
// Nothing to merge.
|
||||
return nil
|
||||
}
|
||||
// If len(pws) == 1, then the merge must run anyway, so deleted time series could be removed from the part.
|
||||
|
||||
// Check whether there is enough disk space for merging pws.
|
||||
newPartSize := getPartsSize(pws)
|
||||
maxOutBytes := fs.MustGetFreeSpace(pt.bigPartsPath)
|
||||
if newPartSize > maxOutBytes {
|
||||
freeSpaceNeededBytes := newPartSize - maxOutBytes
|
||||
logger.Warnf("cannot initiate force merge for the partition %s; additional space needed: %d bytes", pt.name, freeSpaceNeededBytes)
|
||||
return nil
|
||||
}
|
||||
|
||||
// If len(pws) == 1, then the merge must run anyway. This allows removing the deleted series and performing de-duplication if needed.
|
||||
if err := pt.mergePartsOptimal(pws, pt.stopCh); err != nil {
|
||||
return fmt.Errorf("cannot force merge %d parts from partition %q: %w", len(pws), pt.name, err)
|
||||
}
|
||||
|
@ -1056,6 +1066,31 @@ func atomicSetBool(p *uint64, b bool) {
|
|||
atomic.StoreUint64(p, v)
|
||||
}
|
||||
|
||||
func (pt *partition) runFinalDedup() error {
|
||||
if !isDedupNeeded(pt) {
|
||||
return nil
|
||||
}
|
||||
t := time.Now()
|
||||
logger.Infof("starting final dedup for partition %s", pt.name)
|
||||
if err := pt.ForceMergeAllParts(); err != nil {
|
||||
return fmt.Errorf("cannot perform final dedup for partition %s: %w", pt.name, err)
|
||||
}
|
||||
logger.Infof("final dedup for partition %s finished in %.3f seconds", pt.name, time.Since(t).Seconds())
|
||||
return nil
|
||||
}
|
||||
|
||||
func isDedupNeeded(pt *partition) bool {
|
||||
pws := pt.GetParts(nil)
|
||||
defer pt.PutParts(pws)
|
||||
dedupInterval := GetDedupInterval()
|
||||
if dedupInterval <= 0 {
|
||||
// The deduplication isn't needed.
|
||||
return false
|
||||
}
|
||||
minDedupInterval := getMinDedupInterval(pws)
|
||||
return minDedupInterval < dedupInterval
|
||||
}
|
||||
|
||||
// mergeParts merges pws.
|
||||
//
|
||||
// Merging is immediately stopped if stopCh is closed.
|
||||
|
@ -1146,6 +1181,11 @@ func (pt *partition) mergeParts(pws []*partWrapper, stopCh <-chan struct{}) erro
|
|||
}
|
||||
bsrs = nil
|
||||
|
||||
ph.MinDedupInterval = getMinDedupInterval(pws)
|
||||
if err := ph.writeMinDedupInterval(tmpPartPath); err != nil {
|
||||
return fmt.Errorf("cannot store min dedup interval for part %q: %w", tmpPartPath, err)
|
||||
}
|
||||
|
||||
// Create a transaction for atomic deleting old parts and moving
|
||||
// new part to its destination place.
|
||||
var bb bytesutil.ByteBuffer
|
||||
|
@ -1225,6 +1265,20 @@ func (pt *partition) mergeParts(pws []*partWrapper, stopCh <-chan struct{}) erro
|
|||
return nil
|
||||
}
|
||||
|
||||
func getMinDedupInterval(pws []*partWrapper) int64 {
|
||||
if len(pws) == 0 {
|
||||
return 0
|
||||
}
|
||||
dMin := pws[0].p.ph.MinDedupInterval
|
||||
for _, pw := range pws[1:] {
|
||||
d := pw.p.ph.MinDedupInterval
|
||||
if d < dMin {
|
||||
dMin = d
|
||||
}
|
||||
}
|
||||
return dMin
|
||||
}
|
||||
|
||||
func getCompressLevelForRowsCount(rowsCount, blocksCount uint64) int {
|
||||
avgRowsPerBlock := rowsCount / blocksCount
|
||||
if avgRowsPerBlock <= 200 {
|
||||
|
|
|
@ -115,7 +115,7 @@ func (rrm *rawRowsMarshaler) marshalToInmemoryPart(mp *inmemoryPart, rows []rawR
|
|||
|
||||
rrm.auxValues, scale = decimal.AppendFloatToDecimal(rrm.auxValues[:0], rrm.auxFloatValues)
|
||||
tmpBlock.Init(tsid, rrm.auxTimestamps, rrm.auxValues, scale, precisionBits)
|
||||
rrm.bsw.WriteExternalBlock(tmpBlock, ph, &rowsMerged, false)
|
||||
rrm.bsw.WriteExternalBlock(tmpBlock, ph, &rowsMerged)
|
||||
|
||||
tsid = &r.TSID
|
||||
precisionBits = r.PrecisionBits
|
||||
|
@ -125,7 +125,7 @@ func (rrm *rawRowsMarshaler) marshalToInmemoryPart(mp *inmemoryPart, rows []rawR
|
|||
|
||||
rrm.auxValues, scale = decimal.AppendFloatToDecimal(rrm.auxValues[:0], rrm.auxFloatValues)
|
||||
tmpBlock.Init(tsid, rrm.auxTimestamps, rrm.auxValues, scale, precisionBits)
|
||||
rrm.bsw.WriteExternalBlock(tmpBlock, ph, &rowsMerged, false)
|
||||
rrm.bsw.WriteExternalBlock(tmpBlock, ph, &rowsMerged)
|
||||
if rowsMerged != uint64(len(rows)) {
|
||||
logger.Panicf("BUG: unexpected rowsMerged; got %d; want %d", rowsMerged, len(rows))
|
||||
}
|
||||
|
|
|
@ -31,7 +31,8 @@ type table struct {
|
|||
|
||||
stop chan struct{}
|
||||
|
||||
retentionWatcherWG sync.WaitGroup
|
||||
retentionWatcherWG sync.WaitGroup
|
||||
finalDedupWatcherWG sync.WaitGroup
|
||||
}
|
||||
|
||||
// partitionWrapper provides refcounting mechanism for the partition.
|
||||
|
@ -135,6 +136,7 @@ func openTable(path string, getDeletedMetricIDs func() *uint64set.Set, retention
|
|||
tb.addPartitionNolock(pt)
|
||||
}
|
||||
tb.startRetentionWatcher()
|
||||
tb.startFinalDedupWatcher()
|
||||
return tb, nil
|
||||
}
|
||||
|
||||
|
@ -193,6 +195,7 @@ func (tb *table) addPartitionNolock(pt *partition) {
|
|||
func (tb *table) MustClose() {
|
||||
close(tb.stop)
|
||||
tb.retentionWatcherWG.Wait()
|
||||
tb.finalDedupWatcherWG.Wait()
|
||||
|
||||
tb.ptwsLock.Lock()
|
||||
ptws := tb.ptws
|
||||
|
@ -435,6 +438,47 @@ func (tb *table) retentionWatcher() {
|
|||
}
|
||||
}
|
||||
|
||||
func (tb *table) startFinalDedupWatcher() {
|
||||
tb.finalDedupWatcherWG.Add(1)
|
||||
go func() {
|
||||
tb.finalDedupWatcher()
|
||||
tb.finalDedupWatcherWG.Done()
|
||||
}()
|
||||
}
|
||||
|
||||
func (tb *table) finalDedupWatcher() {
|
||||
if !isDedupEnabled() {
|
||||
// Deduplication is disabled.
|
||||
return
|
||||
}
|
||||
f := func() {
|
||||
ptws := tb.GetPartitions(nil)
|
||||
defer tb.PutPartitions(ptws)
|
||||
timestamp := timestampFromTime(time.Now())
|
||||
currentPartitionName := timestampToPartitionName(timestamp)
|
||||
for _, ptw := range ptws {
|
||||
if ptw.pt.name == currentPartitionName {
|
||||
// Do not run final dedup for the current month.
|
||||
continue
|
||||
}
|
||||
if err := ptw.pt.runFinalDedup(); err != nil {
|
||||
logger.Errorf("cannot run final dedup for partition %s: %s", ptw.pt.name, err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
t := time.NewTicker(time.Hour)
|
||||
defer t.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-tb.stop:
|
||||
return
|
||||
case <-t.C:
|
||||
f()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// GetPartitions appends tb's partitions snapshot to dst and returns the result.
|
||||
//
|
||||
// The returned partitions must be passed to PutPartitions
|
||||
|
|
Loading…
Reference in a new issue