app/vmselect: log the metric which trigger rollup result cache reset

This should help finding the source of stale metrics
This commit is contained in:
Aliaksandr Valialkin 2021-03-25 21:30:41 +02:00
parent 50f790b5d7
commit aa81039b42
2 changed files with 10 additions and 1 deletions

View file

@ -29,12 +29,16 @@ var (
// ResetRollupResultCacheIfNeeded resets rollup result cache if mrs contains timestamps outside `now - search.cacheTimestampOffset`. // ResetRollupResultCacheIfNeeded resets rollup result cache if mrs contains timestamps outside `now - search.cacheTimestampOffset`.
func ResetRollupResultCacheIfNeeded(mrs []storage.MetricRow) { func ResetRollupResultCacheIfNeeded(mrs []storage.MetricRow) {
checkRollupResultCacheResetOnce.Do(func() { checkRollupResultCacheResetOnce.Do(func() {
rollupResultResetMetricRowSample.Store(&storage.MetricRow{})
go checkRollupResultCacheReset() go checkRollupResultCacheReset()
}) })
minTimestamp := int64(fasttime.UnixTimestamp()*1000) - cacheTimestampOffset.Milliseconds() + checkRollupResultCacheResetInterval.Milliseconds() minTimestamp := int64(fasttime.UnixTimestamp()*1000) - cacheTimestampOffset.Milliseconds() + checkRollupResultCacheResetInterval.Milliseconds()
needCacheReset := false needCacheReset := false
for i := range mrs { for i := range mrs {
if mrs[i].Timestamp < minTimestamp { if mrs[i].Timestamp < minTimestamp {
var mr storage.MetricRow
mr.CopyFrom(&mrs[i])
rollupResultResetMetricRowSample.Store(&mr)
needCacheReset = true needCacheReset = true
break break
} }
@ -49,6 +53,10 @@ func checkRollupResultCacheReset() {
for { for {
time.Sleep(checkRollupResultCacheResetInterval) time.Sleep(checkRollupResultCacheResetInterval)
if atomic.SwapUint32(&needRollupResultCacheReset, 0) > 0 { if atomic.SwapUint32(&needRollupResultCacheReset, 0) > 0 {
mr := rollupResultResetMetricRowSample.Load().(*storage.MetricRow)
d := int64(fasttime.UnixTimestamp()*1000) - mr.Timestamp - cacheTimestampOffset.Milliseconds()
logger.Warnf("resetting rollup result cache because the metric %s has a timestamp older than -search.cacheTimestampOffset=%s by %.3fs",
mr.String(), cacheTimestampOffset, float64(d)/1e3)
ResetRollupResultCache() ResetRollupResultCache()
} }
} }
@ -58,6 +66,7 @@ const checkRollupResultCacheResetInterval = 5 * time.Second
var needRollupResultCacheReset uint32 var needRollupResultCacheReset uint32
var checkRollupResultCacheResetOnce sync.Once var checkRollupResultCacheResetOnce sync.Once
var rollupResultResetMetricRowSample atomic.Value
var rollupResultCacheV = &rollupResultCache{ var rollupResultCacheV = &rollupResultCache{
c: workingsetcache.New(1024*1024, time.Hour), // This is a cache for testing. c: workingsetcache.New(1024*1024, time.Hour), // This is a cache for testing.

View file

@ -1272,7 +1272,7 @@ func (mr *MetricRow) String() string {
if err := mn.unmarshalRaw(mr.MetricNameRaw); err == nil { if err := mn.unmarshalRaw(mr.MetricNameRaw); err == nil {
metricName = mn.String() metricName = mn.String()
} }
return fmt.Sprintf("MetricName=%s, Timestamp=%d, Value=%f\n", metricName, mr.Timestamp, mr.Value) return fmt.Sprintf("%s (Timestamp=%d, Value=%f)", metricName, mr.Timestamp, mr.Value)
} }
// Marshal appends marshaled mr to dst and returns the result. // Marshal appends marshaled mr to dst and returns the result.