From d2bbbf147cc44a9b789ae5cf3e5f1962196fc256 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Fri, 24 Jun 2022 17:55:17 +0300 Subject: [PATCH] all: limit the maximum memory usage for regexp cache, which stores parsed regular expressions in MetricsQL queries Previously the cache could store 10K unique regexps. When every regexp is huge (e.g. hundreds of kilobytes), then the total cache size could grow to multiples of gigabytes. Now the cache size is limited by the total length of all cached regexps. So huge regexps won't result in high memory usage for the cache. --- docs/CHANGELOG.md | 1 + go.mod | 2 +- go.sum | 4 +- .../VictoriaMetrics/metricsql/regexp_cache.go | 67 +++++++++++++++---- vendor/modules.txt | 2 +- 5 files changed, 58 insertions(+), 18 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 4dbfc397d..62a3bcc23 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -29,6 +29,7 @@ scrape_configs: - targets: ["host123:8080"] ``` +* BUGFIX: limit max memory occupied by the cache, which stores parsed regular expressions. Previously too long regular expressions passed in [MetricsQL queries](https://docs.victoriametrics.com/MetricsQL.html) could result in big amounts of used memory (e.g. multiple of gigabytes). Now the max cache size for parsed regexps is limited to a few megabytes. * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): make sure that [stale markers](https://docs.victoriametrics.com/vmagent.html#prometheus-staleness-markers) are generated with the actual timestamp when unsuccessful scrape occurs. This should prevent from possible time series overlap on scrape target restart in dynmaic envirnoments such as Kubernetes. * BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): assume that the response is complete if `-search.denyPartialResponse` is enabled and up to `-replicationFactor - 1` `vmstorage` nodes are unavailable. 
See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1767). diff --git a/go.mod b/go.mod index 5e7bcba43..a575c7bf6 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,7 @@ require ( // like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b github.com/VictoriaMetrics/fasthttp v1.1.0 github.com/VictoriaMetrics/metrics v1.18.1 - github.com/VictoriaMetrics/metricsql v0.43.0 + github.com/VictoriaMetrics/metricsql v0.44.0 github.com/aws/aws-sdk-go v1.44.37 github.com/cespare/xxhash/v2 v2.1.2 diff --git a/go.sum b/go.sum index a43e7544a..2432b16ee 100644 --- a/go.sum +++ b/go.sum @@ -107,8 +107,8 @@ github.com/VictoriaMetrics/fasthttp v1.1.0 h1:3crd4YWHsMwu60GUXRH6OstowiFvqrwS4a github.com/VictoriaMetrics/fasthttp v1.1.0/go.mod h1:/7DMcogqd+aaD3G3Hg5kFgoFwlR2uydjiWvoLp5ZTqQ= github.com/VictoriaMetrics/metrics v1.18.1 h1:OZ0+kTTto8oPfHnVAnTOoyl0XlRhRkoQrD2n2cOuRw0= github.com/VictoriaMetrics/metrics v1.18.1/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA= -github.com/VictoriaMetrics/metricsql v0.43.0 h1:pFkzfExn9GJ1w3tE1pFTkjlyPd4kr/onh5CBAJAZf+s= -github.com/VictoriaMetrics/metricsql v0.43.0/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0= +github.com/VictoriaMetrics/metricsql v0.44.0 h1:zxBVeg9tbm6zl2ft2Ica87ItUWYey02hy3MN2ti1ljg= +github.com/VictoriaMetrics/metricsql v0.44.0/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0= github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow= github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4= github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g= diff --git a/vendor/github.com/VictoriaMetrics/metricsql/regexp_cache.go b/vendor/github.com/VictoriaMetrics/metricsql/regexp_cache.go index 327a6dfed..d41e1fc08 100644 --- a/vendor/github.com/VictoriaMetrics/metricsql/regexp_cache.go +++ b/vendor/github.com/VictoriaMetrics/metricsql/regexp_cache.go @@ -29,10 +29,14 @@ 
func CompileRegexp(re string) (*regexp.Regexp, error) { return rcv.r, rcv.err } +// regexpCacheCharsMax limits the max number of chars stored in regexp cache across all entries. +// +// We limit by number of chars since calculating the exact size of each regexp is problematic, +// while using chars seems like universal approach for short and long regexps. +const regexpCacheCharsMax = 1e6 + var regexpCacheV = func() *regexpCache { - rc := &regexpCache{ - m: make(map[string]*regexpCacheValue), - } + rc := newRegexpCache(regexpCacheCharsMax) metrics.NewGauge(`vm_cache_requests_total{type="promql/regexp"}`, func() float64 { return float64(rc.Requests()) }) @@ -42,27 +46,51 @@ var regexpCacheV = func() *regexpCache { metrics.NewGauge(`vm_cache_entries{type="promql/regexp"}`, func() float64 { return float64(rc.Len()) }) + metrics.NewGauge(`vm_cache_chars_current{type="promql/regexp"}`, func() float64 { + return float64(rc.CharsCurrent()) + }) + metrics.NewGauge(`vm_cache_chars_max{type="promql/regexp"}`, func() float64 { + return float64(rc.charsLimit) + }) return rc }() -const regexpCacheMaxLen = 10e3 - type regexpCacheValue struct { r *regexp.Regexp err error } +func (rcv *regexpCacheValue) RegexpLen() int { + if r := rcv.r; r != nil { + return len(r.String()) + } + return len(rcv.err.Error()) +} + type regexpCache struct { // Move atomic counters to the top of struct for 8-byte alignment on 32-bit arch. // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/212 - requests uint64 misses uint64 + // charsCurrent stores the total number of characters used in stored regexps. + // is used for memory usage estimation. + charsCurrent int + + // charsLimit is the maximum number of chars the regexpCache can store. 
+ charsLimit int + m map[string]*regexpCacheValue mu sync.RWMutex } +func newRegexpCache(charsLimit int) *regexpCache { + return &regexpCache{ + m: make(map[string]*regexpCacheValue), + charsLimit: charsLimit, + } +} + func (rc *regexpCache) Requests() uint64 { return atomic.LoadUint64(&rc.requests) } @@ -71,11 +99,18 @@ func (rc *regexpCache) Misses() uint64 { return atomic.LoadUint64(&rc.misses) } -func (rc *regexpCache) Len() uint64 { +func (rc *regexpCache) Len() int { rc.mu.RLock() n := len(rc.m) rc.mu.RUnlock() - return uint64(n) + return n +} + +func (rc *regexpCache) CharsCurrent() int { + rc.mu.RLock() + n := rc.charsCurrent + rc.mu.RUnlock() + return int(n) } func (rc *regexpCache) Get(regexp string) *regexpCacheValue { @@ -93,18 +128,22 @@ func (rc *regexpCache) Get(regexp string) *regexpCacheValue { func (rc *regexpCache) Put(regexp string, rcv *regexpCacheValue) { rc.mu.Lock() - overflow := len(rc.m) - regexpCacheMaxLen - if overflow > 0 { - // Remove 10% of items from the cache. - overflow = int(float64(len(rc.m)) * 0.1) - for k := range rc.m { + if rc.charsCurrent > rc.charsLimit { + // Remove items accounting for 10% chars from the cache. + overflow := int(float64(rc.charsLimit) * 0.1) + for k, v := range rc.m { delete(rc.m, k) - overflow-- + + size := len(k) + v.RegexpLen() + overflow -= size + rc.charsCurrent -= size + if overflow <= 0 { break } } } rc.m[regexp] = rcv + rc.charsCurrent += len(regexp) + rcv.RegexpLen() rc.mu.Unlock() } diff --git a/vendor/modules.txt b/vendor/modules.txt index 2b87ef92b..f76d20b22 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -27,7 +27,7 @@ github.com/VictoriaMetrics/fasthttp/stackless # github.com/VictoriaMetrics/metrics v1.18.1 ## explicit; go 1.12 github.com/VictoriaMetrics/metrics -# github.com/VictoriaMetrics/metricsql v0.43.0 +# github.com/VictoriaMetrics/metricsql v0.44.0 ## explicit; go 1.13 github.com/VictoriaMetrics/metricsql github.com/VictoriaMetrics/metricsql/binaryop