all: limit the maximum memory usage for regexp cache, which stores parsed regular expressions in MetricsQL queries

Previously the cache could store 10K unique regexps. When every regexp is huge (e.g. hundreds of kilobytes),
then the total cache size could grow to multiple gigabytes. Now the cache size is limited by the total length
of all cached regexps. So huge regexps won't result in high memory usage for the cache.
This commit is contained in:
Aliaksandr Valialkin 2022-06-24 17:55:17 +03:00
parent bb7f31541f
commit d2bbbf147c
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
5 changed files with 58 additions and 18 deletions

View file

@ -29,6 +29,7 @@ scrape_configs:
- targets: ["host123:8080"] - targets: ["host123:8080"]
``` ```
* BUGFIX: limit max memory occupied by the cache, which stores parsed regular expressions. Previously too long regular expressions passed in [MetricsQL queries](https://docs.victoriametrics.com/MetricsQL.html) could result in big amounts of used memory (e.g. multiple gigabytes). Now the max cache size for parsed regexps is limited to a few megabytes.
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): make sure that [stale markers](https://docs.victoriametrics.com/vmagent.html#prometheus-staleness-markers) are generated with the actual timestamp when an unsuccessful scrape occurs. This should prevent possible time series overlap on scrape target restart in dynamic environments such as Kubernetes. * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): make sure that [stale markers](https://docs.victoriametrics.com/vmagent.html#prometheus-staleness-markers) are generated with the actual timestamp when an unsuccessful scrape occurs. This should prevent possible time series overlap on scrape target restart in dynamic environments such as Kubernetes.
* BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): assume that the response is complete if `-search.denyPartialResponse` is enabled and up to `-replicationFactor - 1` `vmstorage` nodes are unavailable. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1767). * BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): assume that the response is complete if `-search.denyPartialResponse` is enabled and up to `-replicationFactor - 1` `vmstorage` nodes are unavailable. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1767).

2
go.mod
View file

@ -10,7 +10,7 @@ require (
// like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b // like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b
github.com/VictoriaMetrics/fasthttp v1.1.0 github.com/VictoriaMetrics/fasthttp v1.1.0
github.com/VictoriaMetrics/metrics v1.18.1 github.com/VictoriaMetrics/metrics v1.18.1
github.com/VictoriaMetrics/metricsql v0.43.0 github.com/VictoriaMetrics/metricsql v0.44.0
github.com/aws/aws-sdk-go v1.44.37 github.com/aws/aws-sdk-go v1.44.37
github.com/cespare/xxhash/v2 v2.1.2 github.com/cespare/xxhash/v2 v2.1.2

4
go.sum
View file

@ -107,8 +107,8 @@ github.com/VictoriaMetrics/fasthttp v1.1.0 h1:3crd4YWHsMwu60GUXRH6OstowiFvqrwS4a
github.com/VictoriaMetrics/fasthttp v1.1.0/go.mod h1:/7DMcogqd+aaD3G3Hg5kFgoFwlR2uydjiWvoLp5ZTqQ= github.com/VictoriaMetrics/fasthttp v1.1.0/go.mod h1:/7DMcogqd+aaD3G3Hg5kFgoFwlR2uydjiWvoLp5ZTqQ=
github.com/VictoriaMetrics/metrics v1.18.1 h1:OZ0+kTTto8oPfHnVAnTOoyl0XlRhRkoQrD2n2cOuRw0= github.com/VictoriaMetrics/metrics v1.18.1 h1:OZ0+kTTto8oPfHnVAnTOoyl0XlRhRkoQrD2n2cOuRw0=
github.com/VictoriaMetrics/metrics v1.18.1/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA= github.com/VictoriaMetrics/metrics v1.18.1/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA=
github.com/VictoriaMetrics/metricsql v0.43.0 h1:pFkzfExn9GJ1w3tE1pFTkjlyPd4kr/onh5CBAJAZf+s= github.com/VictoriaMetrics/metricsql v0.44.0 h1:zxBVeg9tbm6zl2ft2Ica87ItUWYey02hy3MN2ti1ljg=
github.com/VictoriaMetrics/metricsql v0.43.0/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0= github.com/VictoriaMetrics/metricsql v0.44.0/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0=
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow= github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4= github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4=
github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g= github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g=

View file

@ -29,10 +29,14 @@ func CompileRegexp(re string) (*regexp.Regexp, error) {
return rcv.r, rcv.err return rcv.r, rcv.err
} }
// regexpCacheCharsMax limits the total number of chars stored in the regexp cache across all entries.
//
// The limit is expressed in chars, since calculating the exact size of each regexp is problematic,
// while the number of chars seems like a universal approach for both short and long regexps.
const regexpCacheCharsMax = 1e6
var regexpCacheV = func() *regexpCache { var regexpCacheV = func() *regexpCache {
rc := &regexpCache{ rc := newRegexpCache(regexpCacheCharsMax)
m: make(map[string]*regexpCacheValue),
}
metrics.NewGauge(`vm_cache_requests_total{type="promql/regexp"}`, func() float64 { metrics.NewGauge(`vm_cache_requests_total{type="promql/regexp"}`, func() float64 {
return float64(rc.Requests()) return float64(rc.Requests())
}) })
@ -42,27 +46,51 @@ var regexpCacheV = func() *regexpCache {
metrics.NewGauge(`vm_cache_entries{type="promql/regexp"}`, func() float64 { metrics.NewGauge(`vm_cache_entries{type="promql/regexp"}`, func() float64 {
return float64(rc.Len()) return float64(rc.Len())
}) })
metrics.NewGauge(`vm_cache_chars_current{type="promql/regexp"}`, func() float64 {
return float64(rc.CharsCurrent())
})
metrics.NewGauge(`vm_cache_chars_max{type="promql/regexp"}`, func() float64 {
return float64(rc.charsLimit)
})
return rc return rc
}() }()
const regexpCacheMaxLen = 10e3
type regexpCacheValue struct { type regexpCacheValue struct {
r *regexp.Regexp r *regexp.Regexp
err error err error
} }
// RegexpLen returns the number of chars used for estimating the size of rcv.
//
// It is the length of the string form of the compiled regexp when rcv.r is set.
// Otherwise it is the length of the error message stored in rcv.err.
// Zero is returned when both rcv.r and rcv.err are nil, so an empty value
// doesn't result in a nil dereference.
func (rcv *regexpCacheValue) RegexpLen() int {
	if rcv.r != nil {
		return len(rcv.r.String())
	}
	if rcv.err != nil {
		return len(rcv.err.Error())
	}
	return 0
}
type regexpCache struct { type regexpCache struct {
// Move atomic counters to the top of struct for 8-byte alignment on 32-bit arch. // Move atomic counters to the top of struct for 8-byte alignment on 32-bit arch.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/212 // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/212
requests uint64 requests uint64
misses uint64 misses uint64
// charsCurrent stores the total number of characters used in stored regexps.
// It is used for memory usage estimation.
charsCurrent int
// charsLimit is the maximum number of chars the regexpCache can store.
charsLimit int
m map[string]*regexpCacheValue m map[string]*regexpCacheValue
mu sync.RWMutex mu sync.RWMutex
} }
// newRegexpCache returns an empty regexpCache with the given charsLimit.
//
// The returned cache has its map initialized, so it is ready for use.
func newRegexpCache(charsLimit int) *regexpCache {
	rc := &regexpCache{charsLimit: charsLimit}
	rc.m = make(map[string]*regexpCacheValue)
	return rc
}
func (rc *regexpCache) Requests() uint64 { func (rc *regexpCache) Requests() uint64 {
return atomic.LoadUint64(&rc.requests) return atomic.LoadUint64(&rc.requests)
} }
@ -71,11 +99,18 @@ func (rc *regexpCache) Misses() uint64 {
return atomic.LoadUint64(&rc.misses) return atomic.LoadUint64(&rc.misses)
} }
func (rc *regexpCache) Len() uint64 { func (rc *regexpCache) Len() int {
rc.mu.RLock() rc.mu.RLock()
n := len(rc.m) n := len(rc.m)
rc.mu.RUnlock() rc.mu.RUnlock()
return uint64(n) return n
}
func (rc *regexpCache) CharsCurrent() int {
rc.mu.RLock()
n := rc.charsCurrent
rc.mu.RUnlock()
return int(n)
} }
func (rc *regexpCache) Get(regexp string) *regexpCacheValue { func (rc *regexpCache) Get(regexp string) *regexpCacheValue {
@ -93,18 +128,22 @@ func (rc *regexpCache) Get(regexp string) *regexpCacheValue {
func (rc *regexpCache) Put(regexp string, rcv *regexpCacheValue) { func (rc *regexpCache) Put(regexp string, rcv *regexpCacheValue) {
rc.mu.Lock() rc.mu.Lock()
overflow := len(rc.m) - regexpCacheMaxLen if rc.charsCurrent > rc.charsLimit {
if overflow > 0 { // Remove items accounting for 10% chars from the cache.
// Remove 10% of items from the cache. overflow := int(float64(rc.charsLimit) * 0.1)
overflow = int(float64(len(rc.m)) * 0.1) for k, v := range rc.m {
for k := range rc.m {
delete(rc.m, k) delete(rc.m, k)
overflow--
size := len(k) + v.RegexpLen()
overflow -= size
rc.charsCurrent -= size
if overflow <= 0 { if overflow <= 0 {
break break
} }
} }
} }
rc.m[regexp] = rcv rc.m[regexp] = rcv
rc.charsCurrent += len(regexp) + rcv.RegexpLen()
rc.mu.Unlock() rc.mu.Unlock()
} }

2
vendor/modules.txt vendored
View file

@ -27,7 +27,7 @@ github.com/VictoriaMetrics/fasthttp/stackless
# github.com/VictoriaMetrics/metrics v1.18.1 # github.com/VictoriaMetrics/metrics v1.18.1
## explicit; go 1.12 ## explicit; go 1.12
github.com/VictoriaMetrics/metrics github.com/VictoriaMetrics/metrics
# github.com/VictoriaMetrics/metricsql v0.43.0 # github.com/VictoriaMetrics/metricsql v0.44.0
## explicit; go 1.13 ## explicit; go 1.13
github.com/VictoriaMetrics/metricsql github.com/VictoriaMetrics/metricsql
github.com/VictoriaMetrics/metricsql/binaryop github.com/VictoriaMetrics/metricsql/binaryop