From 46127b432d27eb6fda5626773a37b15f976d8ed1 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Mon, 27 Feb 2023 14:15:49 -0800 Subject: [PATCH] lib/bytesutil: add `-internStringDisableCache` and `-internStringCacheExpireDuration` command-line flags This commit is based on https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3872 --- README.md | 6 ++++- app/vmagent/README.md | 6 ++++- docs/CHANGELOG.md | 2 ++ docs/Cluster-VictoriaMetrics.md | 18 +++++++++++--- docs/README.md | 6 ++++- docs/Single-server-VictoriaMetrics.md | 6 ++++- docs/vmagent.md | 6 ++++- lib/bytesutil/fast_string_matcher.go | 7 +++++- lib/bytesutil/fast_string_transformer.go | 13 +++++++++- lib/bytesutil/internstring.go | 30 +++++++++++++++++------- 10 files changed, 81 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index e357108c5..3f54e17f2 100644 --- a/README.md +++ b/README.md @@ -2228,8 +2228,12 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li The interval for guaranteed saving of in-memory data to disk. The saved data survives unclean shutdown such as OOM crash, hardware reset, SIGKILL, etc. Bigger intervals may help increasing lifetime of flash storage with limited write cycles (e.g. Raspberry PI). Smaller intervals increase disk IO load. Minimum supported value is 1s (default 5s) -insert.maxQueueDuration duration The maximum duration to wait in the queue when -maxConcurrentInserts concurrent insert requests are executed (default 1m0s) + -internStringCacheExpireDuration duration + The expire duration for caches for interned strings. See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache (default 6m0s) + -internStringDisableCache + Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen -internStringMaxLen int - The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning (default 500) + The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500) -logNewSeries Whether to log new series. This option is for debug purposes only. It can lead to performance issues when big number of new series are ingested into VictoriaMetrics -loggerDisableTimestamps diff --git a/app/vmagent/README.md b/app/vmagent/README.md index 09493e903..03e1f6f14 100644 --- a/app/vmagent/README.md +++ b/app/vmagent/README.md @@ -1241,8 +1241,12 @@ See the docs at https://docs.victoriametrics.com/vmagent.html . Trim timestamps for InfluxDB line protocol data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms) -insert.maxQueueDuration duration The maximum duration to wait in the queue when -maxConcurrentInserts concurrent insert requests are executed (default 1m0s) + -internStringCacheExpireDuration duration + The expire duration for caches for interned strings. See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache (default 6m0s) + -internStringDisableCache + Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen -internStringMaxLen int - The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning (default 500) + The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500) -kafka.consumer.topic array Kafka topic names for data consumption. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html Supports an array of values separated by comma or specified via multiple flags. diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 479f75ed0..0b3724a6b 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -18,6 +18,8 @@ The following tip changes can be tested by building VictoriaMetrics components f * FEATURE: add `-snapshotCreateTimeout` flag to allow configuring timeout for [snapshot process](https://docs.victoriametrics.com/#how-to-work-with-snapshots). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3551). * FEATURE: expose `vm_http_requests_total` and `vm_http_request_errors_total` metrics for `snapshot/*` [paths](https://docs.victoriametrics.com/#how-to-work-with-snapshots) at [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html) `vmstorage` and [VictoriaMetrics Single](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3551). * FEATURE: [vmgateway](https://docs.victoriametrics.com/vmgateway.html): add the ability to discover keys for JWT verification via [OpenID discovery endpoint](https://openid.net/specs/openid-connect-discovery-1_0.html). See [these docs](https://docs.victoriametrics.com/vmgateway.html#using-openid-discovery-endpoint-for-jwt-signature-verification). +* FEATURE: add `-internStringDisableCache` command-line flag for disabling the cache for [interned strings](https://en.wikipedia.org/wiki/String_interning). This flag may be useful in some cases for reducing memory usage at the cost of higher CPU usage. +* FEATURE: add `-internStringCacheExpireDuration` command-line flag for controlling the lifetime of cached [interned strings](https://en.wikipedia.org/wiki/String_interning). * BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): fix panic when executing the query `aggr_func(rollup*(some_value))`. The panic has been introduced in [v1.88.0](https://docs.victoriametrics.com/CHANGELOG.html#v1880). * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): use the provided `-remoteWrite.*` auth options when determining whether the remote storage supports [VictoriaMetrics remote write protocol](https://docs.victoriametrics.com/vmagent.html#victoriametrics-remote-write-protocol). Previously the auth options were ignored. This was preventing from automatic switch to VictoriaMetrics remote write protocol. diff --git a/docs/Cluster-VictoriaMetrics.md b/docs/Cluster-VictoriaMetrics.md index 29ca1819e..8b8105681 100644 --- a/docs/Cluster-VictoriaMetrics.md +++ b/docs/Cluster-VictoriaMetrics.md @@ -894,8 +894,12 @@ Below is the output for `/path/to/vminsert -help`: Trim timestamps for InfluxDB line protocol data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms) -insert.maxQueueDuration duration The maximum duration to wait in the queue when -maxConcurrentInserts concurrent insert requests are executed (default 1m0s) + -internStringCacheExpireDuration duration + The expire duration for caches for interned strings. See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache (default 6m0s) + -internStringDisableCache + Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen -internStringMaxLen int - The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning (default 500) + The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500) -loggerDisableTimestamps Whether to disable writing timestamps in logs -loggerErrorsPerSecondLimit int @@ -1074,8 +1078,12 @@ Below is the output for `/path/to/vmselect -help`: Address to listen for http connections. See also -httpListenAddr.useProxyProtocol (default ":8481") -httpListenAddr.useProxyProtocol Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt + -internStringCacheExpireDuration duration + The expire duration for caches for interned strings. See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache (default 6m0s) + -internStringDisableCache + Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen -internStringMaxLen int - The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning (default 500) + The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500) -loggerDisableTimestamps Whether to disable writing timestamps in logs -loggerErrorsPerSecondLimit int @@ -1292,8 +1300,12 @@ Below is the output for `/path/to/vmstorage -help`: Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt -insert.maxQueueDuration duration The maximum duration to wait in the queue when -maxConcurrentInserts concurrent insert requests are executed (default 1m0s) + -internStringCacheExpireDuration duration + The expire duration for caches for interned strings. See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache (default 6m0s) + -internStringDisableCache + Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen -internStringMaxLen int - The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning (default 500) + The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500) -logNewSeries Whether to log new series. This option is for debug purposes only. It can lead to performance issues when big number of new series are ingested into VictoriaMetrics -loggerDisableTimestamps diff --git a/docs/README.md b/docs/README.md index 7e17ca5a5..eb7a413f9 100644 --- a/docs/README.md +++ b/docs/README.md @@ -2229,8 +2229,12 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li The interval for guaranteed saving of in-memory data to disk. The saved data survives unclean shutdown such as OOM crash, hardware reset, SIGKILL, etc. Bigger intervals may help increasing lifetime of flash storage with limited write cycles (e.g. Raspberry PI). Smaller intervals increase disk IO load. Minimum supported value is 1s (default 5s) -insert.maxQueueDuration duration The maximum duration to wait in the queue when -maxConcurrentInserts concurrent insert requests are executed (default 1m0s) + -internStringCacheExpireDuration duration + The expire duration for caches for interned strings. See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache (default 6m0s) + -internStringDisableCache + Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen -internStringMaxLen int - The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning (default 500) + The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500) -logNewSeries Whether to log new series. This option is for debug purposes only. It can lead to performance issues when big number of new series are ingested into VictoriaMetrics -loggerDisableTimestamps diff --git a/docs/Single-server-VictoriaMetrics.md b/docs/Single-server-VictoriaMetrics.md index 143f6d151..ddaf0f2ed 100644 --- a/docs/Single-server-VictoriaMetrics.md +++ b/docs/Single-server-VictoriaMetrics.md @@ -2232,8 +2232,12 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li The interval for guaranteed saving of in-memory data to disk. The saved data survives unclean shutdown such as OOM crash, hardware reset, SIGKILL, etc. Bigger intervals may help increasing lifetime of flash storage with limited write cycles (e.g. Raspberry PI). Smaller intervals increase disk IO load. Minimum supported value is 1s (default 5s) -insert.maxQueueDuration duration The maximum duration to wait in the queue when -maxConcurrentInserts concurrent insert requests are executed (default 1m0s) + -internStringCacheExpireDuration duration + The expire duration for caches for interned strings. See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache (default 6m0s) + -internStringDisableCache + Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen -internStringMaxLen int - The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning (default 500) + The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500) -logNewSeries Whether to log new series. This option is for debug purposes only. It can lead to performance issues when big number of new series are ingested into VictoriaMetrics -loggerDisableTimestamps diff --git a/docs/vmagent.md b/docs/vmagent.md index 2550137f2..f316f16f6 100644 --- a/docs/vmagent.md +++ b/docs/vmagent.md @@ -1245,8 +1245,12 @@ See the docs at https://docs.victoriametrics.com/vmagent.html . Trim timestamps for InfluxDB line protocol data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms) -insert.maxQueueDuration duration The maximum duration to wait in the queue when -maxConcurrentInserts concurrent insert requests are executed (default 1m0s) + -internStringCacheExpireDuration duration + The expire duration for caches for interned strings. See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache (default 6m0s) + -internStringDisableCache + Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen -internStringMaxLen int - The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning (default 500) + The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500) -kafka.consumer.topic array Kafka topic names for data consumption. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html Supports an array of values separated by comma or specified via multiple flags. diff --git a/lib/bytesutil/fast_string_matcher.go b/lib/bytesutil/fast_string_matcher.go index 619bb325d..1b8d02b93 100644 --- a/lib/bytesutil/fast_string_matcher.go +++ b/lib/bytesutil/fast_string_matcher.go @@ -37,6 +37,10 @@ func NewFastStringMatcher(matchFunc func(s string) bool) *FastStringMatcher { // Match applies matchFunc to s and returns the result. func (fsm *FastStringMatcher) Match(s string) bool { + if isSkipCache(s) { + return fsm.matchFunc(s) + } + ct := fasttime.UnixTimestamp() v, ok := fsm.m.Load(s) if ok { @@ -65,9 +69,10 @@ func (fsm *FastStringMatcher) Match(s string) bool { if needCleanup(&fsm.lastCleanupTime, ct) { // Perform a global cleanup for fsm.m by removing items, which weren't accessed during the last 5 minutes. m := &fsm.m + deadline := ct - uint64(cacheExpireDuration.Seconds()) m.Range(func(k, v interface{}) bool { e := v.(*fsmEntry) - if atomic.LoadUint64(&e.lastAccessTime)+5*60 < ct { + if atomic.LoadUint64(&e.lastAccessTime) < deadline { m.Delete(k) } return true diff --git a/lib/bytesutil/fast_string_transformer.go b/lib/bytesutil/fast_string_transformer.go index 670f2c3f8..4dcc930d1 100644 --- a/lib/bytesutil/fast_string_transformer.go +++ b/lib/bytesutil/fast_string_transformer.go @@ -37,6 +37,16 @@ func NewFastStringTransformer(transformFunc func(s string) string) *FastStringTr // Transform applies transformFunc to s and returns the result. func (fst *FastStringTransformer) Transform(s string) string { + if isSkipCache(s) { + sTransformed := fst.transformFunc(s) + if sTransformed == s { + // Clone a string in order to protect from cases when s contains unsafe string. + // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3227 + sTransformed = strings.Clone(sTransformed) + } + return sTransformed + } + ct := fasttime.UnixTimestamp() v, ok := fst.m.Load(s) if ok { @@ -70,9 +80,10 @@ func (fst *FastStringTransformer) Transform(s string) string { if needCleanup(&fst.lastCleanupTime, ct) { // Perform a global cleanup for fst.m by removing items, which weren't accessed during the last 5 minutes. m := &fst.m + deadline := ct - uint64(cacheExpireDuration.Seconds()) m.Range(func(k, v interface{}) bool { e := v.(*fstEntry) - if atomic.LoadUint64(&e.lastAccessTime)+5*60 < ct { + if atomic.LoadUint64(&e.lastAccessTime) < deadline { m.Delete(k) } return true diff --git a/lib/bytesutil/internstring.go b/lib/bytesutil/internstring.go index 600758c8a..4ac49a290 100644 --- a/lib/bytesutil/internstring.go +++ b/lib/bytesutil/internstring.go @@ -5,12 +5,23 @@ import ( "strings" "sync" "sync/atomic" + "time" "github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime" ) -var internStringMaxLen = flag.Int("internStringMaxLen", 500, "The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. "+ - "See https://en.wikipedia.org/wiki/String_interning") +var ( + internStringMaxLen = flag.Int("internStringMaxLen", 500, "The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. "+ + "See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration") + disableCache = flag.Bool("internStringDisableCache", false, "Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. "+ + "See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen") + cacheExpireDuration = flag.Duration("internStringCacheExpireDuration", 6*time.Minute, "The expire duration for caches for interned strings. "+ + "See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache") +) + +func isSkipCache(s string) bool { + return *disableCache || len(s) > *internStringMaxLen +} // InternBytes interns b as a string func InternBytes(b []byte) string { @@ -22,6 +33,12 @@ func InternBytes(b []byte) string { // // This may be needed for reducing the amounts of allocated memory. func InternString(s string) string { + if isSkipCache(s) { + // Make a new copy for s in order to remove references from possible bigger string s refers to. + // This also protects from cases when s points to unsafe string - see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3227 + return strings.Clone(s) + } + ct := fasttime.UnixTimestamp() if v, ok := internStringsMap.Load(s); ok { e := v.(*ismEntry) @@ -34,12 +51,6 @@ func InternString(s string) string { } // Make a new copy for s in order to remove references from possible bigger string s refers to. sCopy := strings.Clone(s) - if len(sCopy) > *internStringMaxLen { - // Do not intern long strings, since this may result in high memory usage - // like in https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3692 - return sCopy - } - e := &ismEntry{ lastAccessTime: ct, s: sCopy, @@ -49,9 +60,10 @@ func InternString(s string) string { if needCleanup(&internStringsMapLastCleanupTime, ct) { // Perform a global cleanup for internStringsMap by removing items, which weren't accessed during the last 5 minutes. m := &internStringsMap + deadline := ct - uint64(cacheExpireDuration.Seconds()) m.Range(func(k, v interface{}) bool { e := v.(*ismEntry) - if atomic.LoadUint64(&e.lastAccessTime)+5*60 < ct { + if atomic.LoadUint64(&e.lastAccessTime) < deadline { m.Delete(k) } return true