mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-10 15:14:09 +00:00
lib/bytesutil: reduce the number of memory allocations per each interned string in bytesutil.InternString() from 5 to 1
This should reduce GC overhead when tens of millions of strings are interned (for example, during stream deduplication of millions of active time series).
This commit is contained in:
parent
8c8d84e30a
commit
6470eac7dc
1 changed files with 117 additions and 47 deletions
|
@ -19,6 +19,121 @@ var (
|
||||||
"See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache")
|
"See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type internStringMap struct {
|
||||||
|
mu sync.Mutex
|
||||||
|
mutable map[string]string
|
||||||
|
mutableReads uint64
|
||||||
|
|
||||||
|
readonly atomic.Pointer[map[string]internStringMapEntry]
|
||||||
|
|
||||||
|
nextCleanupTime atomic.Uint64
|
||||||
|
}
|
||||||
|
|
||||||
|
type internStringMapEntry struct {
|
||||||
|
deadline uint64
|
||||||
|
s string
|
||||||
|
}
|
||||||
|
|
||||||
|
func newInternStringMap() *internStringMap {
|
||||||
|
ism := &internStringMap{
|
||||||
|
mutable: make(map[string]string),
|
||||||
|
}
|
||||||
|
readonly := make(map[string]internStringMapEntry)
|
||||||
|
ism.readonly.Store(&readonly)
|
||||||
|
ism.nextCleanupTime.Store(fasttime.UnixTimestamp() + 61)
|
||||||
|
return ism
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *internStringMap) getReadonly() map[string]internStringMapEntry {
|
||||||
|
return *m.readonly.Load()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *internStringMap) intern(s string) string {
|
||||||
|
if *disableCache || len(s) > *internStringMaxLen {
|
||||||
|
return strings.Clone(s)
|
||||||
|
}
|
||||||
|
currentTime := fasttime.UnixTimestamp()
|
||||||
|
if currentTime >= m.nextCleanupTime.Load() {
|
||||||
|
m.nextCleanupTime.Store(currentTime + 61)
|
||||||
|
m.cleanup()
|
||||||
|
}
|
||||||
|
|
||||||
|
readonly := m.getReadonly()
|
||||||
|
e, ok := readonly[s]
|
||||||
|
if ok {
|
||||||
|
// Fast path - the string has been found in readonly map
|
||||||
|
return e.s
|
||||||
|
}
|
||||||
|
|
||||||
|
// Slower path - search for the string in mutable map
|
||||||
|
m.mu.Lock()
|
||||||
|
sInterned, ok := m.mutable[s]
|
||||||
|
if !ok {
|
||||||
|
// Verify whether the s has been already registered by concurrent goroutines in m.readonly
|
||||||
|
readonly = m.getReadonly()
|
||||||
|
e, ok = readonly[s]
|
||||||
|
if !ok {
|
||||||
|
// Slowest path - register the string in mutable map.
|
||||||
|
// Make a new copy for s in order to remove references from possible bigger string s refers to.
|
||||||
|
sInterned = strings.Clone(s)
|
||||||
|
m.mutable[sInterned] = sInterned
|
||||||
|
} else {
|
||||||
|
sInterned = e.s
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m.mutableReads++
|
||||||
|
if m.mutableReads > uint64(len(readonly)) {
|
||||||
|
m.migrateMutableToReadonlyLocked()
|
||||||
|
m.mutableReads = 0
|
||||||
|
}
|
||||||
|
m.mu.Unlock()
|
||||||
|
|
||||||
|
return sInterned
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *internStringMap) migrateMutableToReadonlyLocked() {
|
||||||
|
readonly := m.getReadonly()
|
||||||
|
readonlyCopy := make(map[string]internStringMapEntry, len(readonly)+len(m.mutable))
|
||||||
|
for k, e := range readonly {
|
||||||
|
readonlyCopy[k] = e
|
||||||
|
}
|
||||||
|
deadline := fasttime.UnixTimestamp() + uint64(cacheExpireDuration.Seconds()+0.5)
|
||||||
|
for k, s := range m.mutable {
|
||||||
|
readonlyCopy[k] = internStringMapEntry{
|
||||||
|
s: s,
|
||||||
|
deadline: deadline,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m.mutable = make(map[string]string)
|
||||||
|
m.readonly.Store(&readonlyCopy)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *internStringMap) cleanup() {
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
|
||||||
|
readonly := m.getReadonly()
|
||||||
|
currentTime := fasttime.UnixTimestamp()
|
||||||
|
needCleanup := false
|
||||||
|
for _, e := range readonly {
|
||||||
|
if e.deadline <= currentTime {
|
||||||
|
needCleanup = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !needCleanup {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
readonlyCopy := make(map[string]internStringMapEntry, len(readonly))
|
||||||
|
for k, e := range readonly {
|
||||||
|
if e.deadline > currentTime {
|
||||||
|
readonlyCopy[k] = e
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m.readonly.Store(&readonlyCopy)
|
||||||
|
}
|
||||||
|
|
||||||
func isSkipCache(s string) bool {
|
func isSkipCache(s string) bool {
|
||||||
return *disableCache || len(s) > *internStringMaxLen
|
return *disableCache || len(s) > *internStringMaxLen
|
||||||
}
|
}
|
||||||
|
@ -33,52 +148,7 @@ func InternBytes(b []byte) string {
|
||||||
//
|
//
|
||||||
// This may be needed for reducing the amounts of allocated memory.
|
// This may be needed for reducing the amounts of allocated memory.
|
||||||
func InternString(s string) string {
|
func InternString(s string) string {
|
||||||
if isSkipCache(s) {
|
return ism.intern(s)
|
||||||
// Make a new copy for s in order to remove references from possible bigger string s refers to.
|
|
||||||
// This also protects from cases when s points to unsafe string - see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3227
|
|
||||||
return strings.Clone(s)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ct := fasttime.UnixTimestamp()
|
var ism = newInternStringMap()
|
||||||
if v, ok := internStringsMap.Load(s); ok {
|
|
||||||
e := v.(*ismEntry)
|
|
||||||
if e.lastAccessTime.Load()+10 < ct {
|
|
||||||
// Reduce the frequency of e.lastAccessTime update to once per 10 seconds
|
|
||||||
// in order to improve the fast path speed on systems with many CPU cores.
|
|
||||||
e.lastAccessTime.Store(ct)
|
|
||||||
}
|
|
||||||
return e.s
|
|
||||||
}
|
|
||||||
// Make a new copy for s in order to remove references from possible bigger string s refers to.
|
|
||||||
sCopy := strings.Clone(s)
|
|
||||||
e := &ismEntry{
|
|
||||||
s: sCopy,
|
|
||||||
}
|
|
||||||
e.lastAccessTime.Store(ct)
|
|
||||||
internStringsMap.Store(sCopy, e)
|
|
||||||
|
|
||||||
if needCleanup(&internStringsMapLastCleanupTime, ct) {
|
|
||||||
// Perform a global cleanup for internStringsMap by removing items, which weren't accessed during the last 5 minutes.
|
|
||||||
m := &internStringsMap
|
|
||||||
deadline := ct - uint64(cacheExpireDuration.Seconds())
|
|
||||||
m.Range(func(k, v interface{}) bool {
|
|
||||||
e := v.(*ismEntry)
|
|
||||||
if e.lastAccessTime.Load() < deadline {
|
|
||||||
m.Delete(k)
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
return sCopy
|
|
||||||
}
|
|
||||||
|
|
||||||
type ismEntry struct {
|
|
||||||
lastAccessTime atomic.Uint64
|
|
||||||
s string
|
|
||||||
}
|
|
||||||
|
|
||||||
var (
|
|
||||||
internStringsMap sync.Map
|
|
||||||
internStringsMapLastCleanupTime atomic.Uint64
|
|
||||||
)
|
|
||||||
|
|
Loading…
Reference in a new issue