mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
e1cf962bad
Previously all the newly ingested time series were registered in global `MetricName -> TSID` index.
This index was used during data ingestion for locating the TSID (internal series id)
for the given canonical metric name (the canonical metric name consists of metric name plus all its labels sorted by label names).
The `MetricName -> TSID` index is stored on disk in order to make sure that the data
isn't lost on VictoriaMetrics restart or unclean shutdown.
The lookup in this index is relatively slow, since VictoriaMetrics needs to read the corresponding
data block from disk, unpack it, put the unpacked block into `indexdb/dataBlocks` cache,
and then search for the given `MetricName -> TSID` entry there. So VictoriaMetrics
uses in-memory cache for speeding up the lookup for active time series.
This cache is named `storage/tsid`. If this cache capacity is enough for all the currently ingested
active time series, then VictoriaMetrics works fast, since it doesn't need to read the data from disk.
VictoriaMetrics starts reading data from `MetricName -> TSID` on-disk index in the following cases:
- If `storage/tsid` cache capacity isn't enough for active time series.
Then just increase available memory for VictoriaMetrics or reduce the number of active time series
ingested into VictoriaMetrics.
- If new time series is ingested into VictoriaMetrics. In this case it cannot find
the needed entry in the `storage/tsid` cache, so it needs to consult on-disk `MetricName -> TSID` index,
since it doesn't know that the index has no the corresponding entry too.
This is a typical event under high churn rate, when old time series are constantly substituted
with new time series.
Reading the data from `MetricName -> TSID` index is slow, so inserts, which lead to reading this index,
are counted as slow inserts, and they can be monitored via `vm_slow_row_inserts_total` metric exposed by VictoriaMetrics.
Prior to this commit the `MetricName -> TSID` index was global, e.g. it contained entries sorted by `MetricName`
for all the time series ever ingested into VictoriaMetrics during the configured -retentionPeriod.
This index can become very large under high churn rate and long retention. VictoriaMetrics
caches data from this index in `indexdb/dataBlocks` in-memory cache for speeding up index lookups.
The `indexdb/dataBlocks` cache may occupy significant share of available memory for storing
recently accessed blocks at `MetricName -> TSID` index when searching for newly ingested time series.
This commit switches from global `MetricName -> TSID` index to per-day index. This allows significantly
reducing the amounts of data, which needs to be cached in `indexdb/dataBlocks`, since now VictoriaMetrics
consults only the index for the current day when new time series is ingested into it.
The downside of this change is increased indexdb size on disk for workloads without high churn rate,
e.g. with static time series, which do no change over time, since now VictoriaMetrics needs to store
identical `MetricName -> TSID` entries for static time series for every day.
This change removes an optimization for reducing CPU and disk IO spikes at indexdb rotation,
since it didn't work correctly - see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1401 .
At the same time the change fixes the issue, which could result in lost access to time series,
which stop receving new samples during the first hour after indexdb rotation - see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2698
The issue with the increased CPU and disk IO usage during indexdb rotation will be addressed
in a separate commit according to https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1401#issuecomment-1553488685
This is a follow-up for 1f28b46ae9
339 lines
10 KiB
Go
339 lines
10 KiB
Go
package storage
|
|
|
|
import (
|
|
"fmt"
|
|
"regexp"
|
|
"strconv"
|
|
"sync/atomic"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
|
)
|
|
|
|
func BenchmarkRegexpFilterMatch(b *testing.B) {
|
|
b.ReportAllocs()
|
|
b.RunParallel(func(pb *testing.PB) {
|
|
re := regexp.MustCompile(`.*foo-bar-baz.*`)
|
|
b := []byte("fdsffd foo-bar-baz assd fdsfad dasf dsa")
|
|
for pb.Next() {
|
|
if !re.Match(b) {
|
|
panic("BUG: regexp must match!")
|
|
}
|
|
b[0]++
|
|
}
|
|
})
|
|
}
|
|
|
|
func BenchmarkRegexpFilterMismatch(b *testing.B) {
|
|
b.ReportAllocs()
|
|
b.RunParallel(func(pb *testing.PB) {
|
|
re := regexp.MustCompile(`.*foo-bar-baz.*`)
|
|
b := []byte("fdsffd foo-bar sfddsf assd nmn,mfdsdsakj")
|
|
for pb.Next() {
|
|
if re.Match(b) {
|
|
panic("BUG: regexp mustn't match!")
|
|
}
|
|
b[0]++
|
|
}
|
|
})
|
|
}
|
|
|
|
func BenchmarkIndexDBAddTSIDs(b *testing.B) {
|
|
const path = "BenchmarkIndexDBAddTSIDs"
|
|
s := MustOpenStorage(path, maxRetentionMsecs, 0, 0)
|
|
db := s.idb()
|
|
|
|
const recordsPerLoop = 1e3
|
|
|
|
var goroutineID uint32
|
|
|
|
b.ReportAllocs()
|
|
b.SetBytes(recordsPerLoop)
|
|
b.ResetTimer()
|
|
b.RunParallel(func(pb *testing.PB) {
|
|
var mn MetricName
|
|
var genTSID generationTSID
|
|
mn.AccountID = atomic.AddUint32(&goroutineID, 1)
|
|
|
|
// The most common tags.
|
|
mn.Tags = []Tag{
|
|
{
|
|
Key: []byte("job"),
|
|
},
|
|
{
|
|
Key: []byte("instance"),
|
|
},
|
|
}
|
|
|
|
startOffset := 0
|
|
for pb.Next() {
|
|
benchmarkIndexDBAddTSIDs(db, &genTSID, &mn, startOffset, recordsPerLoop)
|
|
startOffset += recordsPerLoop
|
|
}
|
|
})
|
|
b.StopTimer()
|
|
|
|
s.MustClose()
|
|
fs.MustRemoveAll(path)
|
|
}
|
|
|
|
func benchmarkIndexDBAddTSIDs(db *indexDB, genTSID *generationTSID, mn *MetricName, startOffset, recordsPerLoop int) {
|
|
date := uint64(0)
|
|
var metricNameRaw []byte
|
|
is := db.getIndexSearch(0, 0, noDeadline)
|
|
defer db.putIndexSearch(is)
|
|
for i := 0; i < recordsPerLoop; i++ {
|
|
mn.MetricGroup = strconv.AppendUint(mn.MetricGroup[:0], uint64(i+startOffset), 10)
|
|
for j := range mn.Tags {
|
|
mn.Tags[j].Value = strconv.AppendUint(mn.Tags[j].Value[:0], uint64(i*j), 16)
|
|
}
|
|
mn.sortTags()
|
|
|
|
metricNameRaw = mn.marshalRaw(metricNameRaw[:0])
|
|
generateTSID(&genTSID.TSID, mn)
|
|
genTSID.generation = db.generation
|
|
db.s.createAllIndexesForMetricName(is, mn, metricNameRaw, genTSID, date)
|
|
}
|
|
}
|
|
|
|
func BenchmarkHeadPostingForMatchers(b *testing.B) {
|
|
// This benchmark is equivalent to https://github.com/prometheus/prometheus/blob/23c0299d85bfeb5d9b59e994861553a25ca578e5/tsdb/head_bench_test.go#L52
|
|
// See https://www.robustperception.io/evaluating-performance-and-correctness for more details.
|
|
const path = "BenchmarkHeadPostingForMatchers"
|
|
s := MustOpenStorage(path, maxRetentionMsecs, 0, 0)
|
|
db := s.idb()
|
|
|
|
// Fill the db with data as in https://github.com/prometheus/prometheus/blob/23c0299d85bfeb5d9b59e994861553a25ca578e5/tsdb/head_bench_test.go#L66
|
|
const accountID = 34327843
|
|
const projectID = 893433
|
|
is := db.getIndexSearch(0, 0, noDeadline)
|
|
defer db.putIndexSearch(is)
|
|
var mn MetricName
|
|
var metricNameRaw []byte
|
|
var genTSID generationTSID
|
|
date := uint64(0)
|
|
addSeries := func(kvs ...string) {
|
|
mn.Reset()
|
|
for i := 0; i < len(kvs); i += 2 {
|
|
mn.AddTag(kvs[i], kvs[i+1])
|
|
}
|
|
mn.sortTags()
|
|
mn.AccountID = accountID
|
|
mn.ProjectID = projectID
|
|
metricNameRaw = mn.marshalRaw(metricNameRaw[:0])
|
|
generateTSID(&genTSID.TSID, &mn)
|
|
genTSID.generation = db.generation
|
|
db.s.createAllIndexesForMetricName(is, &mn, metricNameRaw, &genTSID, date)
|
|
}
|
|
for n := 0; n < 10; n++ {
|
|
ns := strconv.Itoa(n)
|
|
for i := 0; i < 100000; i++ {
|
|
ix := strconv.Itoa(i)
|
|
addSeries("i", ix, "n", ns, "j", "foo")
|
|
// Have some series that won't be matched, to properly test inverted matches.
|
|
addSeries("i", ix, "n", ns, "j", "bar")
|
|
addSeries("i", ix, "n", "0_"+ns, "j", "bar")
|
|
addSeries("i", ix, "n", "1_"+ns, "j", "bar")
|
|
addSeries("i", ix, "n", "2_"+ns, "j", "foo")
|
|
}
|
|
}
|
|
|
|
// Make sure all the items can be searched.
|
|
db.s.DebugFlush()
|
|
b.ResetTimer()
|
|
|
|
benchSearch := func(b *testing.B, tfs *TagFilters, expectedMetricIDs int) {
|
|
is := db.getIndexSearch(tfs.accountID, tfs.projectID, noDeadline)
|
|
tfss := []*TagFilters{tfs}
|
|
tr := TimeRange{
|
|
MinTimestamp: 0,
|
|
MaxTimestamp: timestampFromTime(time.Now()),
|
|
}
|
|
for i := 0; i < b.N; i++ {
|
|
metricIDs, err := is.searchMetricIDs(nil, tfss, tr, 2e9)
|
|
if err != nil {
|
|
b.Fatalf("unexpected error in searchMetricIDs: %s", err)
|
|
}
|
|
if len(metricIDs) != expectedMetricIDs {
|
|
b.Fatalf("unexpected metricIDs found; got %d; want %d", len(metricIDs), expectedMetricIDs)
|
|
}
|
|
}
|
|
db.putIndexSearch(is)
|
|
}
|
|
addTagFilter := func(tfs *TagFilters, key, value string, isNegative, isRegexp bool) {
|
|
if err := tfs.Add([]byte(key), []byte(value), isNegative, isRegexp); err != nil {
|
|
b.Fatalf("cannot add tag filter %q=%q, isNegative=%v, isRegexp=%v", key, value, isNegative, isRegexp)
|
|
}
|
|
}
|
|
|
|
b.Run(`n="1"`, func(b *testing.B) {
|
|
tfs := NewTagFilters(accountID, projectID)
|
|
addTagFilter(tfs, "n", "1", false, false)
|
|
benchSearch(b, tfs, 2e5)
|
|
})
|
|
b.Run(`n="1",j="foo"`, func(b *testing.B) {
|
|
tfs := NewTagFilters(accountID, projectID)
|
|
addTagFilter(tfs, "n", "1", false, false)
|
|
addTagFilter(tfs, "j", "foo", false, false)
|
|
benchSearch(b, tfs, 1e5)
|
|
})
|
|
b.Run(`j="foo",n="1"`, func(b *testing.B) {
|
|
tfs := NewTagFilters(accountID, projectID)
|
|
addTagFilter(tfs, "j", "foo", false, false)
|
|
addTagFilter(tfs, "n", "1", false, false)
|
|
benchSearch(b, tfs, 1e5)
|
|
})
|
|
b.Run(`n="1",j!="foo"`, func(b *testing.B) {
|
|
tfs := NewTagFilters(accountID, projectID)
|
|
addTagFilter(tfs, "n", "1", false, false)
|
|
addTagFilter(tfs, "j", "foo", true, false)
|
|
benchSearch(b, tfs, 1e5)
|
|
})
|
|
b.Run(`i=~".*"`, func(b *testing.B) {
|
|
tfs := NewTagFilters(accountID, projectID)
|
|
addTagFilter(tfs, "i", ".*", false, true)
|
|
benchSearch(b, tfs, 0)
|
|
})
|
|
b.Run(`i=~".+"`, func(b *testing.B) {
|
|
tfs := NewTagFilters(accountID, projectID)
|
|
addTagFilter(tfs, "i", ".+", false, true)
|
|
benchSearch(b, tfs, 5e6)
|
|
})
|
|
b.Run(`i=~""`, func(b *testing.B) {
|
|
tfs := NewTagFilters(accountID, projectID)
|
|
addTagFilter(tfs, "i", "", false, true)
|
|
benchSearch(b, tfs, 0)
|
|
})
|
|
b.Run(`i!=""`, func(b *testing.B) {
|
|
tfs := NewTagFilters(accountID, projectID)
|
|
addTagFilter(tfs, "i", "", true, false)
|
|
benchSearch(b, tfs, 5e6)
|
|
})
|
|
b.Run(`n="1",i=~".*",j="foo"`, func(b *testing.B) {
|
|
tfs := NewTagFilters(accountID, projectID)
|
|
addTagFilter(tfs, "n", "1", false, false)
|
|
addTagFilter(tfs, "i", ".*", false, true)
|
|
addTagFilter(tfs, "j", "foo", false, false)
|
|
benchSearch(b, tfs, 1e5)
|
|
})
|
|
b.Run(`n="1",i=~".*",i!="2",j="foo"`, func(b *testing.B) {
|
|
tfs := NewTagFilters(accountID, projectID)
|
|
addTagFilter(tfs, "n", "1", false, false)
|
|
addTagFilter(tfs, "i", ".*", false, true)
|
|
addTagFilter(tfs, "i", "2", true, false)
|
|
addTagFilter(tfs, "j", "foo", false, false)
|
|
benchSearch(b, tfs, 1e5-1)
|
|
})
|
|
b.Run(`n="1",i!=""`, func(b *testing.B) {
|
|
tfs := NewTagFilters(accountID, projectID)
|
|
addTagFilter(tfs, "n", "1", false, false)
|
|
addTagFilter(tfs, "i", "", true, false)
|
|
benchSearch(b, tfs, 2e5)
|
|
})
|
|
b.Run(`n="1",i!="",j="foo"`, func(b *testing.B) {
|
|
tfs := NewTagFilters(accountID, projectID)
|
|
addTagFilter(tfs, "n", "1", false, false)
|
|
addTagFilter(tfs, "i", "", true, false)
|
|
addTagFilter(tfs, "j", "foo", false, false)
|
|
benchSearch(b, tfs, 1e5)
|
|
})
|
|
b.Run(`n="1",i=~".+",j="foo"`, func(b *testing.B) {
|
|
tfs := NewTagFilters(accountID, projectID)
|
|
addTagFilter(tfs, "n", "1", false, false)
|
|
addTagFilter(tfs, "i", ".+", false, true)
|
|
addTagFilter(tfs, "j", "foo", false, false)
|
|
benchSearch(b, tfs, 1e5)
|
|
})
|
|
b.Run(`n="1",i=~"1.+",j="foo"`, func(b *testing.B) {
|
|
tfs := NewTagFilters(accountID, projectID)
|
|
addTagFilter(tfs, "n", "1", false, false)
|
|
addTagFilter(tfs, "i", "1.+", false, true)
|
|
addTagFilter(tfs, "j", "foo", false, false)
|
|
benchSearch(b, tfs, 11110)
|
|
})
|
|
b.Run(`n="1",i=~".+",i!="2",j="foo"`, func(b *testing.B) {
|
|
tfs := NewTagFilters(accountID, projectID)
|
|
addTagFilter(tfs, "n", "1", false, false)
|
|
addTagFilter(tfs, "i", ".+", false, true)
|
|
addTagFilter(tfs, "i", "2", true, false)
|
|
addTagFilter(tfs, "j", "foo", false, false)
|
|
benchSearch(b, tfs, 1e5-1)
|
|
})
|
|
b.Run(`n="1",i=~".+",i!~"2.*",j="foo"`, func(b *testing.B) {
|
|
tfs := NewTagFilters(accountID, projectID)
|
|
addTagFilter(tfs, "n", "1", false, false)
|
|
addTagFilter(tfs, "i", ".+", false, true)
|
|
addTagFilter(tfs, "i", "2.*", true, true)
|
|
addTagFilter(tfs, "j", "foo", false, false)
|
|
benchSearch(b, tfs, 88889)
|
|
})
|
|
|
|
s.MustClose()
|
|
fs.MustRemoveAll(path)
|
|
}
|
|
|
|
func BenchmarkIndexDBGetTSIDs(b *testing.B) {
|
|
const path = "BenchmarkIndexDBGetTSIDs"
|
|
s := MustOpenStorage(path, maxRetentionMsecs, 0, 0)
|
|
db := s.idb()
|
|
|
|
const recordsPerLoop = 1000
|
|
const accountsCount = 111
|
|
const projectsCount = 33333
|
|
const recordsCount = 1e5
|
|
|
|
// Fill the db with recordsCount records.
|
|
var mn MetricName
|
|
mn.MetricGroup = []byte("rps")
|
|
for i := 0; i < 2; i++ {
|
|
key := fmt.Sprintf("key_%d", i)
|
|
value := fmt.Sprintf("value_%d", i)
|
|
mn.AddTag(key, value)
|
|
}
|
|
var genTSID generationTSID
|
|
var metricNameRaw []byte
|
|
date := uint64(0)
|
|
|
|
is := db.getIndexSearch(0, 0, noDeadline)
|
|
defer db.putIndexSearch(is)
|
|
|
|
for i := 0; i < recordsCount; i++ {
|
|
mn.AccountID = uint32(i % accountsCount)
|
|
mn.ProjectID = uint32(i % projectsCount)
|
|
mn.sortTags()
|
|
metricNameRaw = mn.marshalRaw(metricNameRaw[:0])
|
|
generateTSID(&genTSID.TSID, &mn)
|
|
genTSID.generation = db.generation
|
|
db.s.createAllIndexesForMetricName(is, &mn, metricNameRaw, &genTSID, date)
|
|
}
|
|
db.s.DebugFlush()
|
|
|
|
b.SetBytes(recordsPerLoop)
|
|
b.ReportAllocs()
|
|
b.ResetTimer()
|
|
b.RunParallel(func(pb *testing.PB) {
|
|
var genTSIDLocal generationTSID
|
|
var metricNameLocal []byte
|
|
var metricNameLocalRaw []byte
|
|
mnLocal := mn
|
|
is := db.getIndexSearch(0, 0, noDeadline)
|
|
for pb.Next() {
|
|
for i := 0; i < recordsPerLoop; i++ {
|
|
mnLocal.AccountID = uint32(i % accountsCount)
|
|
mnLocal.ProjectID = uint32(i % projectsCount)
|
|
mnLocal.sortTags()
|
|
metricNameLocal = mnLocal.Marshal(metricNameLocal[:0])
|
|
metricNameLocalRaw = mnLocal.marshalRaw(metricNameLocalRaw[:0])
|
|
if !is.getTSIDByMetricName(&genTSIDLocal, metricNameLocal, date) {
|
|
panic(fmt.Errorf("cannot obtain tsid for row %d", i))
|
|
}
|
|
}
|
|
}
|
|
db.putIndexSearch(is)
|
|
})
|
|
b.StopTimer()
|
|
|
|
s.MustClose()
|
|
fs.MustRemoveAll(path)
|
|
}
|