From 28b610db07af47b1f3f6f49ecf7cd825b724854e Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Fri, 25 Feb 2022 13:21:02 +0200 Subject: [PATCH] lib/storage: document why job-like and instance-like labels must be stored at mn.Tags[0] and mn.Tags[1] Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2244 --- lib/storage/index_db.go | 5 +++++ lib/storage/metric_name.go | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/lib/storage/index_db.go b/lib/storage/index_db.go index 7da690b9d8..4c8828b133 100644 --- a/lib/storage/index_db.go +++ b/lib/storage/index_db.go @@ -630,6 +630,11 @@ func generateTSID(dst *TSID, mn *MetricName) { dst.AccountID = mn.AccountID dst.ProjectID = mn.ProjectID dst.MetricGroupID = xxhash.Sum64(mn.MetricGroup) + // Assume that the job-like tag is put at mn.Tags[0], while the instance-like tag is put at mn.Tags[1]. + // This assumption is true because mn.Tags must be sorted with mn.sortTags() before calling the generateTSID() function. + // This allows grouping data blocks for the same (job, instance) close to each other on disk. + // This reduces disk seeks and disk read IO when data blocks are read from disk for the same job and/or instance. + // For example, data blocks for time series matching `process_resident_memory_bytes{job="vmstorage"}` are physically adjacent on disk. if len(mn.Tags) > 0 { dst.JobID = uint32(xxhash.Sum64(mn.Tags[0].Value)) } diff --git a/lib/storage/metric_name.go b/lib/storage/metric_name.go index 5016561e96..90a23f971b 100644 --- a/lib/storage/metric_name.go +++ b/lib/storage/metric_name.go @@ -696,6 +696,12 @@ func unmarshalBytesFast(src []byte) ([]byte, []byte, error) { // sortTags sorts tags in mn to canonical form needed for storing in the index. // +// sortTags tries to move the job-like tag to mn.Tags[0] and the instance-like tag to mn.Tags[1]. +// See the commonTagKeys list for job-like and instance-like tags. 
+// This guarantees that indexdb entries for the same (job, instance) are located +// close to each other on disk. This reduces disk seeks and disk read IO when metrics +// for a particular job and/or instance are read from the disk. +// // The function also de-duplicates tags with identical keys in mn. The last tag value // for duplicate tags wins. //