mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-01 14:47:38 +00:00
lib/storage: document why job-like and instance-like labels must be stored at mn.Tags[0] and mn.Tags[1]
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2244
This commit is contained in:
parent
d1881fa582
commit
28b610db07
2 changed files with 11 additions and 0 deletions
|
@ -630,6 +630,11 @@ func generateTSID(dst *TSID, mn *MetricName) {
|
||||||
dst.AccountID = mn.AccountID
|
dst.AccountID = mn.AccountID
|
||||||
dst.ProjectID = mn.ProjectID
|
dst.ProjectID = mn.ProjectID
|
||||||
dst.MetricGroupID = xxhash.Sum64(mn.MetricGroup)
|
dst.MetricGroupID = xxhash.Sum64(mn.MetricGroup)
|
||||||
|
// Assume that the job-like tag is put at mn.Tags[0], while the instance-like tag is put at mn.Tags[1].
|
||||||
|
// This assumption is true because mn.Tags must be sorted with mn.sortTags() before calling generateTSID() function.
|
||||||
|
// This allows grouping data blocks for the same (job, instance) close to each other on disk.
|
||||||
|
// This reduces disk seeks and disk read IO when data blocks are read from disk for the same job and/or instance.
|
||||||
|
// For example, data blocks for time series matching `process_resident_memory_bytes{job="vmstorage"}` are physically adjacent on disk.
|
||||||
if len(mn.Tags) > 0 {
|
if len(mn.Tags) > 0 {
|
||||||
dst.JobID = uint32(xxhash.Sum64(mn.Tags[0].Value))
|
dst.JobID = uint32(xxhash.Sum64(mn.Tags[0].Value))
|
||||||
}
|
}
|
||||||
|
|
|
@ -696,6 +696,12 @@ func unmarshalBytesFast(src []byte) ([]byte, []byte, error) {
|
||||||
|
|
||||||
// sortTags sorts tags in mn to canonical form needed for storing in the index.
|
// sortTags sorts tags in mn to canonical form needed for storing in the index.
|
||||||
//
|
//
|
||||||
|
// sortTags tries to move the job-like tag to mn.Tags[0] and the instance-like tag to mn.Tags[1].
|
||||||
|
// See commonTagKeys list for job-like and instance-like tags.
|
||||||
|
// This guarantees that indexdb entries for the same (job, instance) are located
|
||||||
|
// close to each other on disk. This reduces disk seeks and disk read IO when metrics
|
||||||
|
// for a particular job and/or instance are read from the disk.
|
||||||
|
//
|
||||||
// The function also de-duplicates tags with identical keys in mn. The last tag value
|
// The function also de-duplicates tags with identical keys in mn. The last tag value
|
||||||
// for duplicate tags wins.
|
// for duplicate tags wins.
|
||||||
//
|
//
|
||||||
|
|
Loading…
Reference in a new issue