lib/storage: de-duplicate tags in MetricName.sortTags

Leave only the last tag among tags with duplicate keys. This is needed for reliable addition of extra_labels
during data ingestion. See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1007 for details.
This commit is contained in:
Aliaksandr Valialkin 2021-01-12 14:57:54 +02:00
parent 1dee98a331
commit 719ad49adf
2 changed files with 29 additions and 11 deletions

View file

@ -365,7 +365,8 @@ func (mn *MetricName) String() string {
// Marshal appends marshaled mn to dst and returns the result. // Marshal appends marshaled mn to dst and returns the result.
// //
// Tags must be sorted before calling this function. // mn.sortTags must be called before calling this function
// in order to sort and de-duplicate tags.
func (mn *MetricName) Marshal(dst []byte) []byte { func (mn *MetricName) Marshal(dst []byte) []byte {
// Calculate the required size and pre-allocate space in dst // Calculate the required size and pre-allocate space in dst
dstLen := len(dst) dstLen := len(dst)
@ -411,7 +412,7 @@ func (mn *MetricName) Unmarshal(src []byte) error {
} }
// There is no need in verifying for identical tag keys, // There is no need in verifying for identical tag keys,
// since they must be handled in MetricName.Marshal inside marshalTags. // since they must be handled by MetricName.sortTags before calling MetricName.Marshal.
return nil return nil
} }
@ -631,7 +632,10 @@ func unmarshalBytesFast(src []byte) ([]byte, []byte, error) {
return src[n:], src[:n], nil return src[n:], src[:n], nil
} }
// sortTags sorts tags in mn. // sortTags sorts tags in mn to canonical form needed for storing in the index.
//
// The function also de-duplicates tags with identical keys in mn. The last tag value
// for duplicate tags wins.
// //
// Tags sorting is quite slow, so try avoiding it by caching mn // Tags sorting is quite slow, so try avoiding it by caching mn
// with sorted tags. // with sorted tags.
@ -653,12 +657,25 @@ func (mn *MetricName) sortTags() {
} }
cts.tags = dst cts.tags = dst
// Use sort.Sort instead of sort.Slice, since sort.Slice allocates a lot. // Use sort.Stable instead of sort.Sort in order to preserve the order of tags with duplicate keys.
sort.Sort(&cts.tags) // The last tag value wins for tags with duplicate keys.
// Use sort.Stable instead of sort.SliceStable, since sort.SliceStable allocates a lot.
sort.Stable(&cts.tags)
j := 0
var prevKey []byte
for i := range cts.tags { for i := range cts.tags {
mn.Tags[i].copyFrom(&cts.tags[i].tag) tag := &cts.tags[i].tag
if j > 0 && bytes.Equal(tag.Key, prevKey) {
// Overwrite the previous tag with duplicate key.
j--
} else {
prevKey = tag.Key
} }
mn.Tags[j].copyFrom(tag)
j++
}
mn.Tags = mn.Tags[:j]
putCanonicalTags(cts) putCanonicalTags(cts)
} }

View file

@ -68,17 +68,18 @@ func TestMetricNameMarshalDuplicateKeys(t *testing.T) {
mn.ProjectID = 324 mn.ProjectID = 324
mn.MetricGroup = []byte("xxx") mn.MetricGroup = []byte("xxx")
mn.AddTag("foo", "bar") mn.AddTag("foo", "bar")
mn.AddTag("duplicate", "tag") mn.AddTag("duplicate", "tag1")
mn.AddTag("duplicate", "tag")
mn.AddTag("tt", "xx")
mn.AddTag("duplicate", "tag2") mn.AddTag("duplicate", "tag2")
mn.AddTag("tt", "xx")
mn.AddTag("foo", "abc")
mn.AddTag("duplicate", "tag3")
var mnExpected MetricName var mnExpected MetricName
mnExpected.AccountID = 123 mnExpected.AccountID = 123
mnExpected.ProjectID = 324 mnExpected.ProjectID = 324
mnExpected.MetricGroup = []byte("xxx") mnExpected.MetricGroup = []byte("xxx")
mnExpected.AddTag("duplicate", "tag") mnExpected.AddTag("duplicate", "tag3")
mnExpected.AddTag("foo", "bar") mnExpected.AddTag("foo", "abc")
mnExpected.AddTag("tt", "xx") mnExpected.AddTag("tt", "xx")
mn.sortTags() mn.sortTags()