app/vmselect/promql: reduce the number of memory allocations inside getCommonLabelFilters()

This should slightly improve performance for `q1 op q2` queries.
This commit is contained in:
Aliaksandr Valialkin 2023-01-15 12:55:37 -08:00
parent ffe6e6fe59
commit 27afe7bc38
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1

View file

@ -505,28 +505,54 @@ func execBinaryOpArgs(qt *querytracer.Tracer, ec *EvalConfig, exprFirst, exprSec
} }
func getCommonLabelFilters(tss []*timeseries) []metricsql.LabelFilter { func getCommonLabelFilters(tss []*timeseries) []metricsql.LabelFilter {
m := make(map[string][]string) if len(tss) == 0 {
return nil
}
type valuesCounter struct {
values map[string]struct{}
count int
}
m := make(map[string]*valuesCounter, len(tss[0].MetricName.Tags))
for _, ts := range tss { for _, ts := range tss {
for _, tag := range ts.MetricName.Tags { for _, tag := range ts.MetricName.Tags {
vc, ok := m[string(tag.Key)]
if !ok {
k := bytesutil.InternBytes(tag.Key) k := bytesutil.InternBytes(tag.Key)
v := bytesutil.InternBytes(tag.Value) v := bytesutil.InternBytes(tag.Value)
m[k] = append(m[k], v) m[k] = &valuesCounter{
values: map[string]struct{}{
v: {},
},
count: 1,
}
continue
}
if len(vc.values) > 100 {
// Too many unique values found for the given tag.
// Do not make a filter on such values, since it may slow down
// search for matching time series.
continue
}
vc.count++
if _, ok := vc.values[string(tag.Value)]; !ok {
v := bytesutil.InternBytes(tag.Value)
vc.values[v] = struct{}{}
}
} }
} }
lfs := make([]metricsql.LabelFilter, 0, len(m)) lfs := make([]metricsql.LabelFilter, 0, len(m))
for key, values := range m { var values []string
if len(values) != len(tss) { for k, vc := range m {
if vc.count != len(tss) {
// Skip the tag, since it doesn't belong to all the time series. // Skip the tag, since it doesn't belong to all the time series.
continue continue
} }
values = getUniqueValues(values) values = values[:0]
if len(values) > 1000 { for s := range vc.values {
// Skip the filter on the given tag, since it needs to enumerate too many unique values. values = append(values, s)
// This may slow down the search for matching time series.
continue
} }
lf := metricsql.LabelFilter{ lf := metricsql.LabelFilter{
Label: key, Label: k,
} }
if len(values) == 1 { if len(values) == 1 {
lf.Value = values[0] lf.Value = values[0]
@ -543,18 +569,6 @@ func getCommonLabelFilters(tss []*timeseries) []metricsql.LabelFilter {
return lfs return lfs
} }
// getUniqueValues returns the distinct strings from a in the order of their first occurrence.
//
// The result is always a freshly allocated, non-nil slice; a itself is not modified.
func getUniqueValues(a []string) []string {
	seen := make(map[string]struct{}, len(a))
	uniq := make([]string, 0, len(a))
	for i := range a {
		v := a[i]
		if _, dup := seen[v]; dup {
			// The value has been already emitted earlier.
			continue
		}
		seen[v] = struct{}{}
		uniq = append(uniq, v)
	}
	return uniq
}
func joinRegexpValues(a []string) string { func joinRegexpValues(a []string) string {
var b []byte var b []byte
for i, s := range a { for i, s := range a {