lib/storage: properly search series by multiple tag filters matching empty labels such as foo{bar=~"baz|",x=~"y|"}

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1601
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/395
This commit is contained in:
Aliaksandr Valialkin 2021-09-09 21:09:18 +03:00
parent 9b07cb2988
commit c2f37f049b
5 changed files with 57 additions and 43 deletions

View file

@ -1158,7 +1158,6 @@ func (ctx *vmselectRequestCtx) setupTfss(s *storage.Storage, tr storage.TimeRang
} }
} }
tfss = append(tfss, tfs) tfss = append(tfss, tfs)
tfss = append(tfss, tfs.Finalize()...)
} }
ctx.tfss = tfss ctx.tfss = tfss
return nil return nil

View file

@ -12,6 +12,7 @@ sort: 15
* FEATURE: add new relabeling actions: `keep_metrics` and `drop_metrics`. This simplifies metrics filtering by metric names. See [these docs](https://docs.victoriametrics.com/vmagent.html#relabeling) for more details. * FEATURE: add new relabeling actions: `keep_metrics` and `drop_metrics`. This simplifies metrics filtering by metric names. See [these docs](https://docs.victoriametrics.com/vmagent.html#relabeling) for more details.
* FAETURE: allow splitting long `regex` in relabeling filters into an array of shorter regexps, which can be put into multiple lines for better readability and maintainability. See [these docs](https://docs.victoriametrics.com/vmagent.html#relabeling) for more details. * FAETURE: allow splitting long `regex` in relabeling filters into an array of shorter regexps, which can be put into multiple lines for better readability and maintainability. See [these docs](https://docs.victoriametrics.com/vmagent.html#relabeling) for more details.
* BUGFIX: properly handle queries with multiple filters matching empty labels such as `metric{label1=~"foo|",label2="bar|"}`. This filter must match the following series: `metric`, `metric{label1="foo"}`, `metric{label2="bar"}` and `metric{label1="foo",label2="bar"}`. Previously it was matching only `metric{label1="foo",label2="bar"}`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1601).
* BUGFIX: vmselect: reset connection timeouts after each request to `vmstorage`. This should prevent from `cannot read data in 0.000 seconds: unexpected EOF` warning in logs. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1562). Thanks to @mxlxm . * BUGFIX: vmselect: reset connection timeouts after each request to `vmstorage`. This should prevent from `cannot read data in 0.000 seconds: unexpected EOF` warning in logs. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1562). Thanks to @mxlxm .
* BUGFIX: keep metric name for time series returned from [rollup_candlestick](https://docs.victoriametrics.com/MetricsQL.html#rollup_candlestick) function, since the returned series don't change the meaning of the original series. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1600). * BUGFIX: keep metric name for time series returned from [rollup_candlestick](https://docs.victoriametrics.com/MetricsQL.html#rollup_candlestick) function, since the returned series don't change the meaning of the original series. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1600).

View file

@ -2472,7 +2472,7 @@ func (is *indexSearch) getMetricIDsForDateAndFilters(date uint64, tfs *TagFilter
} }
for i, tfw := range tfws { for i, tfw := range tfws {
tf := tfw.tf tf := tfw.tf
if tf.isNegative { if tf.isNegative || tf.isEmptyMatch {
tfwsRemaining = append(tfwsRemaining, tfw) tfwsRemaining = append(tfwsRemaining, tfw)
continue continue
} }
@ -2577,7 +2577,7 @@ func (is *indexSearch) getMetricIDsForDateAndFilters(date uint64, tfs *TagFilter
return nil, err return nil, err
} }
storeFilterLoopsCount(&tfw, filterLoopsCount) storeFilterLoopsCount(&tfw, filterLoopsCount)
if tf.isNegative { if tf.isNegative || tf.isEmptyMatch {
metricIDs.Subtract(m) metricIDs.Subtract(m)
} else { } else {
metricIDs.Intersect(m) metricIDs.Intersect(m)
@ -2733,6 +2733,7 @@ func (is *indexSearch) getMetricIDsForDateTagFilter(tf *tagFilter, date uint64,
logger.Panicf("BUG: unexpected tf.prefix %q; must start with commonPrefix %q", tf.prefix, commonPrefix) logger.Panicf("BUG: unexpected tf.prefix %q; must start with commonPrefix %q", tf.prefix, commonPrefix)
} }
kb := kbPool.Get() kb := kbPool.Get()
defer kbPool.Put(kb)
if date != 0 { if date != 0 {
// Use per-date search. // Use per-date search.
kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixDateTagToMetricIDs) kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixDateTagToMetricIDs)
@ -2746,8 +2747,27 @@ func (is *indexSearch) getMetricIDsForDateTagFilter(tf *tagFilter, date uint64,
tfNew.isNegative = false // isNegative for the original tf is handled by the caller. tfNew.isNegative = false // isNegative for the original tf is handled by the caller.
tfNew.prefix = kb.B tfNew.prefix = kb.B
metricIDs, loopsCount, err := is.getMetricIDsForTagFilter(&tfNew, maxMetrics, maxLoopsCount) metricIDs, loopsCount, err := is.getMetricIDsForTagFilter(&tfNew, maxMetrics, maxLoopsCount)
kbPool.Put(kb) if err != nil {
return metricIDs, loopsCount, err return nil, loopsCount, err
}
if tf.isNegative || !tf.isEmptyMatch {
return metricIDs, loopsCount, nil
}
// The tag filter, which matches empty label such as {foo=~"bar|"}
// Convert it to negative filter, which matches {foo=~".+",foo!~"bar|"}.
// This fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1601
// See also https://github.com/VictoriaMetrics/VictoriaMetrics/issues/395
maxLoopsCount -= loopsCount
if err := tfNew.Init(kb.B, tf.key, []byte(".+"), false, true); err != nil {
logger.Panicf(`BUG: cannot init tag filter: {%q=~".+"}: %s`, tf.key, err)
}
m, lc, err := is.getMetricIDsForTagFilter(&tfNew, maxMetrics, maxLoopsCount)
loopsCount += lc
if err != nil {
return nil, loopsCount, err
}
m.Subtract(metricIDs)
return m, loopsCount, nil
} }
func (is *indexSearch) getLoopsCountAndTimestampForDateFilter(date uint64, tf *tagFilter) (int64, int64, uint64) { func (is *indexSearch) getLoopsCountAndTimestampForDateFilter(date uint64, tf *tagFilter) (int64, int64, uint64) {

View file

@ -911,9 +911,6 @@ func testIndexDBCheckTSIDByName(db *indexDB, mns []MetricName, tsids []TSID, isC
if err := tfs.Add(nil, []byte(re), false, true); err != nil { if err := tfs.Add(nil, []byte(re), false, true); err != nil {
return fmt.Errorf("cannot create regexp tag filter for Graphite wildcard") return fmt.Errorf("cannot create regexp tag filter for Graphite wildcard")
} }
if tfsNew := tfs.Finalize(); len(tfsNew) > 0 {
return fmt.Errorf("unexpected non-empty tag filters returned by TagFilters.Finalize: %v", tfsNew)
}
tsidsFound, err = db.searchTSIDs([]*TagFilters{tfs}, tr, 1e5, noDeadline) tsidsFound, err = db.searchTSIDs([]*TagFilters{tfs}, tr, 1e5, noDeadline)
if err != nil { if err != nil {
return fmt.Errorf("cannot search by regexp tag filter for Graphite wildcard: %w", err) return fmt.Errorf("cannot search by regexp tag filter for Graphite wildcard: %w", err)
@ -922,6 +919,37 @@ func testIndexDBCheckTSIDByName(db *indexDB, mns []MetricName, tsids []TSID, isC
return fmt.Errorf("tsids is missing in regexp for Graphite wildcard tsidsFound\ntsid=%+v\ntsidsFound=%+v\ntfs=%s\nmn=%s", tsid, tsidsFound, tfs, mn) return fmt.Errorf("tsids is missing in regexp for Graphite wildcard tsidsFound\ntsid=%+v\ntsidsFound=%+v\ntfs=%s\nmn=%s", tsid, tsidsFound, tfs, mn)
} }
// Search with a filter matching empty tag (a single filter)
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1601
tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add(nil, mn.MetricGroup, false, false); err != nil {
return fmt.Errorf("cannot create tag filter for MetricGroup: %w", err)
}
if err := tfs.Add([]byte("non-existent-tag"), []byte("foo|"), false, true); err != nil {
return fmt.Errorf("cannot create regexp tag filter for non-existing tag: %w", err)
}
tsidsFound, err = db.searchTSIDs([]*TagFilters{tfs}, tr, 1e5, noDeadline)
if err != nil {
return fmt.Errorf("cannot search with a filter matching empty tag: %w", err)
}
// Search with filters matching empty tags (multiple filters)
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1601
tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add(nil, mn.MetricGroup, false, false); err != nil {
return fmt.Errorf("cannot create tag filter for MetricGroup: %w", err)
}
if err := tfs.Add([]byte("non-existent-tag1"), []byte("foo|"), false, true); err != nil {
return fmt.Errorf("cannot create regexp tag filter for non-existing tag1: %w", err)
}
if err := tfs.Add([]byte("non-existent-tag2"), []byte("bar|"), false, true); err != nil {
return fmt.Errorf("cannot create regexp tag filter for non-existing tag2: %w", err)
}
tsidsFound, err = db.searchTSIDs([]*TagFilters{tfs}, tr, 1e5, noDeadline)
if err != nil {
return fmt.Errorf("cannot search with multipel filters matching empty tags: %w", err)
}
// Search with regexps. // Search with regexps.
tfs.Reset(mn.AccountID, mn.ProjectID) tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add(nil, mn.MetricGroup, false, true); err != nil { if err := tfs.Add(nil, mn.MetricGroup, false, true); err != nil {

View file

@ -33,7 +33,7 @@ func convertToCompositeTagFilters(tfs *TagFilters) *TagFilters {
for _, tf := range tfs.tfs { for _, tf := range tfs.tfs {
if len(tf.key) == 0 && !tf.isNegative && !tf.isRegexp { if len(tf.key) == 0 && !tf.isNegative && !tf.isRegexp {
name = tf.value name = tf.value
} else if !tf.isNegative { } else if !tf.isNegative && !tf.isEmptyMatch {
hasPositiveFilter = true hasPositiveFilter = true
} }
} }
@ -108,8 +108,6 @@ func (tfs *TagFilters) AddGraphiteQuery(query []byte, paths []string, isNegative
// Add adds the given tag filter to tfs. // Add adds the given tag filter to tfs.
// //
// MetricGroup must be encoded with nil key. // MetricGroup must be encoded with nil key.
//
// Finalize must be called after tfs is constructed.
func (tfs *TagFilters) Add(key, value []byte, isNegative, isRegexp bool) error { func (tfs *TagFilters) Add(key, value []byte, isNegative, isRegexp bool) error {
// Verify whether tag filter is empty. // Verify whether tag filter is empty.
if len(value) == 0 { if len(value) == 0 {
@ -163,38 +161,6 @@ func (tfs *TagFilters) addTagFilter() *tagFilter {
return &tfs.tfs[len(tfs.tfs)-1] return &tfs.tfs[len(tfs.tfs)-1]
} }
// Finalize finalizes tfs and may return complementary TagFilters,
// which must be added to the resulting set of tag filters.
func (tfs *TagFilters) Finalize() []*TagFilters {
var tfssNew []*TagFilters
for i := range tfs.tfs {
tf := &tfs.tfs[i]
if !tf.isNegative && tf.isEmptyMatch {
// tf matches empty value, so it must be accompanied with `key!~".+"` tag filter
// in order to match time series without the given label.
tfssNew = append(tfssNew, tfs.cloneWithNegativeFilter(tf))
}
}
return tfssNew
}
func (tfs *TagFilters) cloneWithNegativeFilter(tfNegative *tagFilter) *TagFilters {
tfsNew := NewTagFilters(tfs.accountID, tfs.projectID)
for i := range tfs.tfs {
tf := &tfs.tfs[i]
if tf == tfNegative {
if err := tfsNew.Add(tf.key, []byte(".+"), true, true); err != nil {
logger.Panicf("BUG: unexpected error when creating a tag filter key=~'.+': %s", err)
}
} else {
if err := tfsNew.Add(tf.key, tf.value, tf.isNegative, tf.isRegexp); err != nil {
logger.Panicf("BUG: unexpected error when cloning a tag filter %s: %s", tf, err)
}
}
}
return tfsNew
}
// String returns human-readable value for tfs. // String returns human-readable value for tfs.
func (tfs *TagFilters) String() string { func (tfs *TagFilters) String() string {
var bb bytes.Buffer var bb bytes.Buffer