lib/storage: log fatal error inside searchMetricName() instead of propagating it to the caller

This simplifies the code a bit at searchMetricName() and searchMetricNameWithCache() call sites

This is a result of investigating https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4972
This commit is contained in:
Aliaksandr Valialkin 2023-09-22 11:32:59 +02:00
parent 455077cd67
commit 3140ef7261
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
4 changed files with 78 additions and 106 deletions

View file

@ -668,7 +668,8 @@ func (is *indexSearch) searchLabelNamesWithFiltersOnDate(qt *querytracer.Tracer,
// This would help https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2978 // This would help https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2978
metricIDs := filter.AppendTo(nil) metricIDs := filter.AppendTo(nil)
qt.Printf("sort %d metricIDs", len(metricIDs)) qt.Printf("sort %d metricIDs", len(metricIDs))
return is.getLabelNamesForMetricIDs(qt, metricIDs, lns, maxLabelNames) is.getLabelNamesForMetricIDs(qt, metricIDs, lns, maxLabelNames)
return nil
} }
var prevLabelName []byte var prevLabelName []byte
ts := &is.ts ts := &is.ts
@ -732,39 +733,34 @@ func (is *indexSearch) searchLabelNamesWithFiltersOnDate(qt *querytracer.Tracer,
return nil return nil
} }
func (is *indexSearch) getLabelNamesForMetricIDs(qt *querytracer.Tracer, metricIDs []uint64, lns map[string]struct{}, maxLabelNames int) error { func (is *indexSearch) getLabelNamesForMetricIDs(qt *querytracer.Tracer, metricIDs []uint64, lns map[string]struct{}, maxLabelNames int) {
lns["__name__"] = struct{}{} lns["__name__"] = struct{}{}
var mn MetricName var mn MetricName
foundLabelNames := 0 foundLabelNames := 0
var buf []byte var buf []byte
for _, metricID := range metricIDs { for _, metricID := range metricIDs {
var err error var ok bool
buf, err = is.searchMetricNameWithCache(buf[:0], metricID) buf, ok = is.searchMetricNameWithCache(buf[:0], metricID)
if err != nil { if !ok {
if err == io.EOF { // It is likely the metricID->metricName entry didn't propagate to inverted index yet.
// It is likely the metricID->metricName entry didn't propagate to inverted index yet. // Skip this metricID for now.
// Skip this metricID for now. continue
continue
}
return fmt.Errorf("cannot find metricName by metricID %d: %w", metricID, err)
} }
if err := mn.Unmarshal(buf); err != nil { if err := mn.Unmarshal(buf); err != nil {
return fmt.Errorf("cannot unmarshal metricName %q: %w", buf, err) logger.Panicf("FATAL: cannot unmarshal metricName %q: %w", buf, err)
} }
for _, tag := range mn.Tags { for _, tag := range mn.Tags {
_, ok := lns[string(tag.Key)] if _, ok := lns[string(tag.Key)]; !ok {
if !ok {
foundLabelNames++ foundLabelNames++
lns[string(tag.Key)] = struct{}{} lns[string(tag.Key)] = struct{}{}
if len(lns) >= maxLabelNames { if len(lns) >= maxLabelNames {
qt.Printf("hit the limit on the number of unique label names: %d", maxLabelNames) qt.Printf("hit the limit on the number of unique label names: %d", maxLabelNames)
return nil return
} }
} }
} }
} }
qt.Printf("get %d distinct label names from %d metricIDs", foundLabelNames, len(metricIDs)) qt.Printf("get %d distinct label names from %d metricIDs", foundLabelNames, len(metricIDs))
return nil
} }
// SearchLabelValuesWithFiltersOnTimeRange returns label values for the given labelName, tfss and tr. // SearchLabelValuesWithFiltersOnTimeRange returns label values for the given labelName, tfss and tr.
@ -868,7 +864,8 @@ func (is *indexSearch) searchLabelValuesWithFiltersOnDate(qt *querytracer.Tracer
// This would help https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2978 // This would help https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2978
metricIDs := filter.AppendTo(nil) metricIDs := filter.AppendTo(nil)
qt.Printf("sort %d metricIDs", len(metricIDs)) qt.Printf("sort %d metricIDs", len(metricIDs))
return is.getLabelValuesForMetricIDs(qt, lvs, labelName, metricIDs, maxLabelValues) is.getLabelValuesForMetricIDs(qt, lvs, labelName, metricIDs, maxLabelValues)
return nil
} }
if labelName == "__name__" { if labelName == "__name__" {
// __name__ label is encoded as empty string in indexdb. // __name__ label is encoded as empty string in indexdb.
@ -927,7 +924,7 @@ func (is *indexSearch) searchLabelValuesWithFiltersOnDate(qt *querytracer.Tracer
return nil return nil
} }
func (is *indexSearch) getLabelValuesForMetricIDs(qt *querytracer.Tracer, lvs map[string]struct{}, labelName string, metricIDs []uint64, maxLabelValues int) error { func (is *indexSearch) getLabelValuesForMetricIDs(qt *querytracer.Tracer, lvs map[string]struct{}, labelName string, metricIDs []uint64, maxLabelValues int) {
if labelName == "" { if labelName == "" {
labelName = "__name__" labelName = "__name__"
} }
@ -935,32 +932,27 @@ func (is *indexSearch) getLabelValuesForMetricIDs(qt *querytracer.Tracer, lvs ma
foundLabelValues := 0 foundLabelValues := 0
var buf []byte var buf []byte
for _, metricID := range metricIDs { for _, metricID := range metricIDs {
var err error var ok bool
buf, err = is.searchMetricNameWithCache(buf[:0], metricID) buf, ok = is.searchMetricNameWithCache(buf[:0], metricID)
if err != nil { if !ok {
if err == io.EOF { // It is likely the metricID->metricName entry didn't propagate to inverted index yet.
// It is likely the metricID->metricName entry didn't propagate to inverted index yet. // Skip this metricID for now.
// Skip this metricID for now. continue
continue
}
return fmt.Errorf("cannot find metricName by metricID %d: %w", metricID, err)
} }
if err := mn.Unmarshal(buf); err != nil { if err := mn.Unmarshal(buf); err != nil {
return fmt.Errorf("cannot unmarshal metricName %q: %w", buf, err) logger.Panicf("FATAL: cannot unmarshal metricName %q: %s", buf, err)
} }
tagValue := mn.GetTagValue(labelName) tagValue := mn.GetTagValue(labelName)
_, ok := lvs[string(tagValue)] if _, ok := lvs[string(tagValue)]; !ok {
if !ok {
foundLabelValues++ foundLabelValues++
lvs[string(tagValue)] = struct{}{} lvs[string(tagValue)] = struct{}{}
if len(lvs) >= maxLabelValues { if len(lvs) >= maxLabelValues {
qt.Printf("hit the limit on the number of unique label values for label %q: %d", labelName, maxLabelValues) qt.Printf("hit the limit on the number of unique label values for label %q: %d", labelName, maxLabelValues)
return nil return
} }
} }
} }
qt.Printf("get %d distinct values for label %q from %d metricIDs", foundLabelValues, labelName, len(metricIDs)) qt.Printf("get %d distinct values for label %q from %d metricIDs", foundLabelValues, labelName, len(metricIDs))
return nil
} }
// SearchTagValueSuffixes returns all the tag value suffixes for the given tagKey and tagValuePrefix on the given tr. // SearchTagValueSuffixes returns all the tag value suffixes for the given tagKey and tagValuePrefix on the given tr.
@ -1442,38 +1434,35 @@ func (th *topHeap) Pop() interface{} {
// searchMetricNameWithCache appends metric name for the given metricID to dst // searchMetricNameWithCache appends metric name for the given metricID to dst
// and returns the result. // and returns the result.
func (db *indexDB) searchMetricNameWithCache(dst []byte, metricID uint64) ([]byte, error) { func (db *indexDB) searchMetricNameWithCache(dst []byte, metricID uint64) ([]byte, bool) {
metricName := db.getMetricNameFromCache(dst, metricID) metricName := db.getMetricNameFromCache(dst, metricID)
if len(metricName) > len(dst) { if len(metricName) > len(dst) {
return metricName, nil return metricName, true
} }
is := db.getIndexSearch(noDeadline) is := db.getIndexSearch(noDeadline)
var err error var ok bool
dst, err = is.searchMetricName(dst, metricID) dst, ok = is.searchMetricName(dst, metricID)
db.putIndexSearch(is) db.putIndexSearch(is)
if err == nil { if ok {
// There is no need in verifying whether the given metricID is deleted, // There is no need in verifying whether the given metricID is deleted,
// since the filtering must be performed before calling this func. // since the filtering must be performed before calling this func.
db.putMetricNameToCache(metricID, dst) db.putMetricNameToCache(metricID, dst)
return dst, nil return dst, true
}
if err != io.EOF {
return dst, err
} }
// Try searching in the external indexDB. // Try searching in the external indexDB.
if db.doExtDB(func(extDB *indexDB) { if db.doExtDB(func(extDB *indexDB) {
is := extDB.getIndexSearch(noDeadline) is := extDB.getIndexSearch(noDeadline)
dst, err = is.searchMetricName(dst, metricID) dst, ok = is.searchMetricName(dst, metricID)
extDB.putIndexSearch(is) extDB.putIndexSearch(is)
if err == nil { if ok {
// There is no need in verifying whether the given metricID is deleted, // There is no need in verifying whether the given metricID is deleted,
// since the filtering must be performed before calling this func. // since the filtering must be performed before calling this func.
extDB.putMetricNameToCache(metricID, dst) extDB.putMetricNameToCache(metricID, dst)
} }
}) { }) && ok {
return dst, err return dst, true
} }
// Cannot find MetricName for the given metricID. This may be the case // Cannot find MetricName for the given metricID. This may be the case
@ -1484,7 +1473,7 @@ func (db *indexDB) searchMetricNameWithCache(dst []byte, metricID uint64) ([]byt
// Mark the metricID as deleted, so it will be created again when new data point // Mark the metricID as deleted, so it will be created again when new data point
// for the given time series will arrive. // for the given time series will arrive.
db.deleteMetricIDs([]uint64{metricID}) db.deleteMetricIDs([]uint64{metricID})
return dst, io.EOF return dst, false
} }
// DeleteTSIDs marks as deleted all the TSIDs matching the given tfss. // DeleteTSIDs marks as deleted all the TSIDs matching the given tfss.
@ -1820,36 +1809,36 @@ func (is *indexSearch) getTSIDByMetricNameNoExtDB(dst *TSID, metricName []byte,
return false return false
} }
func (is *indexSearch) searchMetricNameWithCache(dst []byte, metricID uint64) ([]byte, error) { func (is *indexSearch) searchMetricNameWithCache(dst []byte, metricID uint64) ([]byte, bool) {
metricName := is.db.getMetricNameFromCache(dst, metricID) metricName := is.db.getMetricNameFromCache(dst, metricID)
if len(metricName) > len(dst) { if len(metricName) > len(dst) {
return metricName, nil return metricName, true
} }
var err error var ok bool
dst, err = is.searchMetricName(dst, metricID) dst, ok = is.searchMetricName(dst, metricID)
if err == nil { if ok {
// There is no need in verifying whether the given metricID is deleted, // There is no need in verifying whether the given metricID is deleted,
// since the filtering must be performed before calling this func. // since the filtering must be performed before calling this func.
is.db.putMetricNameToCache(metricID, dst) is.db.putMetricNameToCache(metricID, dst)
return dst, nil return dst, true
} }
return dst, err return dst, false
} }
func (is *indexSearch) searchMetricName(dst []byte, metricID uint64) ([]byte, error) { func (is *indexSearch) searchMetricName(dst []byte, metricID uint64) ([]byte, bool) {
ts := &is.ts ts := &is.ts
kb := &is.kb kb := &is.kb
kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixMetricIDToMetricName) kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixMetricIDToMetricName)
kb.B = encoding.MarshalUint64(kb.B, metricID) kb.B = encoding.MarshalUint64(kb.B, metricID)
if err := ts.FirstItemWithPrefix(kb.B); err != nil { if err := ts.FirstItemWithPrefix(kb.B); err != nil {
if err == io.EOF { if err == io.EOF {
return dst, err return dst, false
} }
return dst, fmt.Errorf("error when searching metricName by metricID; searchPrefix %q: %w", kb.B, err) logger.Panicf("FATAL: error when searching metricName by metricID; searchPrefix %q: %w", kb.B, err)
} }
v := ts.Item[len(kb.B):] v := ts.Item[len(kb.B):]
dst = append(dst, v...) dst = append(dst, v...)
return dst, nil return dst, true
} }
func (is *indexSearch) containsTimeRange(tr TimeRange) (bool, error) { func (is *indexSearch) containsTimeRange(tr TimeRange) (bool, error) {
@ -1928,18 +1917,15 @@ func (is *indexSearch) updateMetricIDsByMetricNameMatch(qt *querytracer.Tracer,
return err return err
} }
} }
var err error var ok bool
metricName.B, err = is.searchMetricNameWithCache(metricName.B[:0], metricID) metricName.B, ok = is.searchMetricNameWithCache(metricName.B[:0], metricID)
if err != nil { if !ok {
if err == io.EOF { // It is likely the metricID->metricName entry didn't propagate to inverted index yet.
// It is likely the metricID->metricName entry didn't propagate to inverted index yet. // Skip this metricID for now.
// Skip this metricID for now. continue
continue
}
return fmt.Errorf("cannot find metricName by metricID %d: %w", metricID, err)
} }
if err := mn.Unmarshal(metricName.B); err != nil { if err := mn.Unmarshal(metricName.B); err != nil {
return fmt.Errorf("cannot unmarshal metricName %q: %w", metricName.B, err) logger.Panicf("FATAL: cannot unmarshal metricName %q: %s", metricName.B, err)
} }
// Match the mn against tfs. // Match the mn against tfs.

View file

@ -3,7 +3,6 @@ package storage
import ( import (
"bytes" "bytes"
"fmt" "fmt"
"io"
"math/rand" "math/rand"
"os" "os"
"reflect" "reflect"
@ -655,19 +654,19 @@ func testIndexDBCheckTSIDByName(db *indexDB, mns []MetricName, tsids []TSID, isC
} }
// Search for metric name for the given metricID. // Search for metric name for the given metricID.
var err error var ok bool
metricNameCopy, err = db.searchMetricNameWithCache(metricNameCopy[:0], genTSID.TSID.MetricID) metricNameCopy, ok = db.searchMetricNameWithCache(metricNameCopy[:0], genTSID.TSID.MetricID)
if err != nil { if !ok {
return fmt.Errorf("error in searchMetricNameWithCache for metricID=%d; i=%d: %w", genTSID.TSID.MetricID, i, err) return fmt.Errorf("cannot find metricName for metricID=%d; i=%d", genTSID.TSID.MetricID, i)
} }
if !bytes.Equal(metricName, metricNameCopy) { if !bytes.Equal(metricName, metricNameCopy) {
return fmt.Errorf("unexpected mn for metricID=%d;\ngot\n%q\nwant\n%q", genTSID.TSID.MetricID, metricNameCopy, metricName) return fmt.Errorf("unexpected mn for metricID=%d;\ngot\n%q\nwant\n%q", genTSID.TSID.MetricID, metricNameCopy, metricName)
} }
// Try searching metric name for non-existent MetricID. // Try searching metric name for non-existent MetricID.
buf, err := db.searchMetricNameWithCache(nil, 1) buf, found := db.searchMetricNameWithCache(nil, 1)
if err != io.EOF { if found {
return fmt.Errorf("expecting io.EOF error when searching for non-existing metricID; got %v", err) return fmt.Errorf("unexpected metricName found for non-existing metricID; got %X", buf)
} }
if len(buf) > 0 { if len(buf) > 0 {
return fmt.Errorf("expecting empty buf when searching for non-existent metricID; got %X", buf) return fmt.Errorf("expecting empty buf when searching for non-existent metricID; got %X", buf)

View file

@ -211,16 +211,12 @@ func (s *Search) NextMetricBlock() bool {
// Skip the block, since it contains only data outside the configured retention. // Skip the block, since it contains only data outside the configured retention.
continue continue
} }
var err error var ok bool
s.MetricBlockRef.MetricName, err = s.idb.searchMetricNameWithCache(s.MetricBlockRef.MetricName[:0], tsid.MetricID) s.MetricBlockRef.MetricName, ok = s.idb.searchMetricNameWithCache(s.MetricBlockRef.MetricName[:0], tsid.MetricID)
if err != nil { if !ok {
if err == io.EOF { // Skip missing metricName for tsid.MetricID.
// Skip missing metricName for tsid.MetricID. // It should be automatically fixed. See indexDB.searchMetricNameWithCache for details.
// It should be automatically fixed. See indexDB.searchMetricNameWithCache for details. continue
continue
}
s.err = err
return false
} }
s.prevMetricID = tsid.MetricID s.prevMetricID = tsid.MetricID
} }

View file

@ -1114,15 +1114,12 @@ func (s *Storage) SearchMetricNames(qt *querytracer.Tracer, tfss []*TagFilters,
return nil, err return nil, err
} }
} }
var err error var ok bool
metricName, err = idb.searchMetricNameWithCache(metricName[:0], metricID) metricName, ok = idb.searchMetricNameWithCache(metricName[:0], metricID)
if err != nil { if !ok {
if err == io.EOF { // Skip missing metricName for metricID.
// Skip missing metricName for metricID. // It should be automatically fixed. See indexDB.searchMetricName for details.
// It should be automatically fixed. See indexDB.searchMetricName for details. continue
continue
}
return nil, fmt.Errorf("error when searching metricName for metricID=%d: %w", metricID, err)
} }
if _, ok := metricNamesSeen[string(metricName)]; ok { if _, ok := metricNamesSeen[string(metricName)]; ok {
// The given metric name was already seen; skip it // The given metric name was already seen; skip it
@ -1175,13 +1172,11 @@ func (s *Storage) prefetchMetricNames(qt *querytracer.Tracer, srcMetricIDs []uin
return err return err
} }
} }
metricName, err = is.searchMetricNameWithCache(metricName[:0], metricID) var ok bool
if err != nil { metricName, ok = is.searchMetricNameWithCache(metricName[:0], metricID)
if err == io.EOF { if !ok {
missingMetricIDs = append(missingMetricIDs, metricID) missingMetricIDs = append(missingMetricIDs, metricID)
continue continue
}
return fmt.Errorf("error in pre-fetching metricName for metricID=%d: %w", metricID, err)
} }
} }
idb.doExtDB(func(extDB *indexDB) { idb.doExtDB(func(extDB *indexDB) {
@ -1193,11 +1188,7 @@ func (s *Storage) prefetchMetricNames(qt *querytracer.Tracer, srcMetricIDs []uin
return return
} }
} }
metricName, err = is.searchMetricNameWithCache(metricName[:0], metricID) metricName, _ = is.searchMetricNameWithCache(metricName[:0], metricID)
if err != nil && err != io.EOF {
err = fmt.Errorf("error in pre-fetching metricName for metricID=%d in extDB: %w", metricID, err)
return
}
} }
}) })
if err != nil && err != io.EOF { if err != nil && err != io.EOF {