lib/storage: log fatal error inside searchMetricName() instead of propagating it to the caller

This simplifies the code a bit at searchMetricName() and searchMetricNameWithCache() call sites

This is a result of investigating https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4972
This commit is contained in:
Aliaksandr Valialkin 2023-09-22 11:32:59 +02:00
parent 455077cd67
commit 3140ef7261
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
4 changed files with 78 additions and 106 deletions

View file

@ -668,7 +668,8 @@ func (is *indexSearch) searchLabelNamesWithFiltersOnDate(qt *querytracer.Tracer,
// This would help https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2978
metricIDs := filter.AppendTo(nil)
qt.Printf("sort %d metricIDs", len(metricIDs))
return is.getLabelNamesForMetricIDs(qt, metricIDs, lns, maxLabelNames)
is.getLabelNamesForMetricIDs(qt, metricIDs, lns, maxLabelNames)
return nil
}
var prevLabelName []byte
ts := &is.ts
@ -732,39 +733,34 @@ func (is *indexSearch) searchLabelNamesWithFiltersOnDate(qt *querytracer.Tracer,
return nil
}
func (is *indexSearch) getLabelNamesForMetricIDs(qt *querytracer.Tracer, metricIDs []uint64, lns map[string]struct{}, maxLabelNames int) error {
func (is *indexSearch) getLabelNamesForMetricIDs(qt *querytracer.Tracer, metricIDs []uint64, lns map[string]struct{}, maxLabelNames int) {
lns["__name__"] = struct{}{}
var mn MetricName
foundLabelNames := 0
var buf []byte
for _, metricID := range metricIDs {
var err error
buf, err = is.searchMetricNameWithCache(buf[:0], metricID)
if err != nil {
if err == io.EOF {
// It is likely the metricID->metricName entry didn't propagate to inverted index yet.
// Skip this metricID for now.
continue
}
return fmt.Errorf("cannot find metricName by metricID %d: %w", metricID, err)
var ok bool
buf, ok = is.searchMetricNameWithCache(buf[:0], metricID)
if !ok {
// It is likely the metricID->metricName entry didn't propagate to inverted index yet.
// Skip this metricID for now.
continue
}
if err := mn.Unmarshal(buf); err != nil {
return fmt.Errorf("cannot unmarshal metricName %q: %w", buf, err)
logger.Panicf("FATAL: cannot unmarshal metricName %q: %w", buf, err)
}
for _, tag := range mn.Tags {
_, ok := lns[string(tag.Key)]
if !ok {
if _, ok := lns[string(tag.Key)]; !ok {
foundLabelNames++
lns[string(tag.Key)] = struct{}{}
if len(lns) >= maxLabelNames {
qt.Printf("hit the limit on the number of unique label names: %d", maxLabelNames)
return nil
return
}
}
}
}
qt.Printf("get %d distinct label names from %d metricIDs", foundLabelNames, len(metricIDs))
return nil
}
// SearchLabelValuesWithFiltersOnTimeRange returns label values for the given labelName, tfss and tr.
@ -868,7 +864,8 @@ func (is *indexSearch) searchLabelValuesWithFiltersOnDate(qt *querytracer.Tracer
// This would help https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2978
metricIDs := filter.AppendTo(nil)
qt.Printf("sort %d metricIDs", len(metricIDs))
return is.getLabelValuesForMetricIDs(qt, lvs, labelName, metricIDs, maxLabelValues)
is.getLabelValuesForMetricIDs(qt, lvs, labelName, metricIDs, maxLabelValues)
return nil
}
if labelName == "__name__" {
// __name__ label is encoded as empty string in indexdb.
@ -927,7 +924,7 @@ func (is *indexSearch) searchLabelValuesWithFiltersOnDate(qt *querytracer.Tracer
return nil
}
func (is *indexSearch) getLabelValuesForMetricIDs(qt *querytracer.Tracer, lvs map[string]struct{}, labelName string, metricIDs []uint64, maxLabelValues int) error {
func (is *indexSearch) getLabelValuesForMetricIDs(qt *querytracer.Tracer, lvs map[string]struct{}, labelName string, metricIDs []uint64, maxLabelValues int) {
if labelName == "" {
labelName = "__name__"
}
@ -935,32 +932,27 @@ func (is *indexSearch) getLabelValuesForMetricIDs(qt *querytracer.Tracer, lvs ma
foundLabelValues := 0
var buf []byte
for _, metricID := range metricIDs {
var err error
buf, err = is.searchMetricNameWithCache(buf[:0], metricID)
if err != nil {
if err == io.EOF {
// It is likely the metricID->metricName entry didn't propagate to inverted index yet.
// Skip this metricID for now.
continue
}
return fmt.Errorf("cannot find metricName by metricID %d: %w", metricID, err)
var ok bool
buf, ok = is.searchMetricNameWithCache(buf[:0], metricID)
if !ok {
// It is likely the metricID->metricName entry didn't propagate to inverted index yet.
// Skip this metricID for now.
continue
}
if err := mn.Unmarshal(buf); err != nil {
return fmt.Errorf("cannot unmarshal metricName %q: %w", buf, err)
logger.Panicf("FATAL: cannot unmarshal metricName %q: %s", buf, err)
}
tagValue := mn.GetTagValue(labelName)
_, ok := lvs[string(tagValue)]
if !ok {
if _, ok := lvs[string(tagValue)]; !ok {
foundLabelValues++
lvs[string(tagValue)] = struct{}{}
if len(lvs) >= maxLabelValues {
qt.Printf("hit the limit on the number of unique label values for label %q: %d", labelName, maxLabelValues)
return nil
return
}
}
}
qt.Printf("get %d distinct values for label %q from %d metricIDs", foundLabelValues, labelName, len(metricIDs))
return nil
}
// SearchTagValueSuffixes returns all the tag value suffixes for the given tagKey and tagValuePrefix on the given tr.
@ -1442,38 +1434,35 @@ func (th *topHeap) Pop() interface{} {
// searchMetricNameWithCache appends metric name for the given metricID to dst
// and returns the result.
func (db *indexDB) searchMetricNameWithCache(dst []byte, metricID uint64) ([]byte, error) {
func (db *indexDB) searchMetricNameWithCache(dst []byte, metricID uint64) ([]byte, bool) {
metricName := db.getMetricNameFromCache(dst, metricID)
if len(metricName) > len(dst) {
return metricName, nil
return metricName, true
}
is := db.getIndexSearch(noDeadline)
var err error
dst, err = is.searchMetricName(dst, metricID)
var ok bool
dst, ok = is.searchMetricName(dst, metricID)
db.putIndexSearch(is)
if err == nil {
if ok {
// There is no need in verifying whether the given metricID is deleted,
// since the filtering must be performed before calling this func.
db.putMetricNameToCache(metricID, dst)
return dst, nil
}
if err != io.EOF {
return dst, err
return dst, true
}
// Try searching in the external indexDB.
if db.doExtDB(func(extDB *indexDB) {
is := extDB.getIndexSearch(noDeadline)
dst, err = is.searchMetricName(dst, metricID)
dst, ok = is.searchMetricName(dst, metricID)
extDB.putIndexSearch(is)
if err == nil {
if ok {
// There is no need in verifying whether the given metricID is deleted,
// since the filtering must be performed before calling this func.
extDB.putMetricNameToCache(metricID, dst)
}
}) {
return dst, err
}) && ok {
return dst, true
}
// Cannot find MetricName for the given metricID. This may be the case
@ -1484,7 +1473,7 @@ func (db *indexDB) searchMetricNameWithCache(dst []byte, metricID uint64) ([]byt
// Mark the metricID as deleted, so it will be created again when new data point
// for the given time series will arrive.
db.deleteMetricIDs([]uint64{metricID})
return dst, io.EOF
return dst, false
}
// DeleteTSIDs marks as deleted all the TSIDs matching the given tfss.
@ -1820,36 +1809,36 @@ func (is *indexSearch) getTSIDByMetricNameNoExtDB(dst *TSID, metricName []byte,
return false
}
func (is *indexSearch) searchMetricNameWithCache(dst []byte, metricID uint64) ([]byte, error) {
func (is *indexSearch) searchMetricNameWithCache(dst []byte, metricID uint64) ([]byte, bool) {
metricName := is.db.getMetricNameFromCache(dst, metricID)
if len(metricName) > len(dst) {
return metricName, nil
return metricName, true
}
var err error
dst, err = is.searchMetricName(dst, metricID)
if err == nil {
var ok bool
dst, ok = is.searchMetricName(dst, metricID)
if ok {
// There is no need in verifying whether the given metricID is deleted,
// since the filtering must be performed before calling this func.
is.db.putMetricNameToCache(metricID, dst)
return dst, nil
return dst, true
}
return dst, err
return dst, false
}
func (is *indexSearch) searchMetricName(dst []byte, metricID uint64) ([]byte, error) {
func (is *indexSearch) searchMetricName(dst []byte, metricID uint64) ([]byte, bool) {
ts := &is.ts
kb := &is.kb
kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixMetricIDToMetricName)
kb.B = encoding.MarshalUint64(kb.B, metricID)
if err := ts.FirstItemWithPrefix(kb.B); err != nil {
if err == io.EOF {
return dst, err
return dst, false
}
return dst, fmt.Errorf("error when searching metricName by metricID; searchPrefix %q: %w", kb.B, err)
logger.Panicf("FATAL: error when searching metricName by metricID; searchPrefix %q: %w", kb.B, err)
}
v := ts.Item[len(kb.B):]
dst = append(dst, v...)
return dst, nil
return dst, true
}
func (is *indexSearch) containsTimeRange(tr TimeRange) (bool, error) {
@ -1928,18 +1917,15 @@ func (is *indexSearch) updateMetricIDsByMetricNameMatch(qt *querytracer.Tracer,
return err
}
}
var err error
metricName.B, err = is.searchMetricNameWithCache(metricName.B[:0], metricID)
if err != nil {
if err == io.EOF {
// It is likely the metricID->metricName entry didn't propagate to inverted index yet.
// Skip this metricID for now.
continue
}
return fmt.Errorf("cannot find metricName by metricID %d: %w", metricID, err)
var ok bool
metricName.B, ok = is.searchMetricNameWithCache(metricName.B[:0], metricID)
if !ok {
// It is likely the metricID->metricName entry didn't propagate to inverted index yet.
// Skip this metricID for now.
continue
}
if err := mn.Unmarshal(metricName.B); err != nil {
return fmt.Errorf("cannot unmarshal metricName %q: %w", metricName.B, err)
logger.Panicf("FATAL: cannot unmarshal metricName %q: %s", metricName.B, err)
}
// Match the mn against tfs.

View file

@ -3,7 +3,6 @@ package storage
import (
"bytes"
"fmt"
"io"
"math/rand"
"os"
"reflect"
@ -655,19 +654,19 @@ func testIndexDBCheckTSIDByName(db *indexDB, mns []MetricName, tsids []TSID, isC
}
// Search for metric name for the given metricID.
var err error
metricNameCopy, err = db.searchMetricNameWithCache(metricNameCopy[:0], genTSID.TSID.MetricID)
if err != nil {
return fmt.Errorf("error in searchMetricNameWithCache for metricID=%d; i=%d: %w", genTSID.TSID.MetricID, i, err)
var ok bool
metricNameCopy, ok = db.searchMetricNameWithCache(metricNameCopy[:0], genTSID.TSID.MetricID)
if !ok {
return fmt.Errorf("cannot find metricName for metricID=%d; i=%d", genTSID.TSID.MetricID, i)
}
if !bytes.Equal(metricName, metricNameCopy) {
return fmt.Errorf("unexpected mn for metricID=%d;\ngot\n%q\nwant\n%q", genTSID.TSID.MetricID, metricNameCopy, metricName)
}
// Try searching metric name for non-existent MetricID.
buf, err := db.searchMetricNameWithCache(nil, 1)
if err != io.EOF {
return fmt.Errorf("expecting io.EOF error when searching for non-existing metricID; got %v", err)
buf, found := db.searchMetricNameWithCache(nil, 1)
if found {
return fmt.Errorf("unexpected metricName found for non-existing metricID; got %X", buf)
}
if len(buf) > 0 {
return fmt.Errorf("expecting empty buf when searching for non-existent metricID; got %X", buf)

View file

@ -211,16 +211,12 @@ func (s *Search) NextMetricBlock() bool {
// Skip the block, since it contains only data outside the configured retention.
continue
}
var err error
s.MetricBlockRef.MetricName, err = s.idb.searchMetricNameWithCache(s.MetricBlockRef.MetricName[:0], tsid.MetricID)
if err != nil {
if err == io.EOF {
// Skip missing metricName for tsid.MetricID.
// It should be automatically fixed. See indexDB.searchMetricNameWithCache for details.
continue
}
s.err = err
return false
var ok bool
s.MetricBlockRef.MetricName, ok = s.idb.searchMetricNameWithCache(s.MetricBlockRef.MetricName[:0], tsid.MetricID)
if !ok {
// Skip missing metricName for tsid.MetricID.
// It should be automatically fixed. See indexDB.searchMetricNameWithCache for details.
continue
}
s.prevMetricID = tsid.MetricID
}

View file

@ -1114,15 +1114,12 @@ func (s *Storage) SearchMetricNames(qt *querytracer.Tracer, tfss []*TagFilters,
return nil, err
}
}
var err error
metricName, err = idb.searchMetricNameWithCache(metricName[:0], metricID)
if err != nil {
if err == io.EOF {
// Skip missing metricName for metricID.
// It should be automatically fixed. See indexDB.searchMetricName for details.
continue
}
return nil, fmt.Errorf("error when searching metricName for metricID=%d: %w", metricID, err)
var ok bool
metricName, ok = idb.searchMetricNameWithCache(metricName[:0], metricID)
if !ok {
// Skip missing metricName for metricID.
// It should be automatically fixed. See indexDB.searchMetricName for details.
continue
}
if _, ok := metricNamesSeen[string(metricName)]; ok {
// The given metric name was already seen; skip it
@ -1175,13 +1172,11 @@ func (s *Storage) prefetchMetricNames(qt *querytracer.Tracer, srcMetricIDs []uin
return err
}
}
metricName, err = is.searchMetricNameWithCache(metricName[:0], metricID)
if err != nil {
if err == io.EOF {
missingMetricIDs = append(missingMetricIDs, metricID)
continue
}
return fmt.Errorf("error in pre-fetching metricName for metricID=%d: %w", metricID, err)
var ok bool
metricName, ok = is.searchMetricNameWithCache(metricName[:0], metricID)
if !ok {
missingMetricIDs = append(missingMetricIDs, metricID)
continue
}
}
idb.doExtDB(func(extDB *indexDB) {
@ -1193,11 +1188,7 @@ func (s *Storage) prefetchMetricNames(qt *querytracer.Tracer, srcMetricIDs []uin
return
}
}
metricName, err = is.searchMetricNameWithCache(metricName[:0], metricID)
if err != nil && err != io.EOF {
err = fmt.Errorf("error in pre-fetching metricName for metricID=%d in extDB: %w", metricID, err)
return
}
metricName, _ = is.searchMetricNameWithCache(metricName[:0], metricID)
}
})
if err != nil && err != io.EOF {