diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index eb1694238..46f699bdd 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -55,6 +55,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/). * FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): support client-side TLS configuration for [native protocol](https://docs.victoriametrics.com/vmctl/#migrating-data-from-victoriametrics). See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5748). Thanks to @khushijain21 for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5824). * FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): support client-side TLS configuration for VictoriaMetrics destination specified via `--vm-*` cmd-line flags used in [InfluxDB](https://docs.victoriametrics.com/vmctl/#migrating-data-from-influxdb-1x), [Remote Read protocol](https://docs.victoriametrics.com/vmctl/#migrating-data-by-remote-read-protocol), [OpenTSDB](https://docs.victoriametrics.com/vmctl/#migrating-data-from-opentsdb), [Prometheus](https://docs.victoriametrics.com/vmctl/#migrating-data-from-prometheus) and [Promscale](https://docs.victoriametrics.com/vmctl/#migrating-data-from-promscale) migration modes. +* BUGFIX: prevent from automatic deletion of newly registered time series when it is queried immediately after the addition. The probability of this bug has been increased significantly after [v1.99.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.99.0) because of optimizations related to registering new time series. See [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5948) and [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5959) issue. * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly set `Host` header in requests to scrape targets if it is specified via [`headers` option](https://docs.victoriametrics.com/sd_configs/#http-api-client-options). Thanks to @fholzer for [the bugreport](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5969) and [the fix](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5970). * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly set `Host` header in requests to scrape targets when [`server_name` option at `tls_config`](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#tls_config) is set. Previously the `Host` header was set incorrectly to the target hostname in this case. * BUGFIX: do not drop `match[]` filter at [`/api/v1/series`](https://docs.victoriametrics.com/url-examples/#apiv1series) if `-search.ignoreExtraFiltersAtLabelsAPI` command-line flag is set, since missing `match[]` filter breaks `/api/v1/series` requests. diff --git a/lib/storage/index_db.go b/lib/storage/index_db.go index da5f956ac..26728b039 100644 --- a/lib/storage/index_db.go +++ b/lib/storage/index_db.go @@ -1485,14 +1485,45 @@ func (db *indexDB) searchMetricNameWithCache(dst []byte, metricID uint64) ([]byt return dst, true } - // Cannot find MetricName for the given metricID. This may be the case - // when indexDB contains incomplete set of metricID -> metricName entries - // after a snapshot or due to unflushed entries. - db.missingMetricNamesForMetricID.Add(1) + // Cannot find the MetricName for the given metricID. + // There are the following expected cases when this may happen: + // + // 1. The corresponding metricID -> metricName entry isn't visible for search yet. + // The solution is to wait for some time and try the search again. + // It is OK if newly registered time series isn't visible for search during some time. + // This should resolve https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5959 + // + // 2. The metricID -> metricName entry doesn't exist in the indexdb. + // This is possible after unclean shutdown or after restoring of indexdb from a snapshot. + // In this case the metricID must be deleted, so new metricID is registered + // again when new sample for the given metricName is ingested next time. + // + ct := fasttime.UnixTimestamp() + db.s.missingMetricIDsLock.Lock() + if ct > db.s.missingMetricIDsResetDeadline { + db.s.missingMetricIDs = nil + db.s.missingMetricIDsResetDeadline = ct + 2*60 + } + timestamp, ok := db.s.missingMetricIDs[metricID] + if !ok { + if db.s.missingMetricIDs == nil { + db.s.missingMetricIDs = make(map[uint64]uint64) + } + db.s.missingMetricIDs[metricID] = ct + timestamp = ct + } + db.s.missingMetricIDsLock.Unlock() + + if ct > timestamp+60 { + // Cannot find the MetricName for the given metricID for the last 60 seconds. + // It is likely the indexDB contains incomplete set of metricID -> metricName entries + // after unclean shutdown or after restoring from a snapshot. + // Mark the metricID as deleted, so it is created again when new sample + // for the given time series is ingested next time. + db.missingMetricNamesForMetricID.Add(1) + db.deleteMetricIDs([]uint64{metricID}) + } - // Mark the metricID as deleted, so it will be created again when new data point - // for the given time series will arrive. - db.deleteMetricIDs([]uint64{metricID}) return dst, false } diff --git a/lib/storage/storage.go b/lib/storage/storage.go index dab0e7208..8be641051 100644 --- a/lib/storage/storage.go +++ b/lib/storage/storage.go @@ -147,6 +147,13 @@ type Storage struct { deletedMetricIDs atomic.Pointer[uint64set.Set] deletedMetricIDsUpdateLock sync.Mutex + // missingMetricIDs maps metricID to the timestamp of first unsuccessful lookup + // of metricName by the given metricID. + // This is used inside searchMetricNameWithCache() for detecting permanently missing metricID->metricName entries. + missingMetricIDsLock sync.Mutex + missingMetricIDs map[uint64]uint64 + missingMetricIDsResetDeadline uint64 + // isReadOnly is set to true when the storage is in read-only mode. isReadOnly atomic.Bool }