From d0743269709da8fa85b1f70ebebe73ee0f128dc1 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Mon, 15 Mar 2021 22:38:50 +0200 Subject: [PATCH] app/vmstorage: add `-logNewSeries` command-line flag for determining the source of series churn rate --- app/vmstorage/main.go | 4 ++++ docs/CHANGELOG.md | 1 + docs/Single-server-VictoriaMetrics.md | 2 ++ lib/storage/index_db.go | 12 ++++++++++++ 4 files changed, 19 insertions(+) diff --git a/app/vmstorage/main.go b/app/vmstorage/main.go index e4b9a8a85..34e301c01 100644 --- a/app/vmstorage/main.go +++ b/app/vmstorage/main.go @@ -39,6 +39,9 @@ var ( minScrapeInterval = flag.Duration("dedup.minScrapeInterval", 0, "Remove superflouos samples from time series if they are located closer to each other than this duration. "+ "This may be useful for reducing overhead when multiple identically configured Prometheus instances write data to the same VictoriaMetrics. "+ "Deduplication is disabled if the -dedup.minScrapeInterval is 0") + + logNewSeries = flag.Bool("logNewSeries", false, "Whether to log new series. This option is for debug purposes only. It can lead to performance issues "+ + "when big number of new series are ingested into VictoriaMetrics") ) func main() { @@ -50,6 +53,7 @@ func main() { logger.Init() storage.SetMinScrapeIntervalForDeduplication(*minScrapeInterval) + storage.SetLogNewSeries(*logNewSeries) storage.SetFinalMergeDelay(*finalMergeDelay) storage.SetBigMergeWorkersCount(*bigMergeConcurrency) storage.SetSmallMergeWorkersCount(*smallMergeConcurrency) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 26411e4e8..dbb7f85ac 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -14,6 +14,7 @@ * FEATURE: vmauth: allow using regexp paths in `url_map`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1112) for details. * FEATURE: accept `round_digits` query arg at `/api/v1/query` and `/api/v1/query_range` handlers. 
This option can be set at Prometheus datasource in Grafana for limiting the number of digits after the decimal point in response values. * FEATURE: add `-influx.databaseNames` command-line flag, which can be used for accepting data from some Telegraf plugins such as [fluentd plugin](https://github.com/fangli/fluent-plugin-influxdb). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1124). +* FEATURE: add `-logNewSeries` command-line flag, which can be used for debugging the source of time series churn rate. * BUGFIX: vmagent: prevent from high CPU usage bug during failing scrapes with small `scrape_timeout` (less than a few seconds). * BUGFIX: vmagent: reduce memory usage when Kubernetes service discovery is used in big number of distinct scrape config jobs by sharing Kubernetes object cache. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1113 diff --git a/docs/Single-server-VictoriaMetrics.md b/docs/Single-server-VictoriaMetrics.md index c68e8d76d..b602c58fc 100644 --- a/docs/Single-server-VictoriaMetrics.md +++ b/docs/Single-server-VictoriaMetrics.md @@ -1375,6 +1375,8 @@ See the example of alerting rules for VM components [here](https://github.com/Vi VictoriaMetrics accepts optional `date=YYYY-MM-DD` and `topN=42` args on this page. By default `date` equals to the current date, while `topN` equals to 10. +* New time series can be logged if `-logNewSeries` command-line flag is passed to VictoriaMetrics. + * VictoriaMetrics limits the number of labels per each metric with `-maxLabelsPerTimeseries` command-line flag. This prevents from ingesting metrics with too many labels. It is recommended [monitoring](#monitoring) `vm_metrics_with_dropped_labels_total` metric in order to determine whether `-maxLabelsPerTimeseries` must be adjusted for your workload. 
diff --git a/lib/storage/index_db.go b/lib/storage/index_db.go index bca9f734e..f8acd3261 100644 --- a/lib/storage/index_db.go +++ b/lib/storage/index_db.go @@ -597,9 +597,21 @@ func (db *indexDB) createTSIDByName(dst *TSID, metricName []byte) error { // on db.tb flush via invalidateTagCache flushCallback passed to OpenTable. atomic.AddUint64(&db.newTimeseriesCreated, 1) + if logNewSeries { + logger.Infof("new series created: %s", mn.String()) + } return nil } +// SetLogNewSeries updates new series logging. +// +// This function must be called before calling any storage functions. +func SetLogNewSeries(ok bool) { + logNewSeries = ok +} + +var logNewSeries = false + func (db *indexDB) generateTSID(dst *TSID, metricName []byte, mn *MetricName) error { // Search the TSID in the external storage. // This is usually the db from the previous period.