app/vmstorage: add -logNewSeries command-line flag for determining the source of series churn rate

Aliaksandr Valialkin 2021-03-15 22:38:50 +02:00
parent c4d0c20de2
commit d074326970
4 changed files with 19 additions and 0 deletions


@@ -39,6 +39,9 @@ var (
minScrapeInterval = flag.Duration("dedup.minScrapeInterval", 0, "Remove superfluous samples from time series if they are located closer to each other than this duration. "+
"This may be useful for reducing overhead when multiple identically configured Prometheus instances write data to the same VictoriaMetrics. "+
"Deduplication is disabled if the -dedup.minScrapeInterval is 0")
logNewSeries = flag.Bool("logNewSeries", false, "Whether to log new series. This option is for debug purposes only. It can lead to performance issues "+
"when a big number of new series is ingested into VictoriaMetrics")
)
func main() {
@@ -50,6 +53,7 @@ func main() {
logger.Init()
storage.SetMinScrapeIntervalForDeduplication(*minScrapeInterval)
storage.SetLogNewSeries(*logNewSeries)
storage.SetFinalMergeDelay(*finalMergeDelay)
storage.SetBigMergeWorkersCount(*bigMergeConcurrency)
storage.SetSmallMergeWorkersCount(*smallMergeConcurrency)
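
For reference, here is a minimal, self-contained sketch of the pattern this file uses: a package-level boolean that is set exactly once at startup, before any storage work begins, and then read lock-free on the hot ingestion path. The names below (`createSeries`, the demo `main`) are illustrative stand-ins, not the actual VictoriaMetrics API:

```go
package main

import (
	"flag"
	"log"
)

// logNewSeries mirrors the gate added in this commit: it is written once
// at startup and only read afterwards, so the hot path needs no locking.
var logNewSeries bool

// SetLogNewSeries follows the same contract as storage.SetLogNewSeries:
// it must be called before any series are created.
func SetLogNewSeries(ok bool) {
	logNewSeries = ok
}

// createSeries is a stand-in for the real series-creation path.
func createSeries(name string) {
	if logNewSeries {
		log.Printf("new series created: %s", name)
	}
	// ... register the series in the index ...
}

func main() {
	logNewSeriesFlag := flag.Bool("logNewSeries", false, "Whether to log new series")
	flag.Parse()
	SetLogNewSeries(*logNewSeriesFlag)
	createSeries(`http_requests_total{job="demo"}`)
}
```

Run it with `-logNewSeries` to see the log line; without the flag, the ingestion path pays only the cost of a single branch.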


@@ -14,6 +14,7 @@
* FEATURE: vmauth: allow using regexp paths in `url_map`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1112) for details.
* FEATURE: accept `round_digits` query arg at `/api/v1/query` and `/api/v1/query_range` handlers. This option can be set at Prometheus datasource in Grafana for limiting the number of digits after the decimal point in response values.
* FEATURE: add `-influx.databaseNames` command-line flag, which can be used for accepting data from some Telegraf plugins such as [fluentd plugin](https://github.com/fangli/fluent-plugin-influxdb). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1124).
* FEATURE: add `-logNewSeries` command-line flag, which can be used for debugging the source of time series churn rate.
* BUGFIX: vmagent: prevent from high CPU usage bug during failing scrapes with small `scrape_timeout` (less than a few seconds).
* BUGFIX: vmagent: reduce memory usage when Kubernetes service discovery is used in big number of distinct scrape config jobs by sharing Kubernetes object cache. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1113


@@ -1375,6 +1375,8 @@ See the example of alerting rules for VM components [here](https://github.com/Vi
VictoriaMetrics accepts optional `date=YYYY-MM-DD` and `topN=42` args on this page. By default `date` equals to the current date,
while `topN` equals to 10.
* New time series can be logged if the `-logNewSeries` command-line flag is passed to VictoriaMetrics. A sketch of how the resulting log lines can be aggregated is shown after this list.
* VictoriaMetrics limits the number of labels per each metric with `-maxLabelsPerTimeseries` command-line flag.
This prevents from ingesting metrics with too many labels. It is recommended [monitoring](#monitoring) `vm_metrics_with_dropped_labels_total`
metric in order to determine whether `-maxLabelsPerTimeseries` must be adjusted for your workload.
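
The log lines produced by `-logNewSeries` contain the text `new series created: <metric name>` (this marker comes from the `logger.Infof` call in this commit's `lib/storage` change). Below is a hedged sketch of aggregating those lines to locate the source of churn, assuming the metric name is rendered as `name{label="value",...}`; the program name `churntop.go` is made up:

```go
package main

import (
	"bufio"
	"fmt"
	"os"
	"sort"
	"strings"
)

// Reads VictoriaMetrics log output on stdin and counts new series per
// metric name; the metrics with the highest counts are the likely
// sources of churn. The "new series created: " marker matches the
// logger.Infof call in this commit; anything before it on the line
// (timestamp, level, caller) is skipped.
func main() {
	const marker = "new series created: "
	counts := make(map[string]int)
	sc := bufio.NewScanner(os.Stdin)
	sc.Buffer(make([]byte, 0, 1024*1024), 1024*1024)
	for sc.Scan() {
		line := sc.Text()
		i := strings.Index(line, marker)
		if i < 0 {
			continue
		}
		mn := line[i+len(marker):]
		// Group by metric name only; drop the labels.
		if j := strings.IndexByte(mn, '{'); j >= 0 {
			mn = mn[:j]
		}
		counts[mn]++
	}
	names := make([]string, 0, len(counts))
	for name := range counts {
		names = append(names, name)
	}
	sort.Slice(names, func(i, j int) bool { return counts[names[i]] > counts[names[j]] })
	for _, name := range names {
		fmt.Printf("%8d  %s\n", counts[name], name)
	}
}
```

Typical usage would be to pipe the server log through it, e.g. `grep 'new series created' victoria-metrics.log | go run churntop.go`.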


@@ -597,9 +597,21 @@ func (db *indexDB) createTSIDByName(dst *TSID, metricName []byte) error {
// on db.tb flush via invalidateTagCache flushCallback passed to OpenTable.
atomic.AddUint64(&db.newTimeseriesCreated, 1)
if logNewSeries {
logger.Infof("new series created: %s", mn.String())
}
return nil
}
// SetLogNewSeries updates new series logging.
//
// This function must be called before calling any storage functions.
func SetLogNewSeries(ok bool) {
logNewSeries = ok
}
var logNewSeries = false
func (db *indexDB) generateTSID(dst *TSID, metricName []byte, mn *MetricName) error {
// Search the TSID in the external storage.
// This is usually the db from the previous period.
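
A note on the "must be called before calling any storage functions" contract: `logNewSeries` is a plain `bool` read without synchronization from concurrent ingestion goroutines, which is race-free only because the single write in `SetLogNewSeries` happens before those goroutines start. If the flag ever had to be toggled at runtime instead, the usual Go idiom would be an atomic — a hypothetical variant, not what this commit does:

```go
package storage

import "sync/atomic"

// Hypothetical runtime-togglable variant (atomic.Bool requires Go 1.19+).
// The commit itself uses a plain bool, which is safe only because it is
// written once before any storage goroutine starts.
var logNewSeries atomic.Bool

// SetLogNewSeries could then be called at any time, e.g. from a
// SIGHUP-driven config reload.
func SetLogNewSeries(ok bool) {
	logNewSeries.Store(ok)
}

func shouldLogNewSeries() bool {
	return logNewSeries.Load()
}
```

The trade-off is an atomic load on every series creation instead of a plain read; the commit's write-once design avoids even that cost on the hot path.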