Merge branch 'public-single-node' into pmm-6401-read-prometheus-data-files

This commit is contained in:
Aliaksandr Valialkin 2023-02-11 12:09:55 -08:00
commit a38bf70679
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
5 changed files with 54 additions and 17 deletions

View file

@ -4642,12 +4642,12 @@
}, },
"editorMode": "code", "editorMode": "code",
"exemplar": false, "exemplar": false,
"expr": "avg(\n rate(process_cpu_seconds_total{job=~\"$job_storage\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job_storage\", instance=~\"$instance\"}\n)", "expr": "median(\n rate(process_cpu_seconds_total{job=~\"$job_storage\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job_storage\", instance=~\"$instance\"}\n)",
"format": "time_series", "format": "time_series",
"hide": false, "hide": false,
"interval": "", "interval": "",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "avg", "legendFormat": "median",
"range": true, "range": true,
"refId": "C" "refId": "C"
} }
@ -4788,12 +4788,12 @@
}, },
"editorMode": "code", "editorMode": "code",
"exemplar": true, "exemplar": true,
"expr": "avg(\n max_over_time(process_resident_memory_bytes{job=~\"$job_storage\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job_storage\", instance=~\"$instance\"}\n)", "expr": "median(\n max_over_time(process_resident_memory_bytes{job=~\"$job_storage\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job_storage\", instance=~\"$instance\"}\n)",
"format": "time_series", "format": "time_series",
"hide": false, "hide": false,
"interval": "", "interval": "",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "avg", "legendFormat": "median",
"range": true, "range": true,
"refId": "C" "refId": "C"
} }
@ -5381,11 +5381,11 @@
"uid": "$ds" "uid": "$ds"
}, },
"editorMode": "code", "editorMode": "code",
"expr": "avg(\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) /\n (\n sum(vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) +\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)\n ) \n)", "expr": "median(\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) /\n (\n sum(vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) +\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)\n ) \n)",
"format": "time_series", "format": "time_series",
"hide": false, "hide": false,
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "avg", "legendFormat": "median",
"range": true, "range": true,
"refId": "C" "refId": "C"
} }
@ -6255,12 +6255,12 @@
}, },
"editorMode": "code", "editorMode": "code",
"exemplar": true, "exemplar": true,
"expr": "avg(\n rate(process_cpu_seconds_total{job=~\"$job_select\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job_select\", instance=~\"$instance\"}\n)", "expr": "median(\n rate(process_cpu_seconds_total{job=~\"$job_select\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job_select\", instance=~\"$instance\"}\n)",
"format": "time_series", "format": "time_series",
"hide": false, "hide": false,
"interval": "", "interval": "",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "avg", "legendFormat": "median",
"range": true, "range": true,
"refId": "C" "refId": "C"
} }
@ -6399,12 +6399,12 @@
}, },
"editorMode": "code", "editorMode": "code",
"exemplar": true, "exemplar": true,
"expr": "avg(\n max_over_time(process_resident_memory_bytes{job=~\"$job_select\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job_select\", instance=~\"$instance\"}\n)", "expr": "median(\n max_over_time(process_resident_memory_bytes{job=~\"$job_select\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job_select\", instance=~\"$instance\"}\n)",
"format": "time_series", "format": "time_series",
"hide": false, "hide": false,
"interval": "", "interval": "",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "avg", "legendFormat": "median",
"range": true, "range": true,
"refId": "C" "refId": "C"
} }
@ -7364,12 +7364,12 @@
}, },
"editorMode": "code", "editorMode": "code",
"exemplar": true, "exemplar": true,
"expr": "avg(\n rate(process_cpu_seconds_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job_insert\", instance=~\"$instance\"}\n)", "expr": "median(\n rate(process_cpu_seconds_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job_insert\", instance=~\"$instance\"}\n)",
"format": "time_series", "format": "time_series",
"hide": false, "hide": false,
"interval": "", "interval": "",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "avg", "legendFormat": "median",
"range": true, "range": true,
"refId": "C" "refId": "C"
} }
@ -7508,12 +7508,12 @@
}, },
"editorMode": "code", "editorMode": "code",
"exemplar": true, "exemplar": true,
"expr": "avg(\n max_over_time(process_resident_memory_bytes{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job_insert\", instance=~\"$instance\"}\n)", "expr": "median(\n max_over_time(process_resident_memory_bytes{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job_insert\", instance=~\"$instance\"}\n)",
"format": "time_series", "format": "time_series",
"hide": false, "hide": false,
"interval": "", "interval": "",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "avg", "legendFormat": "median",
"range": true, "range": true,
"refId": "C" "refId": "C"
} }

View file

@ -64,6 +64,7 @@ See also [case studies](https://docs.victoriametrics.com/CaseStudies.html).
* [Brewblox: InfluxDB to Victoria Metrics](https://www.brewblox.com/dev/decisions/20210718_victoria_metrics.html) * [Brewblox: InfluxDB to Victoria Metrics](https://www.brewblox.com/dev/decisions/20210718_victoria_metrics.html)
* [VictoriaMetrics static scraper](https://blog.differentpla.net/blog/2022/10/16/victoria-metrics-static-scraper/) * [VictoriaMetrics static scraper](https://blog.differentpla.net/blog/2022/10/16/victoria-metrics-static-scraper/)
* [VictoriaMetrics and Open Cosmos boldly takes edge computing to the edge of space](https://www.iot-now.com/2022/07/19/122423-victoriametrics-and-open-cosmos-boldly-takes-edge-computing-to-the-edge-of-space/) * [VictoriaMetrics and Open Cosmos boldly takes edge computing to the edge of space](https://www.iot-now.com/2022/07/19/122423-victoriametrics-and-open-cosmos-boldly-takes-edge-computing-to-the-edge-of-space/)
* [Evaluating Backend Options For Prometheus Metrics](https://www.techetio.com/2022/08/21/evaluating-backend-options-for-prometheus-metrics/)
## Our articles ## Our articles

View file

@ -23,6 +23,8 @@ The following tip changes can be tested by building VictoriaMetrics components f
* FEATURE: [vmalert enterprise](https://docs.victoriametrics.com/vmalert.html): add ability to read alerting and recording rules from S3, GCS or S3-compatible object storage. See [these docs](https://docs.victoriametrics.com/vmalert.html#reading-rules-from-object-storage). * FEATURE: [vmalert enterprise](https://docs.victoriametrics.com/vmalert.html): add ability to read alerting and recording rules from S3, GCS or S3-compatible object storage. See [these docs](https://docs.victoriametrics.com/vmalert.html#reading-rules-from-object-storage).
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add `mad_over_time(m[d])` function for calculating the [median absolute deviation](https://en.wikipedia.org/wiki/Median_absolute_deviation) over raw samples on the lookbehind window `d`. See [this feature request](https://github.com/prometheus/prometheus/issues/5514). * FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add `mad_over_time(m[d])` function for calculating the [median absolute deviation](https://en.wikipedia.org/wiki/Median_absolute_deviation) over raw samples on the lookbehind window `d`. See [this feature request](https://github.com/prometheus/prometheus/issues/5514).
* BUGFIX: prevent possible data ingestion and query performance slowdowns during [background merges of big parts](https://docs.victoriametrics.com/#storage) on systems with a small number of CPU cores (1 or 2 CPU cores). The issue was introduced in [v1.85.0](https://docs.victoriametrics.com/CHANGELOG.html#v1850) when implementing [this feature](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3337). See also [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3790).
## [v1.87.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.87.1) ## [v1.87.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.87.1)
Released at 2023-02-09 Released at 2023-02-09

View file

@ -788,7 +788,18 @@ func (tb *Table) notifyBackgroundMergers() bool {
} }
} }
var flushConcurrencyCh = make(chan struct{}, cgroup.AvailableCPUs()) var flushConcurrencyLimit = func() int {
n := cgroup.AvailableCPUs()
if n < 2 {
// Allow at least 2 concurrent flushers on systems with a single CPU core
// in order to guarantee that in-memory data flushes and background merges can be continued
// when a single flusher is busy with the long merge.
n = 2
}
return n
}()
var flushConcurrencyCh = make(chan struct{}, flushConcurrencyLimit)
func needAssistedMerge(pws []*partWrapper, maxParts int) bool { func needAssistedMerge(pws []*partWrapper, maxParts int) bool {
if len(pws) < maxParts { if len(pws) < maxParts {

View file

@ -615,7 +615,19 @@ func (pt *partition) notifyBackgroundMergers() bool {
} }
} }
var flushConcurrencyCh = make(chan struct{}, cgroup.AvailableCPUs()) var flushConcurrencyLimit = func() int {
n := cgroup.AvailableCPUs()
if n < 3 {
// Allow at least 3 concurrent flushers on systems with 1 or 2 CPU cores
// in order to guarantee that in-memory data flushes and background merges can be continued
// when a single flusher is busy with the long merge of big parts,
// while another flusher is busy with the long merge of small parts.
n = 3
}
return n
}()
var flushConcurrencyCh = make(chan struct{}, flushConcurrencyLimit)
func needAssistedMerge(pws []*partWrapper, maxParts int) bool { func needAssistedMerge(pws []*partWrapper, maxParts int) bool {
if len(pws) < maxParts { if len(pws) < maxParts {
@ -1007,7 +1019,7 @@ func hasActiveMerges(pws []*partWrapper) bool {
return false return false
} }
var mergeWorkersLimitCh = make(chan struct{}, getDefaultMergeConcurrency(16)) var mergeWorkersLimitCh = make(chan struct{}, adjustMergeWorkersLimit(getDefaultMergeConcurrency(16)))
var bigMergeWorkersLimitCh = make(chan struct{}, getDefaultMergeConcurrency(4)) var bigMergeWorkersLimitCh = make(chan struct{}, getDefaultMergeConcurrency(4))
@ -1038,9 +1050,20 @@ func SetMergeWorkersCount(n int) {
// Do nothing // Do nothing
return return
} }
n = adjustMergeWorkersLimit(n)
mergeWorkersLimitCh = make(chan struct{}, n) mergeWorkersLimitCh = make(chan struct{}, n)
} }
// adjustMergeWorkersLimit returns n raised to a floor of 2.
//
// Values of n that are already 2 or greater are returned unchanged.
// The floor exists so that background merges can make progress
// even when one worker is occupied by a long merge of big parts,
// which matters on systems with a single CPU core.
func adjustMergeWorkersLimit(n int) int {
	const minMergeWorkers = 2
	if n >= minMergeWorkers {
		return n
	}
	return minMergeWorkers
}
func (pt *partition) startMergeWorkers() { func (pt *partition) startMergeWorkers() {
// Start a merge worker per available CPU core. // Start a merge worker per available CPU core.
// The actual number of concurrent merges is limited inside mergeWorker() below. // The actual number of concurrent merges is limited inside mergeWorker() below.