vmagent: add vm_promscrape_scrape_pool_targets for scrape jobs like… (#5335)

* vmagent: export `vm_promscrape_scrape_pool_targets` metric to track the number of targets that each scrape_job discovers

* add extra panel for new metric
This commit is contained in:
Hui Wang 2023-12-06 15:44:39 +08:00 committed by GitHub
parent 17e2b4f814
commit 97373b7786
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 466 additions and 6 deletions

View file

@ -3412,7 +3412,7 @@
"refId": "A"
}
],
"title": "Scrape targets UP",
"title": "Scrape targets UP(By Type)",
"type": "timeseries"
},
{
@ -3514,7 +3514,213 @@
"refId": "A"
}
],
"title": "Scrape targets DOWN",
"title": "Scrape targets DOWN(By Type)",
"type": "timeseries"
},
{
"datasource": {
"type": "victoriametrics-datasource",
"uid": "$ds"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 45
},
"id": 132,
"options": {
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Last *",
"sortDesc": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "9.2.6",
"targets": [
{
"datasource": {
"type": "victoriametrics-datasource",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": true,
"expr": "sum(vm_promscrape_scrape_pool_targets{job=~\"$job\", instance=~\"$instance\", status=\"up\"}) by(job, scrape_job) > 0",
"format": "time_series",
"interval": "",
"legendFormat": "{{job}}: {{scrape_job}}",
"range": true,
"refId": "A"
}
],
"title": "Scrape targets UP(By Job)",
"type": "timeseries"
},
{
"datasource": {
"type": "victoriametrics-datasource",
"uid": "$ds"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 45
},
"id": 133,
"options": {
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "9.2.6",
"targets": [
{
"datasource": {
"type": "victoriametrics-datasource",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": true,
"expr": "sum(vm_promscrape_scrape_pool_targets{job=~\"$job\", instance=~\"$instance\", status=\"down\"}) by(job, scrape_job) > 0",
"format": "time_series",
"interval": "",
"legendFormat": "{{job}}: {{scrape_job}}",
"range": true,
"refId": "A"
}
],
"title": "Scrape targets DOWN(By Job)",
"type": "timeseries"
},
{

View file

@ -3411,7 +3411,7 @@
"refId": "A"
}
],
"title": "Scrape targets UP",
"title": "Scrape targets UP(By Type)",
"type": "timeseries"
},
{
@ -3513,7 +3513,213 @@
"refId": "A"
}
],
"title": "Scrape targets DOWN",
"title": "Scrape targets DOWN(By Type)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 45
},
"id": 132,
"options": {
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Last *",
"sortDesc": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "9.2.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": true,
"expr": "sum(vm_promscrape_scrape_pool_targets{job=~\"$job\", instance=~\"$instance\", status=\"up\"}) by(job, scrape_job) > 0",
"format": "time_series",
"interval": "",
"legendFormat": "{{job}}: {{scrape_job}}",
"range": true,
"refId": "A"
}
],
"title": "Scrape targets UP(By Job)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 45
},
"id": 133,
"options": {
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "9.2.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": true,
"expr": "sum(vm_promscrape_scrape_pool_targets{job=~\"$job\", instance=~\"$instance\", status=\"down\"}) by(job, scrape_job) > 0",
"format": "time_series",
"interval": "",
"legendFormat": "{{job}}: {{scrape_job}}",
"range": true,
"refId": "A"
}
],
"title": "Scrape targets DOWN(By Job)",
"type": "timeseries"
},
{

View file

@ -37,6 +37,7 @@ The sandbox cluster installation is running under the constant load generated by
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): show all the dropped targets together with the reason why they are dropped at `http://vmagent:8429/service-discovery` page. Previously, targets that were dropped because of [target sharding](https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets) weren't displayed on this page, which could complicate service discovery debugging. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5389).
* FEATURE: reduce the default value for `-import.maxLineLen` command-line flag from 100MB to 10MB in order to prevent excessive memory usage during data import via [/api/v1/import](https://docs.victoriametrics.com/#how-to-import-data-in-json-line-format).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `keep_if_contains` and `drop_if_contains` relabeling actions. See [these docs](https://docs.victoriametrics.com/vmagent.html#relabeling-enhancements) for details.
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): export `vm_promscrape_scrape_pool_targets` metric, which tracks the number of `up` and `down` targets for each `scrape_job`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5311).
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): provide `/vmalert/api/v1/rule` and `/api/v1/rule` API endpoints to get the rule object in JSON format. See [these docs](https://docs.victoriametrics.com/vmalert.html#web) for details.
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add [day_of_year()](https://docs.victoriametrics.com/MetricsQL.html#day_of_year) function, which returns the day of the year for each of the given unix timestamps. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5345) for details. Thanks to @luckyxiaoqiang for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5368/).
* FEATURE: all VictoriaMetrics binaries: expose additional metrics at `/metrics` page, which may simplify debugging of VictoriaMetrics components (see [this feature request](https://github.com/VictoriaMetrics/metrics/issues/54)):

View file

@ -218,7 +218,6 @@ func areEqualScrapeConfigs(a, b *ScrapeConfig) bool {
return false
}
return true
}
func (sc *ScrapeConfig) unmarshalJSON(data []byte) error {

View file

@ -891,7 +891,7 @@ func (sw *scrapeWork) addAutoMetrics(am *autoMetrics, wc *writeRequestCtx, times
sw.addAutoTimeseries(wc, "scrape_series_added", float64(am.seriesAdded), timestamp)
sw.addAutoTimeseries(wc, "scrape_timeout_seconds", sw.Config.ScrapeTimeout.Seconds(), timestamp)
if sampleLimit := sw.Config.SampleLimit; sampleLimit > 0 {
// Expose scrape_samples_limit metric if sample_limt config is set for the target.
// Expose scrape_samples_limit metric if sample_limit config is set for the target.
// See https://github.com/VictoriaMetrics/operator/issues/497
sw.addAutoTimeseries(wc, "scrape_samples_limit", float64(sampleLimit), timestamp)
}

View file

@ -15,6 +15,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
"github.com/VictoriaMetrics/metrics"
"github.com/cespare/xxhash/v2"
)
@ -103,10 +104,53 @@ func (tsm *targetStatusMap) Reset() {
// registerJobNames replaces the job names tracked by tsm with jobNames.
//
// Per-job metrics are reconciled against the previously tracked names
// before those names are overwritten.
func (tsm *targetStatusMap) registerJobNames(jobNames []string) {
	tsm.mu.Lock()
	defer tsm.mu.Unlock()

	// Must run before tsm.jobNames is overwritten below, since it diffs
	// the previous names against the new ones.
	tsm.registerJobsMetrics(tsm.jobNames, jobNames)

	// Reuse the existing backing array when it is large enough.
	tsm.jobNames = append(tsm.jobNames[:0], jobNames...)
}
// registerJobsMetrics registers vm_promscrape_scrape_pool_targets gauges for
// jobs that are present in currentJobNames but not in prevJobNames, and
// unregisters the gauges for jobs that are present in prevJobNames but not
// in currentJobNames.
//
// Two gauges are maintained per job: one with status="up" and one with
// status="down". Each gauge sums the corresponding counter over all the
// entries returned by getTargetsStatusByJob for the given job.
//
// It is called from registerJobNames with tsm.mu held.
// NOTE(review): the gauge callbacks go through getTargetsStatusByJob, which
// takes tsm.mu itself; this relies on metrics.NewGauge not invoking the
// callback during registration - confirm.
func (tsm *targetStatusMap) registerJobsMetrics(prevJobNames, currentJobNames []string) {
	prevName := make(map[string]struct{}, len(prevJobNames))
	currentName := make(map[string]struct{}, len(currentJobNames))
	for _, n := range currentJobNames {
		currentName[n] = struct{}{}
	}

	// Drop metrics for jobs that disappeared from the config.
	for _, n := range prevJobNames {
		prevName[n] = struct{}{}
		if _, ok := currentName[n]; !ok {
			metrics.UnregisterMetric(fmt.Sprintf(`vm_promscrape_scrape_pool_targets{scrape_job=%q, status="up"}`, n))
			metrics.UnregisterMetric(fmt.Sprintf(`vm_promscrape_scrape_pool_targets{scrape_job=%q, status="down"}`, n))
		}
	}

	// Create metrics for jobs that were just added to the config.
	for _, n := range currentJobNames {
		if _, ok := prevName[n]; ok {
			continue
		}
		// Shadow the loop variable, so every callback captures its own job name.
		n := n
		_ = metrics.NewGauge(fmt.Sprintf(`vm_promscrape_scrape_pool_targets{scrape_job=%q, status="up"}`, n), func() float64 {
			jobStatus := tsm.getTargetsStatusByJob(&requestFilter{
				originalJobName: n,
			})
			var up float64
			for _, status := range jobStatus.jobTargetsStatuses {
				// Accumulate with `+=`; `= +x` would keep only the last entry.
				up += float64(status.upCount)
			}
			return up
		})
		_ = metrics.NewGauge(fmt.Sprintf(`vm_promscrape_scrape_pool_targets{scrape_job=%q, status="down"}`, n), func() float64 {
			jobStatus := tsm.getTargetsStatusByJob(&requestFilter{
				originalJobName: n,
			})
			var down float64
			for _, status := range jobStatus.jobTargetsStatuses {
				// Accumulate with `+=`; `= +x` would keep only the last entry.
				down += float64(status.targetsTotal - status.upCount)
			}
			return down
		})
	}
}
func (tsm *targetStatusMap) Register(sw *scrapeWork) {
tsm.mu.Lock()
tsm.m[sw] = &targetStatus{
@ -359,6 +403,9 @@ func (tsm *targetStatusMap) getTargetsStatusByJob(filter *requestFilter) *target
tsm.mu.Lock()
for _, ts := range tsm.m {
jobName := ts.sw.Config.jobNameOriginal
if filter.originalJobName != "" && jobName != filter.originalJobName {
continue
}
byJob[jobName] = append(byJob[jobName], *ts)
}
jobNames := append([]string{}, tsm.jobNames...)
@ -494,6 +541,7 @@ type requestFilter struct {
showOnlyUnhealthy bool
endpointSearch string
labelSearch string
originalJobName string
}
func getRequestFilter(r *http.Request) *requestFilter {