Update datasource entries to consistently contain type prometheus and uid $ds. (#3393)

Co-authored-by: Timour I. Bakeev <tbakeev@ripe.net>
Authored by Timur Bakeyev on 2022-11-28 08:37:39 +01:00, committed by Aliaksandr Valialkin
parent d4808d5b84
commit b6064dd645
6 changed files with 286 additions and 51 deletions

View file

@ -107,7 +107,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${ds}"
"uid": "$ds"
},
"expr": "sum(increase(vm_tenant_inserted_rows_total{job=~\"$job\", instance=~\"$instance\",accountID=~\"$account\", projectID=~\"$project\"}[1m])/60) by (accountID,projectID) ",
"interval": "",
@ -205,7 +205,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${ds}"
"uid": "$ds"
},
"editorMode": "code",
"expr": "sum(rate(vm_tenant_select_requests_total{job=~\"$job\", instance=~\"$instance.*\",accountID=~\"$account\", projectID=~\"$project\"}[$__rate_interval])) by (accountID,projectID) ",
@ -311,7 +311,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${ds}"
"uid": "$ds"
},
"expr": "sum(vm_tenant_active_timeseries{job=~\"$job\", instance=~\"$instance.*\",accountID=~\"$account\",projectID=~\"$project\"}) by(accountID,projectID)",
"format": "time_series",
@ -409,7 +409,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${ds}"
"uid": "$ds"
},
"expr": "sum(increase(vm_tenant_timeseries_created_total{job=~\"$job\", instance=~\"$instance\",accountID=~\"$account\", projectID=~\"$project\"}[1m])/60) by(accountID,projectID)",
"interval": "",
@ -504,7 +504,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${ds}"
"uid": "$ds"
},
"editorMode": "code",
"expr": "sum(vm_tenant_used_tenant_bytes{job=~\"$job\", instance=~\"$instance\",accountID=~\"$account\",projectID=~\"$project\"}) by(accountID,projectID)",
@ -579,6 +579,7 @@
"allValue": ".*",
"current": {},
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"definition": "label_values(vm_app_version{version=~\"^vm(insert|select|storage).*\"}, job)",
@ -604,6 +605,7 @@
"allValue": ".*",
"current": {},
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"definition": "label_values(vm_app_version{job=~\"$job\"}, instance)",
@ -629,6 +631,7 @@
"allValue": ".*",
"current": {},
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"definition": "label_values(vm_tenant_active_timeseries{job=~\"$job\"},accountID)",
@ -654,6 +657,7 @@
"allValue": ".*",
"current": {},
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"definition": "label_values(vm_tenant_active_timeseries{accountID=~\"$accountID\"},projectID)",
@ -687,4 +691,4 @@
"uid": "IZFqd3lMz",
"version": 7,
"weekStart": ""
}
}

View file

@ -46,7 +46,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"gridPos": {
"h": 3,
@ -66,7 +66,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"description": "Number of objects at kubernetes cluster per each controller",
"fieldConfig": {
@ -118,7 +118,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"editorMode": "code",
"expr": "max(operator_controller_objects_count{job=~\"$job\",instance=~\"$instance\"}) by (controller)",
@ -133,7 +133,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"fieldConfig": {
"defaults": {
@ -184,7 +184,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
@ -207,7 +207,7 @@
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"fill": 1,
"fillGradient": 0,
@ -248,7 +248,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"editorMode": "code",
"expr": "sum(rate(controller_runtime_reconcile_total{job=~\"$job\",instance=~\"$instance\",result=~\"requeue_after|requeue|success\"}[$__rate_interval])) by(controller)",
@ -294,7 +294,7 @@
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"description": "",
"fill": 1,
@ -336,7 +336,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"editorMode": "code",
"expr": "sum(rate(operator_log_messages_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by (level)",
@ -395,7 +395,7 @@
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"description": "Non zero metrics indicates about error with CR object definition (typos or incorrect values) or errors with kubernetes API connection.",
"fill": 1,
@ -437,7 +437,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
@ -450,7 +450,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"editorMode": "code",
"expr": "sum(rate(controller_runtime_reconcile_total{job=~\"$job\",instance=~\"$instance\",result=\"error\"}[$__rate_interval])) by(controller) > 0",
@ -497,7 +497,7 @@
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"description": "Operator limits number of reconcilation events to 5 events per 2 seconds.\n For now, this limit is applied only for vmalert and vmagent controllers.\n It should reduce load at kubernetes cluster and increase operator performance.",
"fill": 1,
@ -538,7 +538,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"editorMode": "code",
"expr": "sum(rate(operator_reconcile_throttled_events_total[$__rate_interval])) by(controller)",
@ -584,7 +584,7 @@
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"description": "Number of objects waiting in the queue for reconciliation. Non-zero values indicate that operator cannot process CR objects changes with the given resources.",
"fill": 1,
@ -626,7 +626,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"editorMode": "code",
"expr": "max(workqueue_depth{job=~\"$job\",instance=~\"$instance\"}) by (name)",
@ -672,7 +672,7 @@
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"description": " For controllers with StatefulSet it's ok to see latency greater then 3 seconds. It could be vmalertmanager,vmcluster or vmagent in statefulMode.\n\n For other controllers, latency greater then 1 second may indicate issues with kubernetes cluster or operator's performance.\n ",
"fieldConfig": {
@ -721,7 +721,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"editorMode": "code",
"expr": "histogram_quantile(0.99,sum(rate(controller_runtime_reconcile_time_seconds_bucket[$__rate_interval])) by(le,controller) )",
@ -780,7 +780,7 @@
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"fieldConfig": {
"defaults": {
@ -827,7 +827,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"editorMode": "code",
"expr": "sum(go_memstats_sys_bytes{job=~\"$job\", instance=~\"$instance\"}) ",
@ -838,7 +838,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"editorMode": "code",
"expr": "sum(go_memstats_heap_inuse_bytes{job=~\"$job\", instance=~\"$instance\"}) ",
@ -850,7 +850,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"editorMode": "code",
"expr": "sum(go_memstats_stack_inuse_bytes{job=~\"$job\", instance=~\"$instance\"})",
@ -862,7 +862,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"editorMode": "code",
"expr": "sum(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"})",
@ -909,7 +909,7 @@
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"fill": 1,
"fillGradient": 0,
@ -949,7 +949,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"editorMode": "code",
"expr": "rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
@ -995,7 +995,7 @@
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"fill": 1,
"fillGradient": 0,
@ -1035,7 +1035,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"editorMode": "code",
"expr": "sum(go_goroutines{job=~\"$job\", instance=~\"$instance\"})",
@ -1081,7 +1081,7 @@
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"fieldConfig": {
"defaults": {
@ -1128,7 +1128,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"editorMode": "code",
"expr": "sum(rate(go_gc_duration_seconds_sum{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))\n/\nsum(rate(go_gc_duration_seconds_count{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
@ -1203,7 +1203,7 @@
},
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"definition": "label_values(operator_log_messages_total,job)",
"hide": 0,
@ -1229,7 +1229,7 @@
},
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"definition": "label_values(operator_log_messages_total{job=~\"$job\"},instance)",
"hide": 0,
@ -1256,7 +1256,7 @@
},
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
"uid": "$ds"
},
"definition": "label_values(vm_app_version{job=\"$job\", instance=\"$instance\"}, version)",
"hide": 2,
@ -1286,4 +1286,4 @@
"uid": "1H179hunk",
"version": 1,
"weekStart": ""
}
}

View file

@ -62,7 +62,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${ds}"
"uid": "$ds"
},
"enable": true,
"expr": "sum(ALERTS{alertgroup=\"vmcluster\",alertstate=\"firing\",show_at=\"dashboard\"}) by(alertname)",
@ -109,6 +109,7 @@
"collapsed": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"gridPos": {
"h": 1,
@ -123,6 +124,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "How many datapoints are in storage",
@ -541,6 +543,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Average disk usage per datapoint.",
@ -882,6 +885,7 @@
{
"collapsed": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"gridPos": {
@ -1111,6 +1115,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the number of active time series with new data points inserted during the last hour across all storage nodes. High value may result in ingestion slowdown. \n\nSee following link for details:",
@ -1538,6 +1543,7 @@
{
"collapsed": true,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"gridPos": {
@ -2564,6 +2570,7 @@
{
"collapsed": true,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"gridPos": {
@ -2576,6 +2583,7 @@
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the rate and total number of new series created over last 24h.\n\nHigh churn rate tightly connected with database performance and may result in unexpected OOM's or slow queries. It is recommended to always keep an eye on this metric to avoid unexpected cardinality \"explosions\".\n\nThe higher churn rate is, the more resources required to handle it. Consider to keep the churn rate as low as possible.\n\nTo investigate stats about most expensive series use `api/v1/status/tsdb` handler. More details here https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format\n\nGood references to read:\n* https://www.robustperception.io/cardinality-is-key\n* https://valyala.medium.com/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b",
@ -2918,6 +2926,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Slow queries according to `search.logSlowQueryDuration` flag, which is `5s` by default.",
@ -3121,6 +3130,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "VictoriaMetrics limits the number of labels per each metric with `-maxLabelsPerTimeseries` command-line flag.\n\nThis prevents from ingesting metrics with too many labels. The value of `maxLabelsPerTimeseries` must be adjusted for your workload.\n\nWhen limit is exceeded (graph is > 0) - extra labels are dropped, which could result in unexpected identical time series.",
@ -3435,7 +3445,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${ds}"
"uid": "$ds"
},
"fieldConfig": {
"defaults": {
@ -3516,7 +3526,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${ds}"
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
@ -3565,6 +3575,7 @@
{
"collapsed": true,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"gridPos": {
@ -4026,6 +4037,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "The number of rows or bytes that vminesrt internal buffer contains at the moment.",
@ -4250,6 +4262,7 @@
{
"collapsed": true,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"gridPos": {
@ -5777,6 +5790,7 @@
{
"collapsed": true,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"gridPos": {
@ -6883,6 +6897,7 @@
{
"collapsed": true,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"gridPos": {
@ -7867,7 +7882,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${ds}"
"uid": "$ds"
},
"gridPos": {
"h": 2,
@ -8327,6 +8342,7 @@
{
"current": {},
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"definition": "label_values(vm_app_version{version=~\"^vminsert.*\"}, job)",
@ -8351,6 +8367,7 @@
{
"current": {},
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"definition": "label_values(vm_app_version{version=~\"^vmselect.*\"}, job)",
@ -8375,6 +8392,7 @@
{
"current": {},
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"definition": "label_values(vm_app_version{version=~\"^vmstorage.*\"}, job)",
@ -8399,6 +8417,7 @@
{
"current": {},
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"definition": "label_values(vm_app_version{version=~\"^vm(insert|select|storage).*\"}, job)",
@ -8424,6 +8443,7 @@
"allValue": ".*",
"current": {},
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"definition": "label_values(vm_app_version{job=~\"$job\"}, instance)",
@ -8445,7 +8465,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${ds}"
"uid": "$ds"
},
"filters": [],
"hide": 0,
@ -8477,4 +8497,4 @@
"uid": "oS7Bi_0Wz",
"version": 1,
"weekStart": ""
}
}

View file

@ -94,6 +94,7 @@
{
"collapsed": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"gridPos": {
@ -107,6 +108,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"refId": "A"
@ -117,6 +119,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "",
@ -135,6 +138,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"refId": "A"
@ -145,6 +149,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "How many datapoints are in storage",
@ -195,6 +200,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -332,6 +338,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -349,6 +356,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Total size of allowed memory via flag `-memory.allowedPercent`",
@ -399,6 +407,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -416,6 +425,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"fieldConfig": {
@ -467,6 +477,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -482,6 +493,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the number of active time series with new data points inserted during the last hour. High value may result in ingestion slowdown. \n\nSee more details here https://docs.victoriametrics.com/FAQ.html#what-is-an-active-time-series",
@ -532,6 +544,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -549,6 +562,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "The minimum free disk space left",
@ -599,6 +613,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -616,6 +631,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Total number of available CPUs for VM process",
@ -670,6 +686,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -687,6 +704,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Total size of available memory for VM process",
@ -737,6 +755,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -755,6 +774,7 @@
{
"collapsed": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"gridPos": {
@ -768,6 +788,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"refId": "A"
@ -782,6 +803,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "* `*` - unsupported query path\n* `/write` - insert into VM\n* `/metrics` - query VM system metrics\n* `/query` - query instant values\n* `/query_range` - query over a range of time\n* `/series` - match a certain label set\n* `/label/{}/values` - query a list of label values (variables mostly)",
@ -832,6 +854,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(vm_http_requests_total{job=~\"$job\", instance=~\"$instance\", path!~\"/favicon.ico\"}[$__interval])) by (path) > 0",
@ -882,6 +905,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "The less time it takes is better.\n* `*` - unsupported query path\n* `/write` - insert into VM\n* `/metrics` - query VM system metrics\n* `/query` - query instant values\n* `/query_range` - query over a range of time\n* `/series` - match a certain label set\n* `/label/{}/values` - query a list of label values (variables mostly)",
@ -932,6 +956,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "max(vm_request_duration_seconds{job=~\"$job\", instance=~\"$instance\", quantile=~\"(0.5|0.99)\"}) by (path, quantile) > 0",
@ -979,6 +1004,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the number of active time series with new data points inserted during the last hour. High value may result in ingestion slowdown. \n\nSee following link for details:",
@ -1035,6 +1061,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "vm_cache_entries{job=~\"$job\", instance=~\"$instance\", type=\"storage/hour_metric_ids\"}",
@ -1082,6 +1109,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "* `*` - unsupported query path\n* `/write` - insert into VM\n* `/metrics` - query VM system metrics\n* `/query` - query instant values\n* `/query_range` - query over a range of time\n* `/series` - match a certain label set\n* `/label/{}/values` - query a list of label values (variables mostly)",
@ -1132,6 +1160,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
@ -1181,6 +1210,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows how many ongoing insertions (not API /write calls) on disk are taking place, where:\n* `max` - equal to number of CPUs;\n* `current` - current number of goroutines busy with inserting rows into underlying storage.\n\nEvery successful API /write call results into flush on disk. However, these two actions are separated and controlled via different concurrency limiters. The `max` on this panel can't be changed and always equal to number of CPUs. \n\nWhen `current` hits `max` constantly, it means storage is overloaded and requires more CPU.\n\n",
@ -1239,6 +1269,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(vm_concurrent_addrows_capacity{job=~\"$job\", instance=~\"$instance\"})",
@ -1250,6 +1281,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(vm_concurrent_addrows_current{job=~\"$job\", instance=~\"$instance\"})",
@ -1713,6 +1745,7 @@
"collapsed": true,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"gridPos": {
"h": 1,
@ -2030,6 +2063,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"refId": "A"
}
@ -2040,6 +2074,7 @@
{
"collapsed": true,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"gridPos": {
@ -2056,6 +2091,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "How many datapoints are inserted into storage per second",
@ -2107,6 +2143,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(vm_rows_inserted_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by (type) > 0",
@ -2155,6 +2192,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the time needed to reach the 100% of disk capacity based on the following params:\n* free disk space;\n* row ingestion rate;\n* dedup rate;\n* compression.\n\nUse this panel for capacity planning in order to estimate the time remaining for running out of the disk space.\n\n",
@ -2206,6 +2244,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"} / ignoring(path) ((rate(vm_rows_added_to_storage_total{job=~\"$job\", instance=~\"$instance\"}[1d]) - ignoring(type) rate(vm_deduplicated_samples_total{job=~\"$job\", instance=~\"$instance\", type=\"merge\"}[1d])) * scalar(sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type!=\"indexdb\"}) / sum(vm_rows{job=~\"$job\", instance=~\"$instance\", type!=\"indexdb\"})))",
@ -2257,6 +2296,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows how many datapoints are in the storage and what is average disk usage per datapoint.",
@ -2313,6 +2353,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(vm_rows{job=~\"$job\", instance=~\"$instance\", type != \"indexdb\"})",
@ -2324,6 +2365,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type!=\"indexdb\"}) / sum(vm_rows{job=~\"$job\", instance=~\"$instance\", type != \"indexdb\"})",
@ -2375,6 +2417,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "How many datapoints are in RAM queue waiting to be written into storage. The number of pending data points should be in the range from 0 to `2*<ingestion_rate>`, since VictoriaMetrics pushes pending data to persistent storage every second.",
@ -2430,6 +2473,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "vm_pending_rows{job=~\"$job\", instance=~\"$instance\", type=\"storage\"}",
@ -2441,6 +2485,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "vm_pending_rows{job=~\"$job\", instance=~\"$instance\", type=\"indexdb\"}",
@ -2490,6 +2535,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows amount of on-disk space occupied by data points and the remaining disk space at `-storageDataPath`",
@ -2541,6 +2587,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type!=\"indexdb\"})",
@ -2552,6 +2599,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}",
@ -2602,6 +2650,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Data parts of LSM tree.\nHigh number of parts could be an evidence of slow merge performance - check the resource utilization.\n* `indexdb` - inverted index\n* `storage/small` - recently added parts of data ingested into storage(hot data)\n* `storage/big` - small parts gradually merged into big parts (cold data)",
@ -2652,6 +2701,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(vm_parts{job=~\"$job\", instance=~\"$instance\"}) by (type)",
@ -2701,6 +2751,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows amount of on-disk space occupied by inverted index.",
@ -2751,6 +2802,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -2800,6 +2852,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "The number of on-going merges in storage nodes. It is expected to have high numbers for `storage/small` metric.",
@ -2849,6 +2902,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(vm_active_merges{job=~\"$job\", instance=~\"$instance\"}) by(type)",
@ -2897,6 +2951,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows how many rows were ignored on insertion due to corrupted or out of retention timestamps.",
@ -2947,6 +3002,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -2997,6 +3053,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "The number of rows merged per second by storage nodes.",
@ -3046,6 +3103,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(vm_rows_merged_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(type)",
@ -3094,6 +3152,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the rate of logging the messages by their level. Unexpected spike in rate is a good reason to check logs.",
@ -3144,6 +3203,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(vm_log_messages_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (level) ",
@ -3190,6 +3250,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"refId": "A"
@ -3201,6 +3262,7 @@
{
"collapsed": true,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"gridPos": {
@ -3217,6 +3279,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the rate and total number of new series created over last 24h.\n\nHigh churn rate tightly connected with database performance and may result in unexpected OOM's or slow queries. It is recommended to always keep an eye on this metric to avoid unexpected cardinality \"explosions\".\n\nThe higher churn rate is, the more resources required to handle it. Consider to keep the churn rate as low as possible.\n\nGood references to read:\n* https://www.robustperception.io/cardinality-is-key\n* https://www.robustperception.io/using-tsdb-analyze-to-investigate-churn-and-cardinality",
@ -3271,6 +3334,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(vm_new_timeseries_created_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
@ -3280,6 +3344,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(increase(vm_new_timeseries_created_total{job=~\"$job\", instance=~\"$instance\"}[24h]))",
@ -3426,6 +3491,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "The percentage of slow inserts comparing to total insertion rate during the last 5 minutes. \n\nThe less value is better. If percentage remains high (>10%) during extended periods of time, then it is likely more RAM is needed for optimal handling of the current number of active time series. \n\nIn general, VictoriaMetrics requires ~1KB or RAM per active time series, so it should be easy calculating the required amounts of RAM for the current workload according to capacity planning docs. But the resulting number may be far from the real number because the required amounts of memory depends on may other factors such as the number of labels per time series and the length of label values.",
@ -3476,6 +3542,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(vm_slow_row_inserts_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) / sum(rate(vm_rows_inserted_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
@ -3536,6 +3603,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Slow queries rate according to `search.logSlowQueryDuration` flag, which is `5s` by default.",
@ -3586,6 +3654,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(vm_slow_queries_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
@ -3723,6 +3792,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "VictoriaMetrics limits the number of labels per each metric with `-maxLabelsPerTimeseries` command-line flag.\n\nThis prevents from ingesting metrics with too many labels. The value of `maxLabelsPerTimeseries` must be adjusted for your workload.\n\nWhen limit is exceeded (graph is > 0) - extra labels are dropped, which could result in unexpected identical time series.",
@ -3771,6 +3841,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -3820,6 +3891,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"refId": "A"
@ -3831,6 +3903,7 @@
{
"collapsed": true,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"gridPos": {
@ -3847,6 +3920,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "",
@ -3897,6 +3971,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(go_memstats_sys_bytes{job=~\"$job\", instance=~\"$instance\"}) + sum(vm_cache_size_bytes{job=~\"$job\", instance=~\"$instance\"})",
@ -3908,6 +3983,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(go_memstats_heap_inuse_bytes{job=~\"$job\", instance=~\"$instance\"}) + sum(vm_cache_size_bytes{job=~\"$job\", instance=~\"$instance\"})",
@ -3919,6 +3995,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(go_memstats_stack_inuse_bytes{job=~\"$job\", instance=~\"$instance\"})",
@ -3930,6 +4007,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"})",
@ -3942,6 +4020,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -4116,6 +4195,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Panel shows the number of open file descriptors in the OS.\nReaching the limit of open files can cause various issues and must be prevented.\n\nSee how to change limits here https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a",
@ -4172,6 +4252,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(process_open_fds{job=~\"$job\", instance=~\"$instance\"})",
@ -4183,6 +4264,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "min(process_max_fds{job=~\"$job\", instance=~\"$instance\"})",
@ -4234,6 +4316,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the number of bytes read/write from the storage layer.",
@ -4289,6 +4372,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(process_io_storage_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
@ -4301,6 +4385,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(process_io_storage_written_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
@ -4349,6 +4434,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"fieldConfig": {
@ -4398,6 +4484,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(go_goroutines{job=~\"$job\", instance=~\"$instance\"})",
@ -4446,6 +4533,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows avg GC duration",
@ -4496,6 +4584,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(go_gc_duration_seconds_sum{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))\n/\nsum(rate(go_gc_duration_seconds_count{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
@ -4543,6 +4632,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"fieldConfig": {
@ -4592,6 +4682,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(process_num_threads{job=~\"$job\", instance=~\"$instance\"})",
@ -4640,6 +4731,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "",
@ -4690,6 +4782,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(vm_tcplistener_conns{job=~\"$job\", instance=~\"$instance\"})",
@ -4738,6 +4831,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "",
@ -4788,6 +4882,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(vm_tcplistener_accepts_total{job=~\"$job\", instance=~\"$instance\"}[$__interval]))",
@ -4834,6 +4929,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"refId": "A"
@ -4873,6 +4969,7 @@
{
"current": {},
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"definition": "label_values(vm_app_version{version=~\"victoria-metrics-.*\"}, job)",
@ -4897,6 +4994,7 @@
{
"current": {},
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"definition": "label_values(vm_app_version{job=~\"$job\", instance=~\"$instance\"}, version)",
@ -4921,6 +5019,7 @@
{
"current": {},
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"definition": "label_values(vm_app_version{job=~\"$job\"}, instance)",
@ -4945,7 +5044,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${ds}"
"uid": "$ds"
},
"filters": [],
"hide": 0,
@ -4988,4 +5087,4 @@
"uid": "wNf0q_kZk",
"version": 1,
"weekStart": ""
}
}

View file

@ -99,6 +99,7 @@
{
"collapsed": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"gridPos": {
@ -112,6 +113,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"refId": "A"
@ -173,6 +175,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
@ -239,6 +242,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
@ -254,6 +258,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows total number of all configured scrape targets in state \"up\".\n\nSee `http://vmagent-host:8429/targets` to get list of all targets. \n",
@ -298,6 +303,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(vm_promscrape_targets{job=~\"$job\", instance=~\"$instance\", status=\"up\"})",
@ -311,6 +317,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows total number of all configured scrape targets in state \"down\".\n\nSee `http://vmagent-host:8429/targets` to get list of all targets. \n",
@ -365,6 +372,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(vm_promscrape_targets{job=~\"$job\", instance=~\"$instance\", status=\"down\"})",
@ -378,6 +386,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows number of generated error messages in logs over last 30m. Non-zero value may be a sign of connectivity or missconfiguration errors.",
@ -435,6 +444,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(increase(vm_log_messages_total{job=~\"$job\", instance=~\"$instance\", level!=\"info\"}[30m]))",
@ -448,6 +458,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Persistent queue size shows size of pending samples in bytes which hasn't been flushed to remote storage yet. \nIncreasing of value might be a sign of connectivity issues. In such cases, vmagent starts to flush pending data on disk with attempt to send it later once connection is restored.",
@ -497,6 +508,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(vm_persistentqueue_bytes_pending{job=~\"$job\", instance=~\"$instance\"})",
@ -607,6 +619,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"fieldConfig": {
@ -658,6 +671,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sort(sum(up{job=~\"$job\", instance=~\"$instance\"}) by (job, instance))",
@ -1064,6 +1078,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the persistent queue size of pending samples in bytes which hasn't been flushed to remote storage yet. \n\nIncreasing of value might be a sign of connectivity issues. In such cases, vmagent starts to flush pending data on disk with attempt to send it later once connection is restored.\n\nRemote write URLs are hidden by default but might be unveiled once `-remoteWrite.showURL` is set to true.",
@ -1119,6 +1134,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -1165,6 +1181,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the rate of dropped data blocks in cases when remote storage replies with `400 Bad Request` and `409 Conflict` HTTP responses.\n\nSee https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1149",
@ -1215,6 +1232,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -1261,6 +1279,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows rate of dropped samples from persistent queue. VMagent drops samples from queue if in-memory and on-disk queues are full and it is unable to flush them to remote storage.\nThe max size of on-disk queue is configured by `-remoteWrite.maxDiskUsagePerURL` flag.",
@ -1317,6 +1336,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(increase(vm_persistentqueue_bytes_dropped_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by (path)",
@ -1476,6 +1496,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the rate of logging the messages by their level. Unexpected spike in rate is a good reason to check logs.",
@ -1526,6 +1547,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -1573,6 +1595,7 @@
{
"collapsed": true,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"gridPos": {
@ -1589,6 +1612,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"fieldConfig": {
@ -1637,6 +1661,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -1684,6 +1709,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"fieldConfig": {
@ -1732,6 +1758,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -2017,6 +2044,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"fieldConfig": {
@ -2065,6 +2093,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "histogram_quantile(0.95, sum(rate(vm_promscrape_scrape_response_size_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(vmrange)) ",
@ -2075,6 +2104,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "histogram_quantile(0.5, sum(rate(vm_promscrape_scrape_response_size_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(vmrange)) ",
@ -2178,6 +2208,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"refId": "A"
@ -2189,6 +2220,7 @@
{
"collapsed": true,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"gridPos": {
@ -2622,6 +2654,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"refId": "A"
@ -2633,6 +2666,7 @@
{
"collapsed": true,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"gridPos": {
@ -2945,6 +2979,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows current number of established connections to remote write endpoints.\n\n",
@ -2994,6 +3029,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -3045,6 +3081,7 @@
},
"dataFormat": "tsbuckets",
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the remote write request duration distribution in seconds. Value depends on block size, network quality and remote storage performance.",
@ -3065,6 +3102,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -3213,6 +3251,7 @@
},
"dataFormat": "tsbuckets",
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the remote write request block size distribution in rows.",
@ -3233,6 +3272,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -3272,6 +3312,7 @@
},
"dataFormat": "tsbuckets",
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the remote write request block size distribution in bytes.",
@ -3292,6 +3333,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "buckets_limit(12, prometheus_buckets(sum(rate(vmagent_remotewrite_block_size_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(vmrange)))",
@ -3324,6 +3366,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the current limit usage of unique series over an hourly period. Vmagent will start to drop series once the limit is reached.\n\nPlease note, panel will be blank if `remoteWrite.maxHourlySeries` is not set.",
@ -3364,6 +3407,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -3374,6 +3418,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -3433,6 +3478,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the current limit usage of unique series over a daily period. Vmagent will start to drop series once the limit is reached.\n\nPlease note, panel will be blank if `remoteWrite.maxDailySeries` is not set.",
@ -3473,6 +3519,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -3483,6 +3530,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -3540,6 +3588,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"refId": "A"
@ -3613,6 +3662,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
@ -3703,6 +3753,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
@ -3976,6 +4027,7 @@
{
"collapsed": true,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"gridPos": {
@ -4100,6 +4152,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Amount of used memory\n\nIf you think that usage is abnormal or unexpected, please file an issue and attach memory profile if possible.",
@ -4156,6 +4209,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -4166,6 +4220,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
@ -4213,6 +4268,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the number of bytes read/write from the storage layer when vmagent has to buffer data on disk or read already buffered data.",
@ -4269,6 +4325,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(process_io_storage_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
@ -4281,6 +4338,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(process_io_storage_written_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
@ -4331,6 +4389,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Network usage shows the bytes rate for data accepted by vmagent and pushed via remotewrite protocol.\nDiscrepancies are possible because of different protocols used for ingesting, scraping and writing data.",
@ -4386,6 +4445,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(vm_tcplistener_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) * 8\n+ sum(rate(vm_promscrape_conn_bytes_read_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) * 8",
@ -4395,6 +4455,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(vmagent_remotewrite_conn_bytes_written_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) * 8",
@ -4490,6 +4551,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
@ -4543,6 +4605,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"fieldConfig": {
@ -4592,6 +4655,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(go_goroutines{job=~\"$job\", instance=~\"$instance\"}) by(instance)",
@ -4640,6 +4704,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"fieldConfig": {
@ -4689,6 +4754,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "max(go_gc_duration_seconds{job=~\"$job\", instance=~\"$instance\", quantile=\"1\"}) by(instance)",
@ -4735,6 +4801,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"fieldConfig": {
@ -4784,6 +4851,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(process_num_threads{job=~\"$job\", instance=~\"$instance\"}) by(instance)",
@ -4829,6 +4897,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"refId": "A"
@ -4869,6 +4938,7 @@
"allValue": ".*",
"current": {},
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"definition": "label_values(vm_app_version{version=~\"^vmagent.*\"}, job)",
@ -4894,6 +4964,7 @@
"allValue": ".*",
"current": {},
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"definition": "label_values(vm_app_version{job=~\"$job\"}, instance)",
@ -4919,6 +4990,7 @@
"allValue": ".*",
"current": {},
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"definition": "label_values(vmagent_remotewrite_requests_total{job=~\"$job\", instance=~\"$instance\"}, url)",
@ -4941,7 +5013,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${ds}"
"uid": "$ds"
},
"filters": [],
"hide": 0,
@ -4973,4 +5045,4 @@
"uid": "G7Z9GzMGz",
"version": 1,
"weekStart": ""
}
}

View file

@ -134,6 +134,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows if the last configuration update was successful. \"Not Ok\" means there was an unsuccessful attempt to update the configuration due to some error. Check the log for details.",
@ -201,6 +202,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
@ -215,6 +217,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the total number of errors generated by recording/alerting rules for selected instances and groups.",
@ -263,6 +266,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
@ -277,6 +281,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the total number of loaded alerting rules across selected instances and groups.",
@ -321,6 +326,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
@ -335,6 +341,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the total number of loaded recording rules across selected instances and groups.",
@ -379,6 +386,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
@ -490,6 +498,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"fieldConfig": {
@ -541,6 +550,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
@ -594,6 +604,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the number of fired alerts by instance.",
@ -637,6 +648,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
@ -684,6 +696,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Average evaluation duration by group. Basically means how long it takes to execute all the rules per each group.",
@ -733,6 +746,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
@ -778,6 +792,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows how many requests (executions) per second vmalert sends to the configured datasource.",
@ -821,6 +836,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
@ -912,6 +928,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
@ -973,6 +990,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the current active (firing) alerting rules per group.",
@ -1016,6 +1034,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
@ -1061,6 +1080,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the events when rule execution resulted into an error. Check the logs for more details.",
@ -1104,6 +1124,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
@ -1149,6 +1170,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the current pending alerting rules per group.\nBy pending means the rule which remains active less than configured `for` parameter.",
@ -1192,6 +1214,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
@ -1281,6 +1304,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
@ -1374,6 +1398,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
@ -1450,6 +1475,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the top 10 recording rules which generate the most of samples. Each generated sample is basically a time series which then ingested into configured remote storage. Rules with high numbers may cause the most pressure on the remote database and become a source of too high cardinality.",
@ -1493,6 +1519,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
@ -1534,6 +1561,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the rules which do not produce any samples during the evaluation. Usually it means that such rules are misconfigured, since they give no output during the evaluation.\nPlease check if rule's expression is correct and it is working as expected.",
@ -1613,6 +1641,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
@ -1631,6 +1660,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"fill": 0,
@ -1673,6 +1703,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
@ -1853,6 +1884,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Amount of used memory\n\nResident memory shows share which can be freed by OS when needed.\n\nAnonymous shows share for memory allocated by the process itself. This share cannot be freed by the OS, so it must be taken into account by OOM killer.\n\nIf you think that usage is abnormal or unexpected, please file an issue and attach memory profile if possible.",
@ -1909,6 +1941,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
@ -1919,6 +1952,7 @@
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
@ -2017,6 +2051,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
@ -2071,6 +2106,7 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"fieldConfig": {
@ -2120,6 +2156,7 @@
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(go_goroutines{job=~\"$job\", instance=~\"$instance\"}) by(instance)",
@ -2206,6 +2243,7 @@
{
"current": {},
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"definition": "label_values(vm_app_version{version=~\"^vmalert.*\"}, job)",
@ -2228,6 +2266,7 @@
"allValue": ".*",
"current": {},
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"definition": "label_values(vm_app_version{job=~\"$job\"}, instance)",
@ -2250,6 +2289,7 @@
"allValue": ".*",
"current": {},
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"definition": "label_values(vmalert_iteration_duration_seconds{job=~\"$job\", instance=~\"$instance\"}, group)",
@ -2271,7 +2311,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${ds}"
"uid": "$ds"
},
"filters": [],
"hide": 0,
@ -2291,4 +2331,4 @@
"uid": "LzldHAVnz",
"version": 1,
"weekStart": ""
}
}