app/vmbackupmanager: add metrics for better observability (#488)

* app/vmbackupmanager: add metrics for better observability, include more information to `/api/v1/backups` API call response

* app/vmbackupmanager: drop old metrics before creating new ones

* app/vmbackupmanager: use `_total` postfix for counter metrics

* app/vmbackupmanager: remove `_total` postfix for gauge-like metrics

* app/vmbackupmanager: add `_last_run_failed` metrics for backups and retention

* app/vmbackupmanager: address review feedback

* app/vmbackupmanager: fix metric name

* app/vmbackupmanager: address review feedback, remove background updates of metrics, add restoring state of `_last_run_failed` metric from remote storage

* app/vmbackupmanager: improve performance for backup size calculation

* app/vmbackupmanager: refactor backup and retention runs to deduplicate each run logic

* {app/vmbackupmanager,lib/formatutil}: move HumanizeBytes into lib package

* app/vmbackupmanager: fix creating new metrics instead of reusing existing ones

* lib/formatutil: add comment to make linter happy

* app/vmbackupmanager: address review feedback
This commit is contained in:
Zakhar Bessarab 2022-12-21 02:12:04 +04:00 committed by Aliaksandr Valialkin
parent 4e55b67a44
commit 4be4645142
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
6 changed files with 96 additions and 16 deletions

View file

@ -27,11 +27,11 @@ import (
"strconv"
"strings"
"sync"
textTpl "text/template"
"time"
textTpl "text/template"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/formatutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
)
@ -350,15 +350,7 @@ func templateFuncs() textTpl.FuncMap {
if math.Abs(v) <= 1 || math.IsNaN(v) || math.IsInf(v, 0) {
return fmt.Sprintf("%.4g", v), nil
}
prefix := ""
for _, p := range []string{"ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"} {
if math.Abs(v) < 1024 {
break
}
prefix = p
v /= 1024
}
return fmt.Sprintf("%.4g%s", v, prefix), nil
return formatutil.HumanizeBytes(v), nil
},
// humanizeDuration converts given seconds to a human-readable duration

View file

@ -1,6 +1,7 @@
package templates
import (
"math"
"strings"
"testing"
textTpl "text/template"
@ -50,6 +51,31 @@ func TestTemplateFuncs(t *testing.T) {
if !ok {
t.Fatalf("unexpected mismatch")
}
// formatting looks up funcName in funcs, calls the found function with p
// and fails the test if the call returns an error or if the returned string
// differs from resultExpected.
formatting := func(funcName string, p interface{}, resultExpected string) {
// Mark this closure as a helper, so failures are reported at the caller's line.
t.Helper()
v := funcs[funcName]
// The unchecked type assertion panics if the function registered under funcName
// doesn't have the `func(interface{}) (string, error)` signature.
fLocal := v.(func(s interface{}) (string, error))
result, err := fLocal(p)
if err != nil {
// p is printed with the %f verb, so the message is readable only for float arguments.
t.Fatalf("unexpected error for %s(%f): %s", funcName, p, err)
}
if result != resultExpected {
t.Fatalf("unexpected result for %s(%f); got\n%s\nwant\n%s", funcName, p, result, resultExpected)
}
}
formatting("humanize1024", float64(0), "0")
formatting("humanize1024", math.Inf(0), "+Inf")
formatting("humanize1024", math.NaN(), "NaN")
formatting("humanize1024", float64(127087), "124.1ki")
formatting("humanize1024", float64(130137088), "124.1Mi")
formatting("humanize1024", float64(133260378112), "124.1Gi")
formatting("humanize1024", float64(136458627186688), "124.1Ti")
formatting("humanize1024", float64(139733634239168512), "124.1Pi")
formatting("humanize1024", float64(143087241460908556288), "124.1Ei")
formatting("humanize1024", float64(146521335255970361638912), "124.1Zi")
formatting("humanize1024", float64(150037847302113650318245888), "124.1Yi")
formatting("humanize1024", float64(153638755637364377925883789312), "1.271e+05Yi")
}
func mkTemplate(current, replacement interface{}) textTemplate {

View file

@ -158,7 +158,7 @@ The result on the GCS bucket. We see only 3 daily backups:
* GET `/api/v1/backups` - returns list of backups in remote storage.
Example output:
```json
["daily/2022-10-06","daily/2022-10-10","hourly/2022-10-04:13","hourly/2022-10-06:12","hourly/2022-10-06:13","hourly/2022-10-10:14","hourly/2022-10-10:16","monthly/2022-10","weekly/2022-40","weekly/2022-41"]
[{"name":"daily/2022-11-30","size_bytes":26664689,"size":"25.429Mi"},{"name":"daily/2022-12-01","size_bytes":40160965,"size":"38.300Mi"},{"name":"hourly/2022-11-30:12","size_bytes":5846529,"size":"5.576Mi"},{"name":"hourly/2022-11-30:13","size_bytes":17651847,"size":"16.834Mi"},{"name":"hourly/2022-11-30:13:22","size_bytes":8797831,"size":"8.390Mi"},{"name":"hourly/2022-11-30:14","size_bytes":10680454,"size":"10.186Mi"}]
```
* POST `/api/v1/restore` - saves backup name to restore when [performing restore](#restore-commands).
@ -211,7 +211,7 @@ It can be changed by using flag:
`vmbackupmanager backup list` lists backups in remote storage:
```console
$ ./vmbackupmanager backup list
["daily/2022-10-06","daily/2022-10-10","hourly/2022-10-04:13","hourly/2022-10-06:12","hourly/2022-10-06:13","hourly/2022-10-10:14","hourly/2022-10-10:16","monthly/2022-10","weekly/2022-40","weekly/2022-41"]
[{"name":"daily/2022-11-30","size_bytes":26664689,"size":"25.429Mi"},{"name":"daily/2022-12-01","size_bytes":40160965,"size":"38.300Mi"},{"name":"hourly/2022-11-30:12","size_bytes":5846529,"size":"5.576Mi"},{"name":"hourly/2022-11-30:13","size_bytes":17651847,"size":"16.834Mi"},{"name":"hourly/2022-11-30:13:22","size_bytes":8797831,"size":"8.390Mi"},{"name":"hourly/2022-11-30:14","size_bytes":10680454,"size":"10.186Mi"}]
```
### Restore commands

View file

@ -162,7 +162,7 @@ The result on the GCS bucket. We see only 3 daily backups:
* GET `/api/v1/backups` - returns list of backups in remote storage.
Example output:
```json
["daily/2022-10-06","daily/2022-10-10","hourly/2022-10-04:13","hourly/2022-10-06:12","hourly/2022-10-06:13","hourly/2022-10-10:14","hourly/2022-10-10:16","monthly/2022-10","weekly/2022-40","weekly/2022-41"]
[{"name":"daily/2022-11-30","size_bytes":26664689,"size":"25.429Mi"},{"name":"daily/2022-12-01","size_bytes":40160965,"size":"38.300Mi"},{"name":"hourly/2022-11-30:12","size_bytes":5846529,"size":"5.576Mi"},{"name":"hourly/2022-11-30:13","size_bytes":17651847,"size":"16.834Mi"},{"name":"hourly/2022-11-30:13:22","size_bytes":8797831,"size":"8.390Mi"},{"name":"hourly/2022-11-30:14","size_bytes":10680454,"size":"10.186Mi"}]
```
* POST `/api/v1/restore` - saves backup name to restore when [performing restore](#restore-commands).
@ -215,7 +215,7 @@ It can be changed by using flag:
`vmbackupmanager backup list` lists backups in remote storage:
```console
$ ./vmbackupmanager backup list
["daily/2022-10-06","daily/2022-10-10","hourly/2022-10-04:13","hourly/2022-10-06:12","hourly/2022-10-06:13","hourly/2022-10-10:14","hourly/2022-10-10:16","monthly/2022-10","weekly/2022-40","weekly/2022-41"]
[{"name":"daily/2022-11-30","size_bytes":26664689,"size":"25.429Mi"},{"name":"daily/2022-12-01","size_bytes":40160965,"size":"38.300Mi"},{"name":"hourly/2022-11-30:12","size_bytes":5846529,"size":"5.576Mi"},{"name":"hourly/2022-11-30:13","size_bytes":17651847,"size":"16.834Mi"},{"name":"hourly/2022-11-30:13:22","size_bytes":8797831,"size":"8.390Mi"},{"name":"hourly/2022-11-30:14","size_bytes":10680454,"size":"10.186Mi"}]
```
### Restore commands
@ -274,7 +274,15 @@ If restore mark doesn't exist at `storageDataPath`(restore wasn't requested) `vm
### How to restore in Kubernetes
1. Enter container running `vmbackupmanager`
1. Ensure there is an init container with `vmbackupmanager restore` in `vmstorage` or `vmsingle` pod.
For [VictoriaMetrics operator](https://docs.victoriametrics.com/operator/VictoriaMetrics-Operator.html) deployments it is required to add:
```yaml
vmbackup:
restore:
onStart: "true"
```
See operator `VMStorage` schema [here](https://docs.victoriametrics.com/operator/api.html#vmstorage) and `VMSingle` [here](https://docs.victoriametrics.com/operator/api.html#vmsinglespec).
2. Enter container running `vmbackupmanager`
2. Use `vmbackupmanager backup list` to get list of available backups:
```console
$ /vmbackupmanager-prod backup list
@ -291,6 +299,33 @@ If restore mark doesn't exist at `storageDataPath`(restore wasn't requested) `vm
```
4. Restart pod
#### Restore cluster into another cluster
These steps assume that [VictoriaMetrics operator](https://docs.victoriametrics.com/operator/VictoriaMetrics-Operator.html) is used to manage `VMCluster`.
Clusters here are referred to as `source` and `destination`.
1. Create a new cluster with access to *source* cluster `vmbackupmanager` storage and the same number of storage nodes.
Add the following section in order to enable restore on start (operator `VMStorage` schema can be found [here](https://docs.victoriametrics.com/operator/api.html#vmstorage)):
```yaml
vmbackup:
restore:
onStart: "true"
```
Note: it is safe to leave this section in the cluster configuration, since it will be ignored if restore mark doesn't exist.
> Important! Use different `-dst` for *destination* cluster to avoid overwriting backup data of the *source* cluster.
2. Enter container running `vmbackupmanager` in *source* cluster
2. Use `vmbackupmanager backup list` to get list of available backups:
```console
$ /vmbackupmanager-prod backup list
["daily/2022-10-06","daily/2022-10-10","hourly/2022-10-04:13","hourly/2022-10-06:12","hourly/2022-10-06:13","hourly/2022-10-10:14","hourly/2022-10-10:16","monthly/2022-10","weekly/2022-40","weekly/2022-41"]
```
3. Use `vmbackupmanager restore create` to create restore mark at each pod of the *destination* cluster.
Each pod in the *destination* cluster should be restored from the backup of the respective pod in the *source* cluster.
For example: `vmstorage-destination-0` in the *destination* cluster should be restored from the backup of `vmstorage-source-0` in the *source* cluster.
```console
$ /vmbackupmanager-prod restore create s3://source_cluster/vmstorage-source-0/daily/2022-10-06
```
## Configuration
### Flags

View file

@ -11,6 +11,12 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fslocal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fsnil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/metrics"
)
var (
	// bytesUploadedTotal is the running sum of bytes uploaded across runBackup calls.
	// It starts at zero and is incremented with atomic.AddUint64.
	bytesUploadedTotal uint64

	// bytesUploadedTotalMetric is the `vm_backups_uploaded_bytes_total` counter,
	// which is set to the value of bytesUploadedTotal after every upload.
	bytesUploadedTotalMetric = metrics.NewCounter(`vm_backups_uploaded_bytes_total`)
)
// Backup performs backup according to the provided settings.
@ -163,6 +169,8 @@ func runBackup(src *fslocal.FS, dst common.RemoteFS, origin common.OriginFS, con
n := atomic.LoadUint64(&bytesUploaded)
logger.Infof("uploaded %d out of %d bytes from src %s to dst %s in %s", n, uploadSize, src, dst, elapsed)
})
atomic.AddUint64(&bytesUploadedTotal, bytesUploaded)
bytesUploadedTotalMetric.Set(bytesUploadedTotal)
if err != nil {
return err
}

19
lib/formatutil/human.go Normal file
View file

@ -0,0 +1,19 @@
package formatutil
import (
"fmt"
"math"
)
// HumanizeBytes returns human-readable representation of size in bytes with 1024 base.
//
// The size is divided by 1024 until its absolute value drops below 1024
// or the biggest supported prefix (Yi) is reached. The result is formatted
// with up to 4 significant digits followed by the selected binary prefix
// (ki, Mi, Gi, Ti, Pi, Ei, Zi or Yi). Values with absolute value below 1024
// are returned without any prefix.
//
// NaN and ±Inf are returned as "NaN", "+Inf" and "-Inf" without a prefix.
func HumanizeBytes(size float64) string {
	// Non-finite values never satisfy the `< 1024` check below, so without
	// this guard they would be reported with a misleading "Yi" prefix.
	if math.IsNaN(size) || math.IsInf(size, 0) {
		return fmt.Sprintf("%.4g", size)
	}
	prefix := ""
	for _, p := range []string{"ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"} {
		if math.Abs(size) < 1024 {
			break
		}
		prefix = p
		size /= 1024
	}
	return fmt.Sprintf("%.4g%s", size, prefix)
}