From 84227ea2fca64e00f5cc97b59f4b7454ae205dba Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin <valyala@gmail.com> Date: Fri, 9 Oct 2020 13:29:27 +0300 Subject: [PATCH 01/24] app/{vminsert,vmagent}: take into account all the inserted rows before relabeling in `vm_rows_inserted_total` and `vmagent_rows_inserted_total` metrics --- app/vmagent/influx/request_handler.go | 2 +- app/vmagent/native/request_handler.go | 9 ++++++--- app/vmagent/promremotewrite/request_handler.go | 2 +- app/vmagent/vmimport/request_handler.go | 2 +- app/vminsert/influx/request_handler.go | 2 +- app/vminsert/native/request_handler.go | 8 +++++--- app/vminsert/prompush/push.go | 2 +- app/vminsert/promremotewrite/request_handler.go | 2 +- app/vminsert/vmimport/request_handler.go | 2 +- 9 files changed, 18 insertions(+), 13 deletions(-) diff --git a/app/vmagent/influx/request_handler.go b/app/vmagent/influx/request_handler.go index 4ac39982e5..f4447dbae8 100644 --- a/app/vmagent/influx/request_handler.go +++ b/app/vmagent/influx/request_handler.go @@ -62,6 +62,7 @@ func insertRows(db string, rows []parser.Row) error { buf := ctx.buf[:0] for i := range rows { r := &rows[i] + rowsTotal += len(r.Fields) commonLabels = commonLabels[:0] hasDBKey := false for j := range r.Tags { @@ -111,7 +112,6 @@ func insertRows(db string, rows []parser.Row) error { Samples: samples[len(samples)-1:], }) } - rowsTotal += len(r.Fields) } ctx.buf = buf ctx.ctx.WriteRequest.Timeseries = tssDst diff --git a/app/vmagent/native/request_handler.go b/app/vmagent/native/request_handler.go index bda1220d43..e93f5bb2b8 100644 --- a/app/vmagent/native/request_handler.go +++ b/app/vmagent/native/request_handler.go @@ -38,6 +38,12 @@ func insertRows(block *parser.Block, extraLabels []prompbmarshal.Label) error { ctx := common.GetPushCtx() defer common.PutPushCtx(ctx) + // Update rowsInserted and rowsPerInsert before actual inserting, + // since relabeling can prevent from inserting the rows. 
+ rowsLen := len(block.Values) + rowsInserted.Add(rowsLen) + rowsPerInsert.Update(float64(rowsLen)) + tssDst := ctx.WriteRequest.Timeseries[:0] labels := ctx.Labels[:0] samples := ctx.Samples[:0] @@ -71,12 +77,9 @@ func insertRows(block *parser.Block, extraLabels []prompbmarshal.Label) error { Labels: labels[labelsLen:], Samples: samples[samplesLen:], }) - rowsTotal := len(values) ctx.WriteRequest.Timeseries = tssDst ctx.Labels = labels ctx.Samples = samples remotewrite.Push(&ctx.WriteRequest) - rowsInserted.Add(rowsTotal) - rowsPerInsert.Update(float64(rowsTotal)) return nil } diff --git a/app/vmagent/promremotewrite/request_handler.go b/app/vmagent/promremotewrite/request_handler.go index 747092e1c8..00dfcd614e 100644 --- a/app/vmagent/promremotewrite/request_handler.go +++ b/app/vmagent/promremotewrite/request_handler.go @@ -35,6 +35,7 @@ func insertRows(timeseries []prompb.TimeSeries) error { samples := ctx.Samples[:0] for i := range timeseries { ts := ×eries[i] + rowsTotal += len(ts.Samples) labelsLen := len(labels) for i := range ts.Labels { label := &ts.Labels[i] @@ -55,7 +56,6 @@ func insertRows(timeseries []prompb.TimeSeries) error { Labels: labels[labelsLen:], Samples: samples[samplesLen:], }) - rowsTotal += len(ts.Samples) } ctx.WriteRequest.Timeseries = tssDst ctx.Labels = labels diff --git a/app/vmagent/vmimport/request_handler.go b/app/vmagent/vmimport/request_handler.go index 572ce66825..5afb8a37c6 100644 --- a/app/vmagent/vmimport/request_handler.go +++ b/app/vmagent/vmimport/request_handler.go @@ -44,6 +44,7 @@ func insertRows(rows []parser.Row, extraLabels []prompbmarshal.Label) error { samples := ctx.Samples[:0] for i := range rows { r := &rows[i] + rowsTotal += len(r.Values) labelsLen := len(labels) for j := range r.Tags { tag := &r.Tags[j] @@ -69,7 +70,6 @@ func insertRows(rows []parser.Row, extraLabels []prompbmarshal.Label) error { Labels: labels[labelsLen:], Samples: samples[samplesLen:], }) - rowsTotal += len(values) } ctx.WriteRequest.Timeseries = tssDst ctx.Labels = labels diff --git a/app/vminsert/influx/request_handler.go b/app/vminsert/influx/request_handler.go index 4362c61053..93a826cd0e 100644 --- a/app/vminsert/influx/request_handler.go +++ b/app/vminsert/influx/request_handler.go @@ -65,6 +65,7 @@ func insertRows(db string, rows []parser.Row) error { hasRelabeling := relabel.HasRelabeling() for i := range rows { r := &rows[i] + rowsTotal += len(r.Fields) ic.Labels = ic.Labels[:0] hasDBKey := false for j := range r.Tags { @@ -125,7 +126,6 @@ func insertRows(db string, rows []parser.Row) error { } } } - rowsTotal += len(r.Fields) } rowsInserted.Add(rowsTotal) rowsPerInsert.Update(float64(rowsTotal)) diff --git a/app/vminsert/native/request_handler.go b/app/vminsert/native/request_handler.go index c991d3891f..9b0fc6477f 100644 --- a/app/vminsert/native/request_handler.go +++ b/app/vminsert/native/request_handler.go @@ -38,7 +38,12 @@ func insertRows(block *parser.Block, extraLabels []prompbmarshal.Label) error { ctx := getPushCtx() defer putPushCtx(ctx) + // Update rowsInserted and rowsPerInsert before actual inserting, + // since relabeling can prevent from inserting the rows. 
rowsLen := len(block.Values) + rowsInserted.Add(rowsLen) + rowsPerInsert.Update(float64(rowsLen)) + ic := &ctx.Common ic.Reset(rowsLen) hasRelabeling := relabel.HasRelabeling() @@ -72,9 +77,6 @@ func insertRows(block *parser.Block, extraLabels []prompbmarshal.Label) error { return err } } - rowsTotal := len(values) - rowsInserted.Add(rowsTotal) - rowsPerInsert.Update(float64(rowsTotal)) return ic.FlushBufs() } diff --git a/app/vminsert/prompush/push.go b/app/vminsert/prompush/push.go index 6291114f47..1c6ebe0d78 100644 --- a/app/vminsert/prompush/push.go +++ b/app/vminsert/prompush/push.go @@ -51,6 +51,7 @@ func push(ctx *common.InsertCtx, tss []prompbmarshal.TimeSeries) { rowsTotal := 0 for i := range tss { ts := &tss[i] + rowsTotal += len(ts.Samples) ctx.Labels = ctx.Labels[:0] for j := range ts.Labels { label := &ts.Labels[j] @@ -71,7 +72,6 @@ func push(ctx *common.InsertCtx, tss []prompbmarshal.TimeSeries) { return } } - rowsTotal += len(ts.Samples) } rowsInserted.Add(rowsTotal) rowsPerInsert.Update(float64(rowsTotal)) diff --git a/app/vminsert/promremotewrite/request_handler.go b/app/vminsert/promremotewrite/request_handler.go index 74568920ce..f4ff538149 100644 --- a/app/vminsert/promremotewrite/request_handler.go +++ b/app/vminsert/promremotewrite/request_handler.go @@ -36,6 +36,7 @@ func insertRows(timeseries []prompb.TimeSeries) error { hasRelabeling := relabel.HasRelabeling() for i := range timeseries { ts := ×eries[i] + rowsTotal += len(ts.Samples) ctx.Labels = ctx.Labels[:0] srcLabels := ts.Labels for _, srcLabel := range srcLabels { @@ -58,7 +59,6 @@ func insertRows(timeseries []prompb.TimeSeries) error { return err } } - rowsTotal += len(samples) } rowsInserted.Add(rowsTotal) rowsPerInsert.Update(float64(rowsTotal)) diff --git a/app/vminsert/vmimport/request_handler.go b/app/vminsert/vmimport/request_handler.go index 3094409ecf..6fbc7ab833 100644 --- a/app/vminsert/vmimport/request_handler.go +++ b/app/vminsert/vmimport/request_handler.go @@ -50,6 +50,7 @@ func insertRows(rows []parser.Row, extraLabels []prompbmarshal.Label) error { hasRelabeling := relabel.HasRelabeling() for i := range rows { r := &rows[i] + rowsTotal += len(r.Values) ic.Labels = ic.Labels[:0] for j := range r.Tags { tag := &r.Tags[j] @@ -78,7 +79,6 @@ func insertRows(rows []parser.Row, extraLabels []prompbmarshal.Label) error { return err } } - rowsTotal += len(values) } rowsInserted.Add(rowsTotal) rowsPerInsert.Update(float64(rowsTotal)) From 68f0e007611b153a3c21c3ae71a1b823a2c28c29 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin <valyala@gmail.com> Date: Fri, 9 Oct 2020 13:35:48 +0300 Subject: [PATCH 02/24] app/vmstorage: add `vm_rows_added_to_storage_total` metric, which shows the total number of rows added to storage since app start --- app/vmstorage/main.go | 3 +++ lib/storage/storage.go | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/app/vmstorage/main.go b/app/vmstorage/main.go index 073daf7d90..4380533585 100644 --- a/app/vmstorage/main.go +++ b/app/vmstorage/main.go @@ -441,6 +441,9 @@ func registerStorageMetrics() { return float64(idbm().SizeBytes) }) + metrics.NewGauge(`vm_rows_added_to_storage_total`, func() float64 { + return float64(m().RowsAddedTotal) + }) metrics.NewGauge(`vm_deduplicated_samples_total{type="merge"}`, func() float64 { return float64(m().DedupsDuringMerge) }) diff --git a/lib/storage/storage.go b/lib/storage/storage.go index bbb5da9aeb..dae3606563 100644 --- a/lib/storage/storage.go +++ b/lib/storage/storage.go @@ -318,6 +318,7 @@ func (s *Storage) idb() *indexDB 
{ // Metrics contains essential metrics for the Storage. type Metrics struct { + RowsAddedTotal uint64 DedupsDuringMerge uint64 TooSmallTimestampRows uint64 @@ -386,6 +387,7 @@ func (m *Metrics) Reset() { // UpdateMetrics updates m with metrics from s. func (s *Storage) UpdateMetrics(m *Metrics) { + m.RowsAddedTotal = atomic.LoadUint64(&rowsAddedTotal) m.DedupsDuringMerge = atomic.LoadUint64(&dedupsDuringMerge) m.TooSmallTimestampRows += atomic.LoadUint64(&s.tooSmallTimestampRows) @@ -1051,11 +1053,14 @@ func (s *Storage) ForceMergePartitions(partitionNamePrefix string) error { return s.tb.ForceMergePartitions(partitionNamePrefix) } +var rowsAddedTotal uint64 + // AddRows adds the given mrs to s. func (s *Storage) AddRows(mrs []MetricRow, precisionBits uint8) error { if len(mrs) == 0 { return nil } + atomic.AddUint64(&rowsAddedTotal, uint64(len(mrs))) // Limit the number of concurrent goroutines that may add rows to the storage. // This should prevent from out of memory errors and CPU trashing when too many From 272d6976b34f9676a9b5666d895f177b9f84e101 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin <valyala@gmail.com> Date: Fri, 9 Oct 2020 14:21:59 +0300 Subject: [PATCH 03/24] CHANGELOG.md: update with recent changes --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e50be022e8..041532f3eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,11 @@ This should simplify debugging for target relabeling configs. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/651 * FEATURE: add `-finalMergeDelay` command-line flag for configuring the delay before final merge for per-month partitions. The final merge is started after no new data is ingested into per-month partition during `-finalMergeDelay`. +* FEATURE: add `vm_rows_added_to_storage_total` metric, which shows the total number of rows added to storage since app start. + The `sum(rate(vm_rows_added_to_storage_total))` can be smaller than `sum(rate(vm_rows_inserted_total))` if certain metrics are dropped + due to [relabeling](https://victoriametrics.github.io/#relabeling). + +* BUGFIX: vmalert: accept days, weeks and years in `for: ` part of config like Prometheus does. 
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/817 # [v1.43.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.43.0) From cf5f2874cd0c0e13dab44210c1f6a3230a29ddc5 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin <valyala@gmail.com> Date: Fri, 9 Oct 2020 15:11:28 +0300 Subject: [PATCH 04/24] lib/backup/fslocal: add FS.MustStop() method for stopping bandwidth limiter --- app/vmbackup/main.go | 1 + app/vmrestore/main.go | 1 + lib/backup/fslocal/bandwidth_limiter.go | 22 ++++++++++++++++++++-- lib/backup/fslocal/fslocal.go | 13 ++++++++++++- 4 files changed, 34 insertions(+), 3 deletions(-) diff --git a/app/vmbackup/main.go b/app/vmbackup/main.go index 75d8672365..357e4c5ac2 100644 --- a/app/vmbackup/main.go +++ b/app/vmbackup/main.go @@ -90,6 +90,7 @@ func main() { if err := a.Run(); err != nil { logger.Fatalf("cannot create backup: %s", err) } + srcFS.MustStop() } func usage() { diff --git a/app/vmrestore/main.go b/app/vmrestore/main.go index b4d156b6ac..1c0d225f39 100644 --- a/app/vmrestore/main.go +++ b/app/vmrestore/main.go @@ -52,6 +52,7 @@ func main() { if err := a.Run(); err != nil { logger.Fatalf("cannot restore from backup: %s", err) } + dstFS.MustStop() } func usage() { diff --git a/lib/backup/fslocal/bandwidth_limiter.go b/lib/backup/fslocal/bandwidth_limiter.go index db5a2ff3b2..75c866435e 100644 --- a/lib/backup/fslocal/bandwidth_limiter.go +++ b/lib/backup/fslocal/bandwidth_limiter.go @@ -15,6 +15,9 @@ type bandwidthLimiter struct { // quota for the current second quota int + + stopCh chan struct{} + wg sync.WaitGroup } func newBandwidthLimiter(perSecondLimit int) *bandwidthLimiter { @@ -25,10 +28,20 @@ func newBandwidthLimiter(perSecondLimit int) *bandwidthLimiter { bl.perSecondLimit = perSecondLimit var mu sync.Mutex bl.c = sync.NewCond(&mu) - go bl.perSecondUpdater() + bl.stopCh = make(chan struct{}) + bl.wg.Add(1) + go func() { + defer bl.wg.Done() + bl.perSecondUpdater() + }() return &bl } +func (bl *bandwidthLimiter) MustStop() { + close(bl.stopCh) + bl.wg.Wait() +} + func (bl *bandwidthLimiter) NewReadCloser(rc io.ReadCloser) *bandwidthLimitedReader { return &bandwidthLimitedReader{ rc: rc, @@ -83,7 +96,12 @@ func (blw *bandwidthLimitedWriter) Close() error { func (bl *bandwidthLimiter) perSecondUpdater() { tc := time.NewTicker(time.Second) c := bl.c - for range tc.C { + for { + select { + case <-tc.C: + case <-bl.stopCh: + return + } c.L.Lock() bl.quota = bl.perSecondLimit c.Signal() diff --git a/lib/backup/fslocal/fslocal.go b/lib/backup/fslocal/fslocal.go index ced08c2cfb..051182a624 100644 --- a/lib/backup/fslocal/fslocal.go +++ b/lib/backup/fslocal/fslocal.go @@ -27,7 +27,9 @@ type FS struct { bl *bandwidthLimiter } -// Init initializes fs +// Init initializes fs. +// +// The returned fs must be stopped when no long needed with MustStop call. func (fs *FS) Init() error { if fs.MaxBytesPerSecond > 0 { fs.bl = newBandwidthLimiter(fs.MaxBytesPerSecond) @@ -35,6 +37,15 @@ func (fs *FS) Init() error { return nil } +// MustStop stops fs. +func (fs *FS) MustStop() { + if fs.bl == nil { + return + } + fs.bl.MustStop() + fs.bl = nil +} + // String returns user-readable representation for the fs. 
func (fs *FS) String() string { return fmt.Sprintf("fslocal %q", fs.Dir) From b44960718155aa744d0eeafe5bbb9e9da0f52119 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin <valyala@gmail.com> Date: Fri, 9 Oct 2020 15:31:39 +0300 Subject: [PATCH 05/24] lib/backup: add MustStop() method for all remote filesystems --- app/vmbackup/main.go | 2 ++ app/vmrestore/main.go | 1 + lib/backup/common/fs.go | 6 ++++++ lib/backup/fsnil/fsnil.go | 5 +++++ lib/backup/fsremote/fsremote.go | 5 +++++ lib/backup/gcsremote/gcs.go | 7 +++++++ lib/backup/s3remote/s3.go | 8 ++++++++ 7 files changed, 34 insertions(+) diff --git a/app/vmbackup/main.go b/app/vmbackup/main.go index 357e4c5ac2..49bb728748 100644 --- a/app/vmbackup/main.go +++ b/app/vmbackup/main.go @@ -91,6 +91,8 @@ func main() { logger.Fatalf("cannot create backup: %s", err) } srcFS.MustStop() + dstFS.MustStop() + originFS.MustStop() } func usage() { diff --git a/app/vmrestore/main.go b/app/vmrestore/main.go index 1c0d225f39..0f0515bb48 100644 --- a/app/vmrestore/main.go +++ b/app/vmrestore/main.go @@ -52,6 +52,7 @@ func main() { if err := a.Run(); err != nil { logger.Fatalf("cannot restore from backup: %s", err) } + srcFS.MustStop() dstFS.MustStop() } diff --git a/lib/backup/common/fs.go b/lib/backup/common/fs.go index 94d497d5ec..1282fd583a 100644 --- a/lib/backup/common/fs.go +++ b/lib/backup/common/fs.go @@ -9,6 +9,9 @@ import ( // This filesystem is used for performing server-side file copies // instead of uploading data from local filesystem. type OriginFS interface { + // MustStop must be called when the RemoteFS is no longer needed. + MustStop() + // String must return human-readable representation of OriginFS. String() string @@ -18,6 +21,9 @@ type OriginFS interface { // RemoteFS is a filesystem where backups are stored. type RemoteFS interface { + // MustStop must be called when the RemoteFS is no longer needed. + MustStop() + // String must return human-readable representation of RemoteFS. String() string diff --git a/lib/backup/fsnil/fsnil.go b/lib/backup/fsnil/fsnil.go index 456a1e0c3e..6ef466cdbe 100644 --- a/lib/backup/fsnil/fsnil.go +++ b/lib/backup/fsnil/fsnil.go @@ -7,6 +7,11 @@ import ( // FS represents nil remote filesystem. type FS struct{} +// MustStop stops fs. +func (fs *FS) MustStop() { + // Nothing to do +} + // String returns human-readable string representation for fs. func (fs *FS) String() string { return "fsnil" diff --git a/lib/backup/fsremote/fsremote.go b/lib/backup/fsremote/fsremote.go index 4055ac9eae..3e7c65b12c 100644 --- a/lib/backup/fsremote/fsremote.go +++ b/lib/backup/fsremote/fsremote.go @@ -22,6 +22,11 @@ type FS struct { Dir string } +// MustStop stops fs. +func (fs *FS) MustStop() { + // Nothing to do +} + // String returns human-readable string representation for fs. func (fs *FS) String() string { return fmt.Sprintf("fsremote %q", fs.Dir) diff --git a/lib/backup/gcsremote/gcs.go b/lib/backup/gcsremote/gcs.go index 2857c5e585..2e28da9f6e 100644 --- a/lib/backup/gcsremote/gcs.go +++ b/lib/backup/gcsremote/gcs.go @@ -33,6 +33,8 @@ type FS struct { } // Init initializes fs. +// +// The returned fs must be stopped when no long needed with MustStop call. func (fs *FS) Init() error { if fs.bkt != nil { logger.Panicf("BUG: fs.Init has been already called") @@ -63,6 +65,11 @@ func (fs *FS) Init() error { return nil } +// MustStop stops fs. +func (fs *FS) MustStop() { + fs.bkt = nil +} + // String returns human-readable description for fs. 
func (fs *FS) String() string { return fmt.Sprintf("GCS{bucket: %q, dir: %q}", fs.Bucket, fs.Dir) diff --git a/lib/backup/s3remote/s3.go b/lib/backup/s3remote/s3.go index b6e31a6e71..9bfc1ecfb3 100644 --- a/lib/backup/s3remote/s3.go +++ b/lib/backup/s3remote/s3.go @@ -45,6 +45,8 @@ type FS struct { } // Init initializes fs. +// +// The returned fs must be stopped when no long needed with MustStop call. func (fs *FS) Init() error { if fs.s3 != nil { logger.Panicf("BUG: Init is already called") @@ -96,6 +98,12 @@ func (fs *FS) Init() error { return nil } +// MustStop stops fs. +func (fs *FS) MustStop() { + fs.s3 = nil + fs.uploader = nil +} + // String returns human-readable description for fs. func (fs *FS) String() string { return fmt.Sprintf("S3{bucket: %q, dir: %q}", fs.Bucket, fs.Dir) From 2749a3c827650018cb11cdfc6e873787ac3f07fe Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin <valyala@gmail.com> Date: Fri, 9 Oct 2020 20:56:19 +0300 Subject: [PATCH 06/24] docs/Single-server-VictoriaMetrics.md: add missing whitespace --- README.md | 2 +- docs/Single-server-VictoriaMetrics.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1c99d4aaf2..3365cc6f2e 100644 --- a/README.md +++ b/README.md @@ -737,7 +737,7 @@ Time series data can be imported via any supported ingestion protocol: * [Prometheus remote_write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write). * Influx line protocol. See [these docs](#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf) for details. -* Graphite plaintext protocol. See[these docs](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd) for details. +* Graphite plaintext protocol. See [these docs](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd) for details. * OpenTSDB telnet put protocol. See [these docs](#sending-data-via-telnet-put-protocol) for details. * OpenTSDB http `/api/put` protocol. See [these docs](#sending-opentsdb-data-via-http-apiput-requests) for details. * `/api/v1/import` for importing data obtained from [/api/v1/export](#how-to-export-data-in-json-line-format). diff --git a/docs/Single-server-VictoriaMetrics.md b/docs/Single-server-VictoriaMetrics.md index 1c99d4aaf2..3365cc6f2e 100644 --- a/docs/Single-server-VictoriaMetrics.md +++ b/docs/Single-server-VictoriaMetrics.md @@ -737,7 +737,7 @@ Time series data can be imported via any supported ingestion protocol: * [Prometheus remote_write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write). * Influx line protocol. See [these docs](#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf) for details. -* Graphite plaintext protocol. See[these docs](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd) for details. +* Graphite plaintext protocol. See [these docs](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd) for details. * OpenTSDB telnet put protocol. See [these docs](#sending-data-via-telnet-put-protocol) for details. * OpenTSDB http `/api/put` protocol. See [these docs](#sending-opentsdb-data-via-http-apiput-requests) for details. * `/api/v1/import` for importing data obtained from [/api/v1/export](#how-to-export-data-in-json-line-format). 
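Patches 04 and 05 introduce the same shutdown idiom in several places: a background goroutine owns a ticker, a dedicated stop channel tells it to exit, and a sync.WaitGroup lets MustStop() block until the goroutine has actually returned. A minimal self-contained sketch of that idiom follows; the worker type and the quota message are illustrative only, not the actual VictoriaMetrics code:

    package main

    import (
    	"fmt"
    	"sync"
    	"time"
    )

    // worker mirrors the bandwidthLimiter lifecycle from patch 04: a
    // ticker-driven goroutine plus a stopCh/WaitGroup pair for clean shutdown.
    type worker struct {
    	stopCh chan struct{}
    	wg     sync.WaitGroup
    }

    func newWorker() *worker {
    	w := &worker{stopCh: make(chan struct{})}
    	w.wg.Add(1)
    	go func() {
    		defer w.wg.Done()
    		w.run()
    	}()
    	return w
    }

    func (w *worker) run() {
    	tc := time.NewTicker(time.Second)
    	defer tc.Stop()
    	for {
    		select {
    		case <-tc.C:
    			fmt.Println("refill per-second quota")
    		case <-w.stopCh:
    			return
    		}
    	}
    }

    // MustStop signals run() to exit and waits until it does.
    func (w *worker) MustStop() {
    	close(w.stopCh)
    	w.wg.Wait()
    }

    func main() {
    	w := newWorker()
    	time.Sleep(2500 * time.Millisecond)
    	w.MustStop()
    }

Closing the channel (rather than sending on it) is what makes a single MustStop() call wake the goroutine regardless of which select arm it is blocked in.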
From f7d28bddbf320617fbdb17669fc2f6fd4581cf7a Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin <valyala@gmail.com> Date: Mon, 12 Oct 2020 11:27:46 +0300 Subject: [PATCH 07/24] Revert "app/vmselect/promql: remove metric name from results of certain rollup functions in order to be consistent with Prometheus" This reverts commit e5202a4eaee52b3bf03f59fabdc09a84352857c4. Reason for revert: the previous behavior for VictoriaMetrics is easier to understand and use by users - functions, which don't change the meaning of the time series shouldn't drop metric name. Now the following functions do not drop metric name: * max_over_time * min_over_time * avg_over_time * quantile_over_time * geomean_over_time * mode_over_time * holt_winters * predict_linear Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/674 --- app/vmselect/promql/exec_test.go | 1 + app/vmselect/promql/rollup.go | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/app/vmselect/promql/exec_test.go b/app/vmselect/promql/exec_test.go index 6a83fb4731..6f347e6132 100644 --- a/app/vmselect/promql/exec_test.go +++ b/app/vmselect/promql/exec_test.go @@ -3634,6 +3634,7 @@ func TestExecSuccess(t *testing.T) { Values: []float64{7.8, 9.9, 11.9, 13.9, 15.9, 17.9}, Timestamps: timestampsExpected, } + r.MetricName.MetricGroup = []byte("foobar") resultExpected := []netstorage.Result{r} f(q, resultExpected) }) diff --git a/app/vmselect/promql/rollup.go b/app/vmselect/promql/rollup.go index 6d612fee94..3c34d43013 100644 --- a/app/vmselect/promql/rollup.go +++ b/app/vmselect/promql/rollup.go @@ -169,12 +169,20 @@ var rollupFuncsRemoveCounterResets = map[string]bool{ } var rollupFuncsKeepMetricGroup = map[string]bool{ + "holt_winters": true, + "predict_linear": true, "default_rollup": true, + "avg_over_time": true, + "min_over_time": true, + "max_over_time": true, + "quantile_over_time": true, "rollup": true, + "geomean_over_time": true, "hoeffding_bound_lower": true, "hoeffding_bound_upper": true, "first_over_time": true, "last_over_time": true, + "mode_over_time": true, } func getRollupAggrFuncNames(expr metricsql.Expr) ([]string, error) { From a94825b16900b5bdb034e9147277230f11056511 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin <valyala@gmail.com> Date: Mon, 12 Oct 2020 11:36:47 +0300 Subject: [PATCH 08/24] Revert "app/vmselect/promql: remove metric name after applying `clamp_min` and `clamp_max` functions in order to be consistent with Prometheus" This reverts commit bb61a4769bf7e3f4b200c3970138f289a6ff8bdc. Reason for revert: the previous behavior for VictoriaMetrics is easier to understand and use by users - functions, which don't change the meaning of the time series shouldn't drop metric name. 
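For example, with this revert clamp_max(node_load1, 10) keeps returning series named node_load1 (node_load1 is only an illustrative metric here), while stock Prometheus returns the same series with the metric name stripped.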
Now the following functions do not drop metric name: * clamp_min * clamp_max Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/674 --- app/vmselect/promql/exec_test.go | 2 ++ app/vmselect/promql/transform.go | 11 ++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/app/vmselect/promql/exec_test.go b/app/vmselect/promql/exec_test.go index 6f347e6132..cf4e1c6e3b 100644 --- a/app/vmselect/promql/exec_test.go +++ b/app/vmselect/promql/exec_test.go @@ -636,6 +636,7 @@ func TestExecSuccess(t *testing.T) { Values: []float64{1000, 1200, 1400, 1400, 1400, 1400}, Timestamps: timestampsExpected, } + r.MetricName.MetricGroup = []byte("foobar") resultExpected := []netstorage.Result{r} f(q, resultExpected) }) @@ -647,6 +648,7 @@ func TestExecSuccess(t *testing.T) { Values: []float64{1000, 1200, 1400, 1400, 1400, 1400}, Timestamps: timestampsExpected, } + r.MetricName.MetricGroup = []byte("foobar") resultExpected := []netstorage.Result{r} f(q, resultExpected) }) diff --git a/app/vmselect/promql/transform.go b/app/vmselect/promql/transform.go index b43638b13a..cc1e00027b 100644 --- a/app/vmselect/promql/transform.go +++ b/app/vmselect/promql/transform.go @@ -17,6 +17,11 @@ import ( "github.com/valyala/histogram" ) +var transformFuncsKeepMetricGroup = map[string]bool{ + "clamp_max": true, + "clamp_min": true, +} + var transformFuncs = map[string]transformFunc{ // Standard promql funcs // See funcs accepting instant-vector on https://prometheus.io/docs/prometheus/latest/querying/functions/ . @@ -125,8 +130,12 @@ func newTransformFuncOneArg(tf func(v float64) float64) transformFunc { } func doTransformValues(arg []*timeseries, tf func(values []float64), fe *metricsql.FuncExpr) ([]*timeseries, error) { + name := strings.ToLower(fe.Name) + keepMetricGroup := transformFuncsKeepMetricGroup[name] for _, ts := range arg { - ts.MetricName.ResetMetricGroup() + if !keepMetricGroup { + ts.MetricName.ResetMetricGroup() + } tf(ts.Values) } return arg, nil From 45f7cdc532bc2484fa55b57a884dade01992b9cc Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin <valyala@gmail.com> Date: Mon, 12 Oct 2020 11:39:44 +0300 Subject: [PATCH 09/24] Revert "app/vmselect/promql: remove metric name after applying `ceil`, `floor` and `round` functions in order to be more consistent with Prometheus" This reverts commit ac45082216f7ffaa46d8a1d0c51270a4fe1191ab. Reason for revert: the previous behavior for VictoriaMetrics is easier to understand and use by users - functions, which don't change the meaning of the time series shouldn't drop metric name. 
Now the following functions do not drop metric names: * ceil * floor * round Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/674 --- app/vmselect/promql/transform.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/app/vmselect/promql/transform.go b/app/vmselect/promql/transform.go index cc1e00027b..da0e66e733 100644 --- a/app/vmselect/promql/transform.go +++ b/app/vmselect/promql/transform.go @@ -18,8 +18,11 @@ import ( ) var transformFuncsKeepMetricGroup = map[string]bool{ + "ceil": true, "clamp_max": true, "clamp_min": true, + "floor": true, + "round": true, } var transformFuncs = map[string]transformFunc{ From 762c96785597381a3eb7154205e10779f02f5903 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin <valyala@gmail.com> Date: Mon, 12 Oct 2020 11:45:44 +0300 Subject: [PATCH 10/24] app/vmselect/promql: keep metric name after applying more functions, which dont change time series meaning Functions are: * keep_last_value * keep_next_value * interpolate * running_min * running_max * running_avg * range_min * range_max * range_avg * range_first * range_last * range_quantile * smooth_exponential Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/674 --- app/vmselect/promql/transform.go | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/app/vmselect/promql/transform.go b/app/vmselect/promql/transform.go index da0e66e733..bc39a112f0 100644 --- a/app/vmselect/promql/transform.go +++ b/app/vmselect/promql/transform.go @@ -18,11 +18,24 @@ import ( ) var transformFuncsKeepMetricGroup = map[string]bool{ - "ceil": true, - "clamp_max": true, - "clamp_min": true, - "floor": true, - "round": true, + "ceil": true, + "clamp_max": true, + "clamp_min": true, + "floor": true, + "round": true, + "keep_last_value": true, + "keep_next_value": true, + "interpolate": true, + "running_min": true, + "running_max": true, + "running_avg": true, + "range_min": true, + "range_max": true, + "range_avg": true, + "range_first": true, + "range_last": true, + "range_quantile": true, + "smooth_exponential": true, } var transformFuncs = map[string]transformFunc{ From 3bba6a2199948e3460659372a3ddf84ff9a09089 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin <valyala@gmail.com> Date: Mon, 12 Oct 2020 12:55:09 +0300 Subject: [PATCH 11/24] CHANGELOG.md: mention that VictoriaMetrics keeps metric names when applying functions which don't change time series meaning --- CHANGELOG.md | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 041532f3eb..1ec54550c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,37 @@ The final merge is started after no new data is ingested into per-month partition during `-finalMergeDelay`. * FEATURE: add `vm_rows_added_to_storage_total` metric, which shows the total number of rows added to storage since app start. The `sum(rate(vm_rows_added_to_storage_total))` can be smaller than `sum(rate(vm_rows_inserted_total))` if certain metrics are dropped - due to [relabeling](https://victoriametrics.github.io/#relabeling). + due to [relabeling](https://victoriametrics.github.io/#relabeling). The `sum(rate(vm_rows_added_to_storage_total))` can be bigger + than `sum(rate(vm_rows_inserted_total))` if [replication](https://victoriametrics.github.io/Cluster-VictoriaMetrics.html#replication-and-data-safety) is enabled. 
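+  For example, the per-second number of rows dropped by relabeling can be estimated with `sum(rate(vm_rows_inserted_total)) - sum(rate(vm_rows_added_to_storage_total))` (assuming replication is disabled).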
+* FEATURE: keep metric name after applying [MetricsQL](https://victoriametrics.github.io/MetricsQL.html) functions, which don't change time series meaning. + The list of such functions: + * `keep_last_value` + * `keep_next_value` + * `interpolate` + * `running_min` + * `running_max` + * `running_avg` + * `range_min` + * `range_max` + * `range_avg` + * `range_first` + * `range_last` + * `range_quantile` + * `smooth_exponential` + * `ceil` + * `floor` + * `round` + * `clamp_min` + * `clamp_max` + * `max_over_time` + * `min_over_time` + * `avg_over_time` + * `quantile_over_time` + * `mode_over_time` + * `geomean_over_time` + * `holt_winters` + * `predict_linear` + See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/674 * BUGFIX: vmalert: accept days, weeks and years in `for: ` part of config like Prometheus does. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/817 From 7f983d461ab6ddfc249aca85bea40dbd7202ca18 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin <valyala@gmail.com> Date: Mon, 12 Oct 2020 13:25:17 +0300 Subject: [PATCH 12/24] docs/MetricsQL.md: mention that VictoriaMetrics keeps metric names after applying functions which dont change time series meaning --- docs/MetricsQL.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/MetricsQL.md b/docs/MetricsQL.md index dad59993d6..b21637f881 100644 --- a/docs/MetricsQL.md +++ b/docs/MetricsQL.md @@ -15,6 +15,8 @@ The following functionality is implemented differently in MetricsQL comparing to * MetricsQL removes all the `NaN` values from the output, so some queries like `(-1)^0.5` return empty results in VictoriaMetrics, while returning a series of `NaN` values in Prometheus. Note that Grafana doesn't draw any lines or dots for `NaN` values, so usually the end result looks the same for both VictoriaMetrics and Prometheus. +* MetricsQL keeps metric names after applying functions, which don't change the meaining of the original time series. For example, `min_over_time(foo)` or `round(foo)` + leave `foo` metric name in the result. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/674) for details. Other PromQL functionality should work the same in MetricsQL. [File an issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues) if you notice discrepancies between PromQL and MetricsQL results other than mentioned above. From 9bd9f67718fed7a59ffccbc035e62e0bc8d18dc7 Mon Sep 17 00:00:00 2001 From: Nikolay Khramchikhin <nik@victoriametrics.com> Date: Mon, 12 Oct 2020 13:38:21 +0300 Subject: [PATCH 13/24] Adds dockerswarm sd (#818) * adds dockerswarm service discovery https://github.com/VictoriaMetrics/VictoriaMetrics/issues/656 Following roles supported: services, tasks and nodes. Basic, token and tls auth supported. Added tests for labels generation. 
* added unix socket support to discovery utils Co-authored-by: Aliaksandr Valialkin <valyala@gmail.com> --- README.md | 1 + app/vmagent/README.md | 2 + lib/promscrape/config.go | 39 ++ lib/promscrape/discovery/dockerswarm/api.go | 39 ++ .../discovery/dockerswarm/dockerswarm.go | 51 +++ .../discovery/dockerswarm/network.go | 61 +++ .../discovery/dockerswarm/network_test.go | 166 +++++++++ lib/promscrape/discovery/dockerswarm/nodes.go | 90 +++++ .../discovery/dockerswarm/nodes_test.go | 185 +++++++++ .../discovery/dockerswarm/services.go | 139 +++++++ .../discovery/dockerswarm/services_test.go | 293 +++++++++++++++ lib/promscrape/discovery/dockerswarm/tasks.go | 149 ++++++++ .../discovery/dockerswarm/tasks_test.go | 352 ++++++++++++++++++ lib/promscrape/discoveryutils/client.go | 17 +- lib/promscrape/scraper.go | 5 +- 15 files changed, 1586 insertions(+), 3 deletions(-) create mode 100644 lib/promscrape/discovery/dockerswarm/api.go create mode 100644 lib/promscrape/discovery/dockerswarm/dockerswarm.go create mode 100644 lib/promscrape/discovery/dockerswarm/network.go create mode 100644 lib/promscrape/discovery/dockerswarm/network_test.go create mode 100644 lib/promscrape/discovery/dockerswarm/nodes.go create mode 100644 lib/promscrape/discovery/dockerswarm/nodes_test.go create mode 100644 lib/promscrape/discovery/dockerswarm/services.go create mode 100644 lib/promscrape/discovery/dockerswarm/services_test.go create mode 100644 lib/promscrape/discovery/dockerswarm/tasks.go create mode 100644 lib/promscrape/discovery/dockerswarm/tasks_test.go diff --git a/README.md b/README.md index 3365cc6f2e..b32a5ad7f1 100644 --- a/README.md +++ b/README.md @@ -295,6 +295,7 @@ Currently the following [scrape_config](https://prometheus.io/docs/prometheus/la * [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config) * [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config) * [openstack_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#openstack_sd_config) +* [dockerswarm_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dockerswarm_sd_config) In the future other `*_sd_config` types will be supported. diff --git a/app/vmagent/README.md b/app/vmagent/README.md index 4d8b03f06d..710b44f518 100644 --- a/app/vmagent/README.md +++ b/app/vmagent/README.md @@ -151,6 +151,8 @@ The following scrape types in [scrape_config](https://prometheus.io/docs/prometh * `openstack_sd_configs` - for scraping OpenStack targets. See [openstack_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#openstack_sd_config) for details. [OpenStack identity API v3](https://docs.openstack.org/api-ref/identity/v3/) is supported only. +* `dockerswarm_sd_configs` - for scraping dockerswarm targets. + See [dockerswarm_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dockerswarm_sd_config) for details. File feature requests at [our issue tracker](https://github.com/VictoriaMetrics/VictoriaMetrics/issues) if you need other service discovery mechanisms to be supported by `vmagent`. 
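Each entry under the new dockerswarm_sd_configs section is unmarshaled into the SDConfig struct added below in lib/promscrape/discovery/dockerswarm/dockerswarm.go. A minimal sketch of that round trip, with the struct trimmed to its addressing fields and a purely illustrative host value (the commit also adds unix socket support to the discovery utils, so a unix:// host is expected to work as well):

    package main

    import (
    	"fmt"
    	"log"

    	"gopkg.in/yaml.v2"
    )

    // sdConfig is a trimmed copy of the dockerswarm SDConfig from this patch;
    // the basic auth, bearer token and TLS fields are omitted for brevity.
    type sdConfig struct {
    	Host string `yaml:"host"`
    	Role string `yaml:"role"` // one of "tasks", "services", "nodes"
    	Port int    `yaml:"port"`
    }

    func main() {
    	data := []byte("host: http://127.0.0.1:2375\nrole: tasks\nport: 9100\n")
    	var cfg sdConfig
    	if err := yaml.Unmarshal(data, &cfg); err != nil {
    		log.Fatalf("cannot parse dockerswarm_sd_config: %s", err)
    	}
    	fmt.Printf("host=%s role=%s port=%d\n", cfg.Host, cfg.Role, cfg.Port)
    }

The Role value is what GetLabels switches on further down, so a typo there fails fast with the "unexpected `role`" error instead of silently discovering nothing.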
diff --git a/lib/promscrape/config.go b/lib/promscrape/config.go index 8a402692a1..9595e78335 100644 --- a/lib/promscrape/config.go +++ b/lib/promscrape/config.go @@ -18,6 +18,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/dns" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/dockerswarm" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/ec2" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/gce" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/kubernetes" @@ -72,6 +73,7 @@ type ScrapeConfig struct { KubernetesSDConfigs []kubernetes.SDConfig `yaml:"kubernetes_sd_configs"` OpenStackSDConfigs []openstack.SDConfig `yaml:"openstack_sd_configs"` ConsulSDConfigs []consul.SDConfig `yaml:"consul_sd_configs"` + DockerSwarmConfigs []dockerswarm.SDConfig `yaml:"dockerswarm_sd_configs"` DNSSDConfigs []dns.SDConfig `yaml:"dns_sd_configs"` EC2SDConfigs []ec2.SDConfig `yaml:"ec2_sd_configs"` GCESDConfigs []gce.SDConfig `yaml:"gce_sd_configs"` @@ -231,6 +233,34 @@ func (cfg *Config) getOpenStackSDScrapeWork(prev []ScrapeWork) []ScrapeWork { return dst } +// getDockerSwarmSDScrapeWork returns `dockerswarm_sd_configs` ScrapeWork from cfg. +func (cfg *Config) getDockerSwarmSDScrapeWork(prev []ScrapeWork) []ScrapeWork { + swsPrevByJob := getSWSByJob(prev) + var dst []ScrapeWork + for i := range cfg.ScrapeConfigs { + sc := &cfg.ScrapeConfigs[i] + dstLen := len(dst) + ok := true + for j := range sc.DockerSwarmConfigs { + sdc := &sc.DockerSwarmConfigs[j] + var okLocal bool + dst, okLocal = appendDockerSwarmScrapeWork(dst, sdc, cfg.baseDir, sc.swc) + if ok { + ok = okLocal + } + } + if ok { + continue + } + swsPrev := swsPrevByJob[sc.swc.jobName] + if len(swsPrev) > 0 { + logger.Errorf("there were errors when discovering dockerswarm targets for job %q, so preserving the previous targets", sc.swc.jobName) + dst = append(dst[:dstLen], swsPrev...) + } + } + return dst +} + // getConsulSDScrapeWork returns `consul_sd_configs` ScrapeWork from cfg. 
func (cfg *Config) getConsulSDScrapeWork(prev []ScrapeWork) []ScrapeWork { swsPrevByJob := getSWSByJob(prev) @@ -483,6 +513,15 @@ func appendOpenstackScrapeWork(dst []ScrapeWork, sdc *openstack.SDConfig, baseDi return appendScrapeWorkForTargetLabels(dst, swc, targetLabels, "openstack_sd_config"), true } +func appendDockerSwarmScrapeWork(dst []ScrapeWork, sdc *dockerswarm.SDConfig, baseDir string, swc *scrapeWorkConfig) ([]ScrapeWork, bool) { + targetLabels, err := dockerswarm.GetLabels(sdc, baseDir) + if err != nil { + logger.Errorf("error when discovering dockerswarm targets for `job_name` %q: %s; skipping it", swc.jobName, err) + return dst, false + } + return appendScrapeWorkForTargetLabels(dst, swc, targetLabels, "dockerswarm_sd_config"), true +} + func appendConsulScrapeWork(dst []ScrapeWork, sdc *consul.SDConfig, baseDir string, swc *scrapeWorkConfig) ([]ScrapeWork, bool) { targetLabels, err := consul.GetLabels(sdc, baseDir) if err != nil { diff --git a/lib/promscrape/discovery/dockerswarm/api.go b/lib/promscrape/discovery/dockerswarm/api.go new file mode 100644 index 0000000000..4e8b591d50 --- /dev/null +++ b/lib/promscrape/discovery/dockerswarm/api.go @@ -0,0 +1,39 @@ +package dockerswarm + +import ( + "fmt" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils" +) + +var configMap = discoveryutils.NewConfigMap() + +type apiConfig struct { + client *discoveryutils.Client + port int +} + +func getAPIConfig(sdc *SDConfig, baseDir string) (*apiConfig, error) { + v, err := configMap.Get(sdc, func() (interface{}, error) { return newAPIConfig(sdc, baseDir) }) + if err != nil { + return nil, err + } + return v.(*apiConfig), nil +} + +func newAPIConfig(sdc *SDConfig, baseDir string) (*apiConfig, error) { + cfg := &apiConfig{ + port: sdc.Port, + } + config, err := promauth.NewConfig(baseDir, sdc.BasicAuth, sdc.BearerToken, sdc.BearerTokenFile, sdc.TLSConfig) + if err != nil { + return nil, err + } + client, err := discoveryutils.NewClient(sdc.Host, config) + if err != nil { + return nil, fmt.Errorf("cannot create HTTP client for %q: %w", sdc.Host, err) + } + cfg.client = client + return cfg, nil +} diff --git a/lib/promscrape/discovery/dockerswarm/dockerswarm.go b/lib/promscrape/discovery/dockerswarm/dockerswarm.go new file mode 100644 index 0000000000..1b172415c9 --- /dev/null +++ b/lib/promscrape/discovery/dockerswarm/dockerswarm.go @@ -0,0 +1,51 @@ +package dockerswarm + +import ( + "fmt" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth" +) + +// SDConfig represents docker swarm service discovery configuration +// +// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dockerswarm_sd_config +type SDConfig struct { + Host string `yaml:"host"` + Role string `yaml:"role"` + Port int `yaml:"port"` + TLSConfig *promauth.TLSConfig `yaml:"tls_config"` + BasicAuth *promauth.BasicAuthConfig `yaml:"basic_auth"` + BearerToken string `yaml:"bearer_token"` + BearerTokenFile string `yaml:"bearer_token_file"` +} + +// joinLabels adds labels to destination from source with given key from destination matching given value. +func joinLabels(source []map[string]string, destination map[string]string, key, value string) map[string]string { + for _, sourceLabels := range source { + if sourceLabels[key] == value { + for k, v := range sourceLabels { + destination[k] = v + } + return destination + } + } + return destination +} + +// GetLabels returns gce labels according to sdc. 
+func GetLabels(sdc *SDConfig, baseDir string) ([]map[string]string, error) { + cfg, err := getAPIConfig(sdc, baseDir) + if err != nil { + return nil, fmt.Errorf("cannot get API config: %w", err) + } + switch sdc.Role { + case "tasks": + return getTasksLabels(cfg) + case "services": + return getServicesLabels(cfg) + case "nodes": + return getNodesLabels(cfg) + default: + return nil, fmt.Errorf("unexpected `role`: %q; must be one of `tasks`, `services` or `nodes`; skipping it", sdc.Role) + } +} diff --git a/lib/promscrape/discovery/dockerswarm/network.go b/lib/promscrape/discovery/dockerswarm/network.go new file mode 100644 index 0000000000..3200d9ee87 --- /dev/null +++ b/lib/promscrape/discovery/dockerswarm/network.go @@ -0,0 +1,61 @@ +package dockerswarm + +import ( + "encoding/json" + "fmt" + "strconv" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils" +) + +// See https://docs.docker.com/engine/api/v1.40/#tag/Network +type network struct { + ID string + Name string + Scope string + Internal bool + Ingress bool + Labels map[string]string +} + +func getNetworksLabels(cfg *apiConfig) ([]map[string]string, error) { + networks, err := getNetworks(cfg) + if err != nil { + return nil, err + } + return addNetworkLabels(networks), nil +} + +func getNetworks(cfg *apiConfig) ([]network, error) { + resp, err := cfg.client.GetAPIResponse("/networks") + if err != nil { + return nil, fmt.Errorf("cannot query dockerswarm api for networks: %w", err) + } + return parseNetworks(resp) +} + +func parseNetworks(data []byte) ([]network, error) { + var networks []network + if err := json.Unmarshal(data, &networks); err != nil { + return nil, fmt.Errorf("cannot parse networks: %w", err) + } + return networks, nil +} + +func addNetworkLabels(networks []network) []map[string]string { + var ms []map[string]string + for _, network := range networks { + m := map[string]string{ + "__meta_dockerswarm_network_id": network.ID, + "__meta_dockerswarm_network_name": network.Name, + "__meta_dockerswarm_network_scope": network.Scope, + "__meta_dockerswarm_network_internal": strconv.FormatBool(network.Internal), + "__meta_dockerswarm_network_ingress": strconv.FormatBool(network.Ingress), + } + for k, v := range network.Labels { + m["__meta_dockerswarm_network_label_"+discoveryutils.SanitizeLabelName(k)] = v + } + ms = append(ms, m) + } + return ms +} diff --git a/lib/promscrape/discovery/dockerswarm/network_test.go b/lib/promscrape/discovery/dockerswarm/network_test.go new file mode 100644 index 0000000000..7bcbce020f --- /dev/null +++ b/lib/promscrape/discovery/dockerswarm/network_test.go @@ -0,0 +1,166 @@ +package dockerswarm + +import ( + "reflect" + "testing" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils" +) + +func Test_addNetworkLabels(t *testing.T) { + type args struct { + networks []network + } + tests := []struct { + name string + args args + want [][]prompbmarshal.Label + }{ + { + name: "ingress network", + args: args{ + networks: []network{ + { + ID: "qs0hog6ldlei9ct11pr3c77v1", + Ingress: true, + Scope: "swarm", + Name: "ingress", + Labels: map[string]string{ + "key1": "value1", + }, + }, + }, + }, + want: [][]prompbmarshal.Label{ + discoveryutils.GetSortedLabels(map[string]string{ + "__meta_dockerswarm_network_id": "qs0hog6ldlei9ct11pr3c77v1", + "__meta_dockerswarm_network_ingress": "true", + "__meta_dockerswarm_network_internal": "false", + "__meta_dockerswarm_network_label_key1": 
"value1", + "__meta_dockerswarm_network_name": "ingress", + "__meta_dockerswarm_network_scope": "swarm", + })}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := addNetworkLabels(tt.args.networks) + var sortedLabelss [][]prompbmarshal.Label + for _, labels := range got { + sortedLabelss = append(sortedLabelss, discoveryutils.GetSortedLabels(labels)) + } + if !reflect.DeepEqual(sortedLabelss, tt.want) { + t.Errorf("addNetworkLabels() \ngot %v, \nwant %v", sortedLabelss, tt.want) + } + }) + } +} + +func Test_parseNetworks(t *testing.T) { + type args struct { + data []byte + } + tests := []struct { + name string + args args + want []network + wantErr bool + }{ + { + name: "parse two networks", + args: args{ + data: []byte(`[ + { + "Name": "ingress", + "Id": "qs0hog6ldlei9ct11pr3c77v1", + "Created": "2020-10-06T08:39:58.957083331Z", + "Scope": "swarm", + "Driver": "overlay", + "EnableIPv6": false, + "IPAM": { + "Driver": "default", + "Options": null, + "Config": [ + { + "Subnet": "10.0.0.0/24", + "Gateway": "10.0.0.1" + } + ] + }, + "Internal": false, + "Attachable": false, + "Ingress": true, + "ConfigFrom": { + "Network": "" + }, + "ConfigOnly": false, + "Containers": null, + "Options": { + "com.docker.network.driver.overlay.vxlanid_list": "4096" + }, + "Labels": { + "key1": "value1" + } + }, + { + "Name": "host", + "Id": "317f0384d7e5f5c26304a0b04599f9f54bc08def4d0535059ece89955e9c4b7b", + "Created": "2020-10-06T08:39:52.843373136Z", + "Scope": "local", + "Driver": "host", + "EnableIPv6": false, + "IPAM": { + "Driver": "default", + "Options": null, + "Config": [] + }, + "Internal": false, + "Attachable": false, + "Ingress": false, + "ConfigFrom": { + "Network": "" + }, + "ConfigOnly": false, + "Containers": {}, + "Options": {}, + "Labels": { + "key": "value" + } + } +]`), + }, + want: []network{ + { + ID: "qs0hog6ldlei9ct11pr3c77v1", + Ingress: true, + Scope: "swarm", + Name: "ingress", + Labels: map[string]string{ + "key1": "value1", + }, + }, + { + ID: "317f0384d7e5f5c26304a0b04599f9f54bc08def4d0535059ece89955e9c4b7b", + Scope: "local", + Name: "host", + Labels: map[string]string{ + "key": "value", + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := parseNetworks(tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("parseNetworks() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("parseNetworks() \ngot %v, \nwant %v", got, tt.want) + } + }) + } +} diff --git a/lib/promscrape/discovery/dockerswarm/nodes.go b/lib/promscrape/discovery/dockerswarm/nodes.go new file mode 100644 index 0000000000..0474956a45 --- /dev/null +++ b/lib/promscrape/discovery/dockerswarm/nodes.go @@ -0,0 +1,90 @@ +package dockerswarm + +import ( + "encoding/json" + "fmt" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils" +) + +// See https://docs.docker.com/engine/api/v1.40/#tag/Node +type node struct { + ID string + Spec struct { + Labels map[string]string + Role string + Availability string + } + Description struct { + Hostname string + Platform struct { + Architecture string + OS string + } + Engine struct { + EngineVersion string + } + } + Status struct { + State string + Message string + Addr string + } + ManagerStatus *struct { + Leader bool + Reachability string + Addr string + } +} + +func getNodesLabels(cfg *apiConfig) ([]map[string]string, error) { + nodes, err := getNodes(cfg) + if err != nil { + return nil, err + } + return 
addNodeLabels(nodes, cfg.port), nil +} + +func getNodes(cfg *apiConfig) ([]node, error) { + resp, err := cfg.client.GetAPIResponse("/nodes") + if err != nil { + return nil, fmt.Errorf("cannot query dockerswarm api for nodes: %w", err) + } + return parseNodes(resp) +} + +func parseNodes(data []byte) ([]node, error) { + var nodes []node + if err := json.Unmarshal(data, &nodes); err != nil { + return nil, fmt.Errorf("cannot parse nodes: %w", err) + } + return nodes, nil +} + +func addNodeLabels(nodes []node, port int) []map[string]string { + var ms []map[string]string + for _, node := range nodes { + m := map[string]string{ + "__address__": discoveryutils.JoinHostPort(node.Status.Addr, port), + "__meta_dockerswarm_node_id": node.ID, + "__meta_dockerswarm_node_address": node.Status.Addr, + "__meta_dockerswarm_node_availability": node.Spec.Availability, + "__meta_dockerswarm_node_engine_version": node.Description.Engine.EngineVersion, + "__meta_dockerswarm_node_hostname": node.Description.Hostname, + "__meta_dockerswarm_node_platform_architecture": node.Description.Platform.Architecture, + "__meta_dockerswarm_node_platform_os": node.Description.Platform.OS, + "__meta_dockerswarm_node_role": node.Spec.Role, + "__meta_dockerswarm_node_status": node.Status.State, + } + if node.ManagerStatus != nil { + m["__meta_dockerswarm_node_manager_address"] = node.ManagerStatus.Addr + m["__meta_dockerswarm_node_manager_manager_reachability"] = node.ManagerStatus.Reachability + m["__meta_dockerswarm_node_manager_leader"] = fmt.Sprintf("%t", node.ManagerStatus.Leader) + } + for k, v := range node.Spec.Labels { + m["__meta_dockerswarm_node_label_"+discoveryutils.SanitizeLabelName(k)] = v + } + ms = append(ms, m) + } + return ms +} diff --git a/lib/promscrape/discovery/dockerswarm/nodes_test.go b/lib/promscrape/discovery/dockerswarm/nodes_test.go new file mode 100644 index 0000000000..6d5c63629d --- /dev/null +++ b/lib/promscrape/discovery/dockerswarm/nodes_test.go @@ -0,0 +1,185 @@ +package dockerswarm + +import ( + "reflect" + "testing" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils" +) + +func Test_parseNodes(t *testing.T) { + type args struct { + data []byte + } + tests := []struct { + name string + args args + want []node + wantErr bool + }{ + { + name: "parse ok", + args: args{ + data: []byte(`[ + { + "ID": "qauwmifceyvqs0sipvzu8oslu", + "Version": { + "Index": 16 + }, + "Spec": { + "Role": "manager", + "Availability": "active" + }, + "Description": { + "Hostname": "ip-172-31-40-97", + "Platform": { + "Architecture": "x86_64", + "OS": "linux" + }, + "Resources": { + "NanoCPUs": 1000000000, + "MemoryBytes": 1026158592 + }, + "Engine": { + "EngineVersion": "19.03.11" + } + }, + "Status": { + "State": "ready", + "Addr": "172.31.40.97" + } + } +] +`), + }, + want: []node{ + { + ID: "qauwmifceyvqs0sipvzu8oslu", + Spec: struct { + Labels map[string]string + Role string + Availability string + }{Role: "manager", Availability: "active"}, + Status: struct { + State string + Message string + Addr string + }{State: "ready", Addr: "172.31.40.97"}, + Description: struct { + Hostname string + Platform struct { + Architecture string + OS string + } + Engine struct{ EngineVersion string } + }{ + Hostname: "ip-172-31-40-97", + Platform: struct { + Architecture string + OS string + }{ + Architecture: "x86_64", + OS: "linux", + }, + Engine: struct{ EngineVersion string }{ + EngineVersion: "19.03.11", + }, + }, + }, + }, + }, + } + 
for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := parseNodes(tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("parseNodes() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("parseNodes() \ngot %v, \nwant %v", got, tt.want) + } + }) + } +} + +func Test_addNodeLabels(t *testing.T) { + type args struct { + nodes []node + port int + } + tests := []struct { + name string + args args + want [][]prompbmarshal.Label + }{ + { + name: "add labels to one node", + args: args{ + nodes: []node{ + { + ID: "qauwmifceyvqs0sipvzu8oslu", + Spec: struct { + Labels map[string]string + Role string + Availability string + }{Role: "manager", Availability: "active"}, + Status: struct { + State string + Message string + Addr string + }{State: "ready", Addr: "172.31.40.97"}, + Description: struct { + Hostname string + Platform struct { + Architecture string + OS string + } + Engine struct{ EngineVersion string } + }{ + Hostname: "ip-172-31-40-97", + Platform: struct { + Architecture string + OS string + }{ + Architecture: "x86_64", + OS: "linux", + }, + Engine: struct{ EngineVersion string }{ + EngineVersion: "19.03.11", + }, + }, + }, + }, + port: 9100, + }, + want: [][]prompbmarshal.Label{ + discoveryutils.GetSortedLabels(map[string]string{ + "__address__": "172.31.40.97:9100", + "__meta_dockerswarm_node_address": "172.31.40.97", + "__meta_dockerswarm_node_availability": "active", + "__meta_dockerswarm_node_engine_version": "19.03.11", + "__meta_dockerswarm_node_hostname": "ip-172-31-40-97", + "__meta_dockerswarm_node_id": "qauwmifceyvqs0sipvzu8oslu", + "__meta_dockerswarm_node_platform_architecture": "x86_64", + "__meta_dockerswarm_node_platform_os": "linux", + "__meta_dockerswarm_node_role": "manager", + "__meta_dockerswarm_node_status": "ready", + })}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := addNodeLabels(tt.args.nodes, tt.args.port) + + var sortedLabelss [][]prompbmarshal.Label + for _, labels := range got { + sortedLabelss = append(sortedLabelss, discoveryutils.GetSortedLabels(labels)) + } + if !reflect.DeepEqual(sortedLabelss, tt.want) { + t.Errorf("addNodeLabels() \ngot %v, \nwant %v", sortedLabelss, tt.want) + } + }) + } +} diff --git a/lib/promscrape/discovery/dockerswarm/services.go b/lib/promscrape/discovery/dockerswarm/services.go new file mode 100644 index 0000000000..708b4d2ec6 --- /dev/null +++ b/lib/promscrape/discovery/dockerswarm/services.go @@ -0,0 +1,139 @@ +package dockerswarm + +import ( + "encoding/json" + "fmt" + "net" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils" +) + +// https://docs.docker.com/engine/api/v1.40/#tag/Service +type service struct { + ID string + Spec struct { + Labels map[string]string + Name string + TaskTemplate struct { + ContainerSpec struct { + Hostname string + Image string + } + } + Mode struct { + Global interface{} + Replicated interface{} + } + } + UpdateStatus *struct { + State string + } + Endpoint struct { + Ports []portConfig + VirtualIPs []struct { + NetworkID string + Addr string + } + } +} + +type portConfig struct { + Protocol string + Name string + PublishMode string + PublishedPort int +} + +func getServicesLabels(cfg *apiConfig) ([]map[string]string, error) { + services, err := getServices(cfg) + if err != nil { + return nil, err + } + networksLabels, err := getNetworksLabels(cfg) + if err != nil { + return nil, err + } + 
return addServicesLabels(services, networksLabels, cfg.port), nil +} + +func getServices(cfg *apiConfig) ([]service, error) { + data, err := cfg.client.GetAPIResponse("/services") + if err != nil { + return nil, fmt.Errorf("cannot query dockerswarm api for services: %w", err) + } + return parseServicesResponse(data) +} + +func parseServicesResponse(data []byte) ([]service, error) { + var services []service + if err := json.Unmarshal(data, &services); err != nil { + return nil, fmt.Errorf("cannot parse services: %w", err) + } + return services, nil +} + +func getServiceMode(svc service) string { + if svc.Spec.Mode.Global != nil { + return "global" + } + if svc.Spec.Mode.Replicated != nil { + return "replicated" + } + return "" +} + +func addServicesLabels(services []service, networksLabels []map[string]string, port int) []map[string]string { + var ms []map[string]string + for _, service := range services { + m := map[string]string{ + "__meta_dockerswarm_service_id": service.ID, + "__meta_dockerswarm_service_name": service.Spec.Name, + "__meta_dockerswarm_service_task_container_hostname": service.Spec.TaskTemplate.ContainerSpec.Hostname, + "__meta_dockerswarm_service_task_container_image": service.Spec.TaskTemplate.ContainerSpec.Image, + "__meta_dockerswarm_service_mode": getServiceMode(service), + } + if service.UpdateStatus != nil { + m["__meta_dockerswarm_service_updating_status"] = service.UpdateStatus.State + } + for k, v := range service.Spec.Labels { + m["__meta_dockerswarm_service_label_"+discoveryutils.SanitizeLabelName(k)] = v + } + for _, vip := range service.Endpoint.VirtualIPs { + var added bool + ip, _, err := net.ParseCIDR(vip.Addr) + if err != nil { + logger.Errorf("cannot parse: %q as cidr for service label add, err: %v", vip.Addr, err) + continue + } + for _, ep := range service.Endpoint.Ports { + if ep.Protocol != "tcp" { + continue + } + lbls := map[string]string{ + "__meta_dockerswarm_service_endpoint_port_name": ep.Name, + "__meta_dockerswarm_service_endpoint_port_publish_mode": ep.PublishMode, + "__address__": discoveryutils.JoinHostPort(ip.String(), ep.PublishedPort), + } + for k, v := range m { + lbls[k] = v + } + lbls = joinLabels(networksLabels, lbls, "__meta_dockerswarm_network_id", vip.NetworkID) + added = true + ms = append(ms, lbls) + } + if !added { + lbls := make(map[string]string, len(m)) + for k, v := range m { + lbls[k] = v + } + lbls = joinLabels(networksLabels, lbls, "__meta_dockerswarm_network_id", vip.NetworkID) + lbls["__address__"] = discoveryutils.JoinHostPort(ip.String(), port) + ms = append(ms, lbls) + } + } + + } + return ms +} diff --git a/lib/promscrape/discovery/dockerswarm/services_test.go b/lib/promscrape/discovery/dockerswarm/services_test.go new file mode 100644 index 0000000000..615b21af20 --- /dev/null +++ b/lib/promscrape/discovery/dockerswarm/services_test.go @@ -0,0 +1,293 @@ +package dockerswarm + +import ( + "reflect" + "testing" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils" +) + +func Test_parseServicesResponse(t *testing.T) { + type args struct { + data []byte + } + tests := []struct { + name string + args args + want []service + wantErr bool + }{ + { + name: "parse ok", + args: args{ + data: []byte(`[ + { + "ID": "tgsci5gd31aai3jyudv98pqxf", + "Version": { + "Index": 25 + }, + "CreatedAt": "2020-10-06T11:17:31.948808444Z", + "UpdatedAt": "2020-10-06T11:17:31.950195138Z", + "Spec": { + "Name": "redis2", + "Labels": {}, + 
"TaskTemplate": { + "ContainerSpec": { + "Image": "redis:3.0.6@sha256:6a692a76c2081888b589e26e6ec835743119fe453d67ecf03df7de5b73d69842", + "Init": false, + "DNSConfig": {}, + "Isolation": "default" + }, + "Resources": { + "Limits": {}, + "Reservations": {} + } + }, + "Mode": { + "Replicated": {} + }, + "EndpointSpec": { + "Mode": "vip", + "Ports": [ + { + "Protocol": "tcp", + "TargetPort": 6379, + "PublishedPort": 8081, + "PublishMode": "ingress" + } + ] + } + }, + "Endpoint": { + "Spec": { + "Mode": "vip", + "Ports": [ + { + "Protocol": "tcp", + "TargetPort": 6379, + "PublishedPort": 8081, + "PublishMode": "ingress" + } + ] + }, + "Ports": [ + { + "Protocol": "tcp", + "TargetPort": 6379, + "PublishedPort": 8081, + "PublishMode": "ingress" + } + ], + "VirtualIPs": [ + { + "NetworkID": "qs0hog6ldlei9ct11pr3c77v1", + "Addr": "10.0.0.3/24" + } + ] + } + } +]`), + }, + want: []service{ + { + ID: "tgsci5gd31aai3jyudv98pqxf", + Spec: struct { + Labels map[string]string + Name string + TaskTemplate struct { + ContainerSpec struct { + Hostname string + Image string + } + } + Mode struct { + Global interface{} + Replicated interface{} + } + }{ + Labels: map[string]string{}, + Name: "redis2", + TaskTemplate: struct { + ContainerSpec struct { + Hostname string + Image string + } + }{ + ContainerSpec: struct { + Hostname string + Image string + }{ + Hostname: "", + Image: "redis:3.0.6@sha256:6a692a76c2081888b589e26e6ec835743119fe453d67ecf03df7de5b73d69842", + }, + }, + Mode: struct { + Global interface{} + Replicated interface{} + }{ + Replicated: map[string]interface{}{}, + }, + }, + Endpoint: struct { + Ports []portConfig + VirtualIPs []struct { + NetworkID string + Addr string + } + }{Ports: []portConfig{ + { + Protocol: "tcp", + PublishMode: "ingress", + PublishedPort: 8081, + }, + }, VirtualIPs: []struct { + NetworkID string + Addr string + }{ + { + NetworkID: "qs0hog6ldlei9ct11pr3c77v1", + Addr: "10.0.0.3/24", + }, + }, + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := parseServicesResponse(tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("parseServicesResponse() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("parseServicesResponse() \ngot %v, \nwant %v", got, tt.want) + } + }) + } +} + +func Test_addServicesLabels(t *testing.T) { + type args struct { + services []service + networksLabels []map[string]string + port int + } + tests := []struct { + name string + args args + want [][]prompbmarshal.Label + }{ + { + name: "add 2 services with network labels join", + args: args{ + port: 9100, + networksLabels: []map[string]string{ + { + "__meta_dockerswarm_network_id": "qs0hog6ldlei9ct11pr3c77v1", + "__meta_dockerswarm_network_ingress": "true", + "__meta_dockerswarm_network_internal": "false", + "__meta_dockerswarm_network_label_key1": "value1", + "__meta_dockerswarm_network_name": "ingress", + "__meta_dockerswarm_network_scope": "swarm", + }, + }, + services: []service{ + { + ID: "tgsci5gd31aai3jyudv98pqxf", + Spec: struct { + Labels map[string]string + Name string + TaskTemplate struct { + ContainerSpec struct { + Hostname string + Image string + } + } + Mode struct { + Global interface{} + Replicated interface{} + } + }{ + Labels: map[string]string{}, + Name: "redis2", + TaskTemplate: struct { + ContainerSpec struct { + Hostname string + Image string + } + }{ + ContainerSpec: struct { + Hostname string + Image string + }{ + Hostname: "node1", + Image: 
"redis:3.0.6@sha256:6a692a76c2081888b589e26e6ec835743119fe453d67ecf03df7de5b73d69842", + }, + }, + Mode: struct { + Global interface{} + Replicated interface{} + }{ + Replicated: map[string]interface{}{}, + }, + }, + Endpoint: struct { + Ports []portConfig + VirtualIPs []struct { + NetworkID string + Addr string + } + }{Ports: []portConfig{ + { + Protocol: "tcp", + Name: "redis", + PublishMode: "ingress", + }, + }, VirtualIPs: []struct { + NetworkID string + Addr string + }{ + { + NetworkID: "qs0hog6ldlei9ct11pr3c77v1", + Addr: "10.0.0.3/24", + }, + }, + }, + }, + }, + }, + want: [][]prompbmarshal.Label{ + discoveryutils.GetSortedLabels(map[string]string{ + "__address__": "10.0.0.3:0", + "__meta_dockerswarm_network_id": "qs0hog6ldlei9ct11pr3c77v1", + "__meta_dockerswarm_network_ingress": "true", + "__meta_dockerswarm_network_internal": "false", + "__meta_dockerswarm_network_label_key1": "value1", + "__meta_dockerswarm_network_name": "ingress", + "__meta_dockerswarm_network_scope": "swarm", + "__meta_dockerswarm_service_endpoint_port_name": "redis", + "__meta_dockerswarm_service_endpoint_port_publish_mode": "ingress", + "__meta_dockerswarm_service_id": "tgsci5gd31aai3jyudv98pqxf", + "__meta_dockerswarm_service_mode": "replicated", + "__meta_dockerswarm_service_name": "redis2", + "__meta_dockerswarm_service_task_container_hostname": "node1", + "__meta_dockerswarm_service_task_container_image": "redis:3.0.6@sha256:6a692a76c2081888b589e26e6ec835743119fe453d67ecf03df7de5b73d69842", + })}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := addServicesLabels(tt.args.services, tt.args.networksLabels, tt.args.port) + var sortedLabelss [][]prompbmarshal.Label + for _, labels := range got { + sortedLabelss = append(sortedLabelss, discoveryutils.GetSortedLabels(labels)) + } + if !reflect.DeepEqual(sortedLabelss, tt.want) { + t.Errorf("addServicesLabels() \ngot %v, \nwant %v", sortedLabelss, tt.want) + } + }) + } +} diff --git a/lib/promscrape/discovery/dockerswarm/tasks.go b/lib/promscrape/discovery/dockerswarm/tasks.go new file mode 100644 index 0000000000..cd0821d901 --- /dev/null +++ b/lib/promscrape/discovery/dockerswarm/tasks.go @@ -0,0 +1,149 @@ +package dockerswarm + +import ( + "encoding/json" + "fmt" + "net" + "strconv" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils" +) + +// See https://docs.docker.com/engine/api/v1.40/#tag/Task +type task struct { + ID string + ServiceID string + NodeID string + Labels map[string]string + DesiredState string + NetworksAttachments []struct { + Addresses []string + Network struct { + ID string + } + } + Status struct { + State string + ContainerStatus *struct { + ContainerID string + } + PortStatus struct { + Ports []portConfig + } + } + Slot int +} + +func getTasksLabels(cfg *apiConfig) ([]map[string]string, error) { + tasks, err := getTasks(cfg) + if err != nil { + return nil, err + } + services, err := getServices(cfg) + if err != nil { + return nil, err + } + networkLabels, err := getNetworksLabels(cfg) + if err != nil { + return nil, err + } + svcLabels := addServicesLabels(services, networkLabels, cfg.port) + nodeLabels, err := getNodesLabels(cfg) + if err != nil { + return nil, err + } + return addTasksLabels(tasks, nodeLabels, svcLabels, networkLabels, services, cfg.port), nil +} + +func getTasks(cfg *apiConfig) ([]task, error) { + resp, err := cfg.client.GetAPIResponse("/tasks") + if err != nil { + return nil, 
fmt.Errorf("cannot query dockerswarm api for tasks: %w", err) + } + return parseTasks(resp) +} + +func parseTasks(data []byte) ([]task, error) { + var tasks []task + if err := json.Unmarshal(data, &tasks); err != nil { + return nil, fmt.Errorf("cannot parse tasks: %w", err) + } + return tasks, nil +} + +func addTasksLabels(tasks []task, nodesLabels, servicesLabels, networksLabels []map[string]string, services []service, port int) []map[string]string { + var ms []map[string]string + for _, task := range tasks { + m := map[string]string{ + "__meta_dockerswarm_task_id": task.ID, + "__meta_dockerswarm_task_desired_state": task.DesiredState, + "__meta_dockerswarm_task_state": task.Status.State, + "__meta_dockerswarm_task_slot": strconv.Itoa(task.Slot), + } + if task.Status.ContainerStatus != nil { + m["__meta_dockerswarm_task_container_id"] = task.Status.ContainerStatus.ContainerID + } + for k, v := range task.Labels { + m["__meta_dockerswarm_task_label_"+discoveryutils.SanitizeLabelName(k)] = v + } + var svcPorts []portConfig + for i, v := range services { + if v.ID == task.ServiceID { + svcPorts = services[i].Endpoint.Ports + break + } + } + m = joinLabels(servicesLabels, m, "__meta_dockerswarm_service_id", task.ServiceID) + m = joinLabels(nodesLabels, m, "__meta_dockerswarm_node_id", task.NodeID) + + for _, port := range task.Status.PortStatus.Ports { + if port.Protocol != "tcp" { + continue + } + lbls := make(map[string]string, len(m)) + lbls["__meta_dockerswarm_task_port_publish_mode"] = port.PublishMode + lbls["__address__"] = discoveryutils.JoinHostPort(m["__meta_dockerswarm_node_address"], port.PublishedPort) + for k, v := range m { + lbls[k] = v + } + ms = append(ms, lbls) + } + for _, na := range task.NetworksAttachments { + for _, address := range na.Addresses { + ip, _, err := net.ParseCIDR(address) + if err != nil { + logger.Errorf("cannot parse task network attachments address: %s as net CIDR: %v", address, err) + continue + } + var added bool + for _, v := range svcPorts { + if v.Protocol != "tcp" { + continue + } + lbls := make(map[string]string, len(m)) + for k, v := range m { + lbls[k] = v + } + lbls = joinLabels(networksLabels, lbls, "__meta_dockerswarm_network_id", na.Network.ID) + lbls["__address"] = discoveryutils.JoinHostPort(ip.String(), v.PublishedPort) + lbls["__meta_dockerswarm_task_port_publish_mode"] = v.PublishMode + ms = append(ms, lbls) + added = true + } + + if !added { + lbls := make(map[string]string, len(m)) + for k, v := range m { + lbls[k] = v + } + lbls = joinLabels(networksLabels, lbls, "__meta_dockerswarm_network_id", na.Network.ID) + lbls["__address__"] = discoveryutils.JoinHostPort(ip.String(), port) + ms = append(ms, lbls) + } + } + } + } + return ms +} diff --git a/lib/promscrape/discovery/dockerswarm/tasks_test.go b/lib/promscrape/discovery/dockerswarm/tasks_test.go new file mode 100644 index 0000000000..0d32c4cfba --- /dev/null +++ b/lib/promscrape/discovery/dockerswarm/tasks_test.go @@ -0,0 +1,352 @@ +package dockerswarm + +import ( + "reflect" + "testing" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils" +) + +func Test_parseTasks(t *testing.T) { + type args struct { + data []byte + } + tests := []struct { + name string + args args + want []task + wantErr bool + }{ + { + name: "parse ok", + args: args{ + data: []byte(`[ + { + "ID": "t4rdm7j2y9yctbrksiwvsgpu5", + "Version": { + "Index": 23 + }, + "Labels": {}, + "Spec": { + "ContainerSpec": { + "Image": 
"redis:3.0.6@sha256:6a692a76c2081888b589e26e6ec835743119fe453d67ecf03df7de5b73d69842", + "Init": false + }, + "Resources": { + "Limits": {}, + "Reservations": {} + }, + "Placement": { + "Platforms": [ + { + "Architecture": "amd64", + "OS": "linux" + } + ] + }, + "ForceUpdate": 0 + }, + "ServiceID": "t91nf284wzle1ya09lqvyjgnq", + "Slot": 1, + "NodeID": "qauwmifceyvqs0sipvzu8oslu", + "Status": { + "State": "running", + "ContainerStatus": { + "ContainerID": "33034b69f6fa5f808098208752fd1fe4e0e1ca86311988cea6a73b998cdc62e8", + "ExitCode": 0 + }, + "PortStatus": {} + }, + "DesiredState": "running" + } +] +`), + }, + want: []task{ + { + ID: "t4rdm7j2y9yctbrksiwvsgpu5", + ServiceID: "t91nf284wzle1ya09lqvyjgnq", + NodeID: "qauwmifceyvqs0sipvzu8oslu", + Labels: map[string]string{}, + DesiredState: "running", + Slot: 1, + Status: struct { + State string + ContainerStatus *struct{ ContainerID string } + PortStatus struct{ Ports []portConfig } + }{ + State: "running", + ContainerStatus: &struct{ ContainerID string }{ + ContainerID: "33034b69f6fa5f808098208752fd1fe4e0e1ca86311988cea6a73b998cdc62e8", + }, + PortStatus: struct{ Ports []portConfig }{}}, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := parseTasks(tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("parseTasks() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("parseTasks() got = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_addTasksLabels(t *testing.T) { + type args struct { + tasks []task + nodesLabels []map[string]string + servicesLabels []map[string]string + networksLabels []map[string]string + services []service + port int + } + tests := []struct { + name string + args args + want [][]prompbmarshal.Label + }{ + { + name: "adds 1 task with nodes labels", + args: args{ + port: 9100, + tasks: []task{ + { + ID: "t4rdm7j2y9yctbrksiwvsgpu5", + ServiceID: "t91nf284wzle1ya09lqvyjgnq", + NodeID: "qauwmifceyvqs0sipvzu8oslu", + Labels: map[string]string{}, + DesiredState: "running", + Slot: 1, + Status: struct { + State string + ContainerStatus *struct{ ContainerID string } + PortStatus struct{ Ports []portConfig } + }{ + State: "running", + ContainerStatus: &struct{ ContainerID string }{ + ContainerID: "33034b69f6fa5f808098208752fd1fe4e0e1ca86311988cea6a73b998cdc62e8", + }, + PortStatus: struct{ Ports []portConfig }{ + Ports: []portConfig{ + { + PublishMode: "ingress", + Name: "redis", + Protocol: "tcp", + PublishedPort: 6379, + }, + }, + }}, + }, + }, + nodesLabels: []map[string]string{ + { + "__address__": "172.31.40.97:9100", + "__meta_dockerswarm_node_address": "172.31.40.97", + "__meta_dockerswarm_node_availability": "active", + "__meta_dockerswarm_node_engine_version": "19.03.11", + "__meta_dockerswarm_node_hostname": "ip-172-31-40-97", + "__meta_dockerswarm_node_id": "qauwmifceyvqs0sipvzu8oslu", + "__meta_dockerswarm_node_platform_architecture": "x86_64", + "__meta_dockerswarm_node_platform_os": "linux", + "__meta_dockerswarm_node_role": "manager", + "__meta_dockerswarm_node_status": "ready", + }, + }, + }, + want: [][]prompbmarshal.Label{ + discoveryutils.GetSortedLabels(map[string]string{ + "__address__": "172.31.40.97:9100", + "__meta_dockerswarm_node_address": "172.31.40.97", + "__meta_dockerswarm_node_availability": "active", + "__meta_dockerswarm_node_engine_version": "19.03.11", + "__meta_dockerswarm_node_hostname": "ip-172-31-40-97", + "__meta_dockerswarm_node_id": "qauwmifceyvqs0sipvzu8oslu", + 
"__meta_dockerswarm_node_platform_architecture": "x86_64", + "__meta_dockerswarm_node_platform_os": "linux", + "__meta_dockerswarm_node_role": "manager", + "__meta_dockerswarm_node_status": "ready", + "__meta_dockerswarm_task_container_id": "33034b69f6fa5f808098208752fd1fe4e0e1ca86311988cea6a73b998cdc62e8", + "__meta_dockerswarm_task_desired_state": "running", + "__meta_dockerswarm_task_id": "t4rdm7j2y9yctbrksiwvsgpu5", + "__meta_dockerswarm_task_port_publish_mode": "ingress", + "__meta_dockerswarm_task_slot": "1", + "__meta_dockerswarm_task_state": "running", + })}, + }, + { + name: "adds 1 task with nodes, network and services labels", + args: args{ + port: 9100, + tasks: []task{ + { + ID: "t4rdm7j2y9yctbrksiwvsgpu5", + ServiceID: "tgsci5gd31aai3jyudv98pqxf", + NodeID: "qauwmifceyvqs0sipvzu8oslu", + Labels: map[string]string{}, + DesiredState: "running", + Slot: 1, + NetworksAttachments: []struct { + Addresses []string + Network struct{ ID string } + }{ + { + Network: struct { + ID string + }{ + ID: "qs0hog6ldlei9ct11pr3c77v1", + }, + Addresses: []string{"10.10.15.15/24"}, + }, + }, + Status: struct { + State string + ContainerStatus *struct{ ContainerID string } + PortStatus struct{ Ports []portConfig } + }{ + State: "running", + ContainerStatus: &struct{ ContainerID string }{ + ContainerID: "33034b69f6fa5f808098208752fd1fe4e0e1ca86311988cea6a73b998cdc62e8", + }, + PortStatus: struct{ Ports []portConfig }{}}, + }, + }, + networksLabels: []map[string]string{ + { + "__meta_dockerswarm_network_id": "qs0hog6ldlei9ct11pr3c77v1", + "__meta_dockerswarm_network_ingress": "true", + "__meta_dockerswarm_network_internal": "false", + "__meta_dockerswarm_network_label_key1": "value1", + "__meta_dockerswarm_network_name": "ingress", + "__meta_dockerswarm_network_scope": "swarm", + }, + }, + nodesLabels: []map[string]string{ + { + "__address__": "172.31.40.97:9100", + "__meta_dockerswarm_node_address": "172.31.40.97", + "__meta_dockerswarm_node_availability": "active", + "__meta_dockerswarm_node_engine_version": "19.03.11", + "__meta_dockerswarm_node_hostname": "ip-172-31-40-97", + "__meta_dockerswarm_node_id": "qauwmifceyvqs0sipvzu8oslu", + "__meta_dockerswarm_node_platform_architecture": "x86_64", + "__meta_dockerswarm_node_platform_os": "linux", + "__meta_dockerswarm_node_role": "manager", + "__meta_dockerswarm_node_status": "ready", + }, + }, + services: []service{ + { + ID: "tgsci5gd31aai3jyudv98pqxf", + Spec: struct { + Labels map[string]string + Name string + TaskTemplate struct { + ContainerSpec struct { + Hostname string + Image string + } + } + Mode struct { + Global interface{} + Replicated interface{} + } + }{ + Labels: map[string]string{}, + Name: "redis2", + TaskTemplate: struct { + ContainerSpec struct { + Hostname string + Image string + } + }{ + ContainerSpec: struct { + Hostname string + Image string + }{ + Hostname: "node1", + Image: "redis:3.0.6@sha256:6a692a76c2081888b589e26e6ec835743119fe453d67ecf03df7de5b73d69842", + }, + }, + Mode: struct { + Global interface{} + Replicated interface{} + }{ + Replicated: map[string]interface{}{}, + }, + }, + Endpoint: struct { + Ports []portConfig + VirtualIPs []struct { + NetworkID string + Addr string + } + }{Ports: []portConfig{ + { + Protocol: "tcp", + Name: "redis", + PublishMode: "ingress", + }, + }, VirtualIPs: []struct { + NetworkID string + Addr string + }{ + { + NetworkID: "qs0hog6ldlei9ct11pr3c77v1", + Addr: "10.0.0.3/24", + }, + }, + }, + }, + }, + servicesLabels: []map[string]string{}, + }, + want: [][]prompbmarshal.Label{ + 
discoveryutils.GetSortedLabels(map[string]string{ + "__address": "10.10.15.15:0", + "__address__": "172.31.40.97:9100", + "__meta_dockerswarm_network_id": "qs0hog6ldlei9ct11pr3c77v1", + "__meta_dockerswarm_network_ingress": "true", + "__meta_dockerswarm_network_internal": "false", + "__meta_dockerswarm_network_label_key1": "value1", + "__meta_dockerswarm_network_name": "ingress", + "__meta_dockerswarm_network_scope": "swarm", + "__meta_dockerswarm_node_address": "172.31.40.97", + "__meta_dockerswarm_node_availability": "active", + "__meta_dockerswarm_node_engine_version": "19.03.11", + "__meta_dockerswarm_node_hostname": "ip-172-31-40-97", + "__meta_dockerswarm_node_id": "qauwmifceyvqs0sipvzu8oslu", + "__meta_dockerswarm_node_platform_architecture": "x86_64", + "__meta_dockerswarm_node_platform_os": "linux", + "__meta_dockerswarm_node_role": "manager", + "__meta_dockerswarm_node_status": "ready", + "__meta_dockerswarm_task_container_id": "33034b69f6fa5f808098208752fd1fe4e0e1ca86311988cea6a73b998cdc62e8", + "__meta_dockerswarm_task_desired_state": "running", + "__meta_dockerswarm_task_id": "t4rdm7j2y9yctbrksiwvsgpu5", + "__meta_dockerswarm_task_port_publish_mode": "ingress", + "__meta_dockerswarm_task_slot": "1", + "__meta_dockerswarm_task_state": "running", + }), + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := addTasksLabels(tt.args.tasks, tt.args.nodesLabels, tt.args.servicesLabels, tt.args.networksLabels, tt.args.services, tt.args.port) + var sortedLabelss [][]prompbmarshal.Label + for _, labels := range got { + sortedLabelss = append(sortedLabelss, discoveryutils.GetSortedLabels(labels)) + } + if !reflect.DeepEqual(sortedLabelss, tt.want) { + t.Errorf("addTasksLabels() \ngot %v, \nwant %v", sortedLabelss, tt.want) + } + }) + } +} diff --git a/lib/promscrape/discoveryutils/client.go b/lib/promscrape/discoveryutils/client.go index f0bc4b592b..014edced41 100644 --- a/lib/promscrape/discoveryutils/client.go +++ b/lib/promscrape/discoveryutils/client.go @@ -41,11 +41,23 @@ type Client struct { // NewClient returns new Client for the given apiServer and the given ac. func NewClient(apiServer string, ac *promauth.Config) (*Client, error) { - var u fasthttp.URI + var ( + dialFunc fasthttp.DialFunc + tlsCfg *tls.Config + u fasthttp.URI + ) u.Update(apiServer) + + // special case for unix socket connection + if string(u.Scheme()) == "unix" { + dialAddr := string(u.Path()) + apiServer = "http://" + dialFunc = func(_ string) (net.Conn, error) { + return net.Dial("unix", dialAddr) + } + } hostPort := string(u.Host()) isTLS := string(u.Scheme()) == "https" - var tlsCfg *tls.Config if isTLS && ac != nil { tlsCfg = ac.NewTLSConfig() } @@ -66,6 +78,7 @@ func NewClient(apiServer string, ac *promauth.Config) (*Client, error) { WriteTimeout: 10 * time.Second, MaxResponseBodySize: 300 * 1024 * 1024, MaxConns: 2 * *maxConcurrency, + Dial: dialFunc, } return &Client{ hc: hc, diff --git a/lib/promscrape/scraper.go b/lib/promscrape/scraper.go index cab51cafdb..bd611c13ef 100644 --- a/lib/promscrape/scraper.go +++ b/lib/promscrape/scraper.go @@ -36,9 +36,11 @@ var ( gceSDCheckInterval = flag.Duration("promscrape.gceSDCheckInterval", time.Minute, "Interval for checking for changes in gce. "+ "This works only if `gce_sd_configs` is configured in '-promscrape.config' file. 
"+ "See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config for details") + dockerswarmSDCheckInterval = flag.Duration("promscrape.dockerswarmSDCheckInterval", 30*time.Second, "Interval for checking for changes in dockerswarm. "+ + "This works only if `dockerswarm_sd_configs` is configured in '-promscrape.config' file. "+ + "See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dockerswarm_sd_config for details") promscrapeConfigFile = flag.String("promscrape.config", "", "Optional path to Prometheus config file with 'scrape_configs' section containing targets to scrape. "+ "See https://victoriametrics.github.io/#how-to-scrape-prometheus-exporters-such-as-node-exporter for details") - suppressDuplicateScrapeTargetErrors = flag.Bool("promscrape.suppressDuplicateScrapeTargetErrors", false, "Whether to suppress `duplicate scrape target` errors; "+ "see https://victoriametrics.github.io/vmagent.html#troubleshooting for details") ) @@ -96,6 +98,7 @@ func runScraper(configFile string, pushData func(wr *prompbmarshal.WriteRequest) scs.add("dns_sd_configs", *dnsSDCheckInterval, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getDNSSDScrapeWork(swsPrev) }) scs.add("ec2_sd_configs", *ec2SDCheckInterval, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getEC2SDScrapeWork(swsPrev) }) scs.add("gce_sd_configs", *gceSDCheckInterval, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getGCESDScrapeWork(swsPrev) }) + scs.add("dockerswarm_sd_configs", *dockerswarmSDCheckInterval, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getDockerSwarmSDScrapeWork(swsPrev) }) sighupCh := procutil.NewSighupChan() From 63c4999e06279ef7c10eaa809160a45719ffaecf Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin <valyala@gmail.com> Date: Mon, 12 Oct 2020 16:12:36 +0300 Subject: [PATCH 14/24] lib/promscrape: code prettifying after 9bd9f67718fed7a59ffccbc035e62e0bc8d18dc7 --- app/vmagent/README.md | 2 +- docs/Single-server-VictoriaMetrics.md | 1 + docs/vmagent.md | 2 + lib/promscrape/discovery/dockerswarm/api.go | 4 +- .../discovery/dockerswarm/dockerswarm.go | 25 ++---- .../discovery/dockerswarm/network.go | 12 +-- .../discovery/dockerswarm/network_test.go | 11 ++- lib/promscrape/discovery/dockerswarm/nodes.go | 12 ++- .../discovery/dockerswarm/nodes_test.go | 3 + .../discovery/dockerswarm/services.go | 48 ++++++----- .../discovery/dockerswarm/services_test.go | 7 +- lib/promscrape/discovery/dockerswarm/tasks.go | 85 +++++++++++-------- .../discovery/dockerswarm/tasks_test.go | 59 +++++++------ lib/promscrape/discovery/openstack/api.go | 4 +- 14 files changed, 150 insertions(+), 125 deletions(-) diff --git a/app/vmagent/README.md b/app/vmagent/README.md index 710b44f518..428ea6e978 100644 --- a/app/vmagent/README.md +++ b/app/vmagent/README.md @@ -151,7 +151,7 @@ The following scrape types in [scrape_config](https://prometheus.io/docs/prometh * `openstack_sd_configs` - for scraping OpenStack targets. See [openstack_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#openstack_sd_config) for details. [OpenStack identity API v3](https://docs.openstack.org/api-ref/identity/v3/) is supported only. -* `dockerswarm_sd_configs` - for scraping dockerswarm targets. +* `dockerswarm_sd_configs` - for scraping Docker Swarm targets. See [dockerswarm_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dockerswarm_sd_config) for details. 
File feature requests at [our issue tracker](https://github.com/VictoriaMetrics/VictoriaMetrics/issues) if you need other service discovery mechanisms to be supported by `vmagent`. diff --git a/docs/Single-server-VictoriaMetrics.md b/docs/Single-server-VictoriaMetrics.md index 3365cc6f2e..b32a5ad7f1 100644 --- a/docs/Single-server-VictoriaMetrics.md +++ b/docs/Single-server-VictoriaMetrics.md @@ -295,6 +295,7 @@ Currently the following [scrape_config](https://prometheus.io/docs/prometheus/la * [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config) * [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config) * [openstack_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#openstack_sd_config) +* [dockerswarm_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dockerswarm_sd_config) In the future other `*_sd_config` types will be supported. diff --git a/docs/vmagent.md b/docs/vmagent.md index 4d8b03f06d..428ea6e978 100644 --- a/docs/vmagent.md +++ b/docs/vmagent.md @@ -151,6 +151,8 @@ The following scrape types in [scrape_config](https://prometheus.io/docs/prometh * `openstack_sd_configs` - for scraping OpenStack targets. See [openstack_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#openstack_sd_config) for details. [OpenStack identity API v3](https://docs.openstack.org/api-ref/identity/v3/) is supported only. +* `dockerswarm_sd_configs` - for scraping Docker Swarm targets. + See [dockerswarm_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dockerswarm_sd_config) for details. File feature requests at [our issue tracker](https://github.com/VictoriaMetrics/VictoriaMetrics/issues) if you need other service discovery mechanisms to be supported by `vmagent`. 
diff --git a/lib/promscrape/discovery/dockerswarm/api.go b/lib/promscrape/discovery/dockerswarm/api.go index 4e8b591d50..25dffe2074 100644 --- a/lib/promscrape/discovery/dockerswarm/api.go +++ b/lib/promscrape/discovery/dockerswarm/api.go @@ -26,11 +26,11 @@ func newAPIConfig(sdc *SDConfig, baseDir string) (*apiConfig, error) { cfg := &apiConfig{ port: sdc.Port, } - config, err := promauth.NewConfig(baseDir, sdc.BasicAuth, sdc.BearerToken, sdc.BearerTokenFile, sdc.TLSConfig) + ac, err := promauth.NewConfig(baseDir, sdc.BasicAuth, sdc.BearerToken, sdc.BearerTokenFile, sdc.TLSConfig) if err != nil { return nil, err } - client, err := discoveryutils.NewClient(sdc.Host, config) + client, err := discoveryutils.NewClient(sdc.Host, ac) if err != nil { return nil, fmt.Errorf("cannot create HTTP client for %q: %w", sdc.Host, err) } diff --git a/lib/promscrape/discovery/dockerswarm/dockerswarm.go b/lib/promscrape/discovery/dockerswarm/dockerswarm.go index 1b172415c9..59baecbe91 100644 --- a/lib/promscrape/discovery/dockerswarm/dockerswarm.go +++ b/lib/promscrape/discovery/dockerswarm/dockerswarm.go @@ -10,29 +10,18 @@ import ( // // See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dockerswarm_sd_config type SDConfig struct { - Host string `yaml:"host"` - Role string `yaml:"role"` - Port int `yaml:"port"` - TLSConfig *promauth.TLSConfig `yaml:"tls_config"` + Host string `yaml:"host"` + // TODO: add support for proxy_url + TLSConfig *promauth.TLSConfig `yaml:"tls_config"` + Role string `yaml:"role"` + Port int `yaml:"port"` + // refresh_interval is obtained from `-promscrape.dockerswarmSDCheckInterval` command-line option BasicAuth *promauth.BasicAuthConfig `yaml:"basic_auth"` BearerToken string `yaml:"bearer_token"` BearerTokenFile string `yaml:"bearer_token_file"` } -// joinLabels adds labels to destination from source with given key from destination matching given value. -func joinLabels(source []map[string]string, destination map[string]string, key, value string) map[string]string { - for _, sourceLabels := range source { - if sourceLabels[key] == value { - for k, v := range sourceLabels { - destination[k] = v - } - return destination - } - } - return destination -} - -// GetLabels returns gce labels according to sdc. +// GetLabels returns dockerswarm labels according to sdc. 
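+// The set of discovered targets depends on sdc.Role: nodes, services or tasks.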
func GetLabels(sdc *SDConfig, baseDir string) ([]map[string]string, error) { cfg, err := getAPIConfig(sdc, baseDir) if err != nil { diff --git a/lib/promscrape/discovery/dockerswarm/network.go b/lib/promscrape/discovery/dockerswarm/network.go index 3200d9ee87..bcf21dd0a4 100644 --- a/lib/promscrape/discovery/dockerswarm/network.go +++ b/lib/promscrape/discovery/dockerswarm/network.go @@ -18,12 +18,12 @@ type network struct { Labels map[string]string } -func getNetworksLabels(cfg *apiConfig) ([]map[string]string, error) { +func getNetworksLabelsByNetworkID(cfg *apiConfig) (map[string]map[string]string, error) { networks, err := getNetworks(cfg) if err != nil { return nil, err } - return addNetworkLabels(networks), nil + return getNetworkLabelsByNetworkID(networks), nil } func getNetworks(cfg *apiConfig) ([]network, error) { @@ -42,20 +42,20 @@ func parseNetworks(data []byte) ([]network, error) { return networks, nil } -func addNetworkLabels(networks []network) []map[string]string { - var ms []map[string]string +func getNetworkLabelsByNetworkID(networks []network) map[string]map[string]string { + ms := make(map[string]map[string]string) for _, network := range networks { m := map[string]string{ "__meta_dockerswarm_network_id": network.ID, "__meta_dockerswarm_network_name": network.Name, - "__meta_dockerswarm_network_scope": network.Scope, "__meta_dockerswarm_network_internal": strconv.FormatBool(network.Internal), "__meta_dockerswarm_network_ingress": strconv.FormatBool(network.Ingress), + "__meta_dockerswarm_network_scope": network.Scope, } for k, v := range network.Labels { m["__meta_dockerswarm_network_label_"+discoveryutils.SanitizeLabelName(k)] = v } - ms = append(ms, m) + ms[network.ID] = m } return ms } diff --git a/lib/promscrape/discovery/dockerswarm/network_test.go b/lib/promscrape/discovery/dockerswarm/network_test.go index 7bcbce020f..3441bb4b71 100644 --- a/lib/promscrape/discovery/dockerswarm/network_test.go +++ b/lib/promscrape/discovery/dockerswarm/network_test.go @@ -2,6 +2,7 @@ package dockerswarm import ( "reflect" + "sort" "testing" "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal" @@ -45,9 +46,15 @@ func Test_addNetworkLabels(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := addNetworkLabels(tt.args.networks) + got := getNetworkLabelsByNetworkID(tt.args.networks) + var networkIDs []string + for networkID := range got { + networkIDs = append(networkIDs, networkID) + } + sort.Strings(networkIDs) var sortedLabelss [][]prompbmarshal.Label - for _, labels := range got { + for _, networkID := range networkIDs { + labels := got[networkID] sortedLabelss = append(sortedLabelss, discoveryutils.GetSortedLabels(labels)) } if !reflect.DeepEqual(sortedLabelss, tt.want) { diff --git a/lib/promscrape/discovery/dockerswarm/nodes.go b/lib/promscrape/discovery/dockerswarm/nodes.go index 0474956a45..d5eec44cc2 100644 --- a/lib/promscrape/discovery/dockerswarm/nodes.go +++ b/lib/promscrape/discovery/dockerswarm/nodes.go @@ -30,7 +30,7 @@ type node struct { Message string Addr string } - ManagerStatus *struct { + ManagerStatus struct { Leader bool Reachability string Addr string @@ -66,21 +66,19 @@ func addNodeLabels(nodes []node, port int) []map[string]string { for _, node := range nodes { m := map[string]string{ "__address__": discoveryutils.JoinHostPort(node.Status.Addr, port), - "__meta_dockerswarm_node_id": node.ID, "__meta_dockerswarm_node_address": node.Status.Addr, "__meta_dockerswarm_node_availability": node.Spec.Availability, 
"__meta_dockerswarm_node_engine_version": node.Description.Engine.EngineVersion, "__meta_dockerswarm_node_hostname": node.Description.Hostname, + "__meta_dockerswarm_node_id": node.ID, + "__meta_dockerswarm_node_manager_address": node.ManagerStatus.Addr, + "__meta_dockerswarm_node_manager_leader": fmt.Sprintf("%t", node.ManagerStatus.Leader), + "__meta_dockerswarm_node_manager_reachability": node.ManagerStatus.Reachability, "__meta_dockerswarm_node_platform_architecture": node.Description.Platform.Architecture, "__meta_dockerswarm_node_platform_os": node.Description.Platform.OS, "__meta_dockerswarm_node_role": node.Spec.Role, "__meta_dockerswarm_node_status": node.Status.State, } - if node.ManagerStatus != nil { - m["__meta_dockerswarm_node_manager_address"] = node.ManagerStatus.Addr - m["__meta_dockerswarm_node_manager_manager_reachability"] = node.ManagerStatus.Reachability - m["__meta_dockerswarm_node_manager_leader"] = fmt.Sprintf("%t", node.ManagerStatus.Leader) - } for k, v := range node.Spec.Labels { m["__meta_dockerswarm_node_label_"+discoveryutils.SanitizeLabelName(k)] = v } diff --git a/lib/promscrape/discovery/dockerswarm/nodes_test.go b/lib/promscrape/discovery/dockerswarm/nodes_test.go index 6d5c63629d..c7348f9b54 100644 --- a/lib/promscrape/discovery/dockerswarm/nodes_test.go +++ b/lib/promscrape/discovery/dockerswarm/nodes_test.go @@ -161,6 +161,9 @@ func Test_addNodeLabels(t *testing.T) { "__meta_dockerswarm_node_availability": "active", "__meta_dockerswarm_node_engine_version": "19.03.11", "__meta_dockerswarm_node_hostname": "ip-172-31-40-97", + "__meta_dockerswarm_node_manager_address": "", + "__meta_dockerswarm_node_manager_leader": "false", + "__meta_dockerswarm_node_manager_reachability": "", "__meta_dockerswarm_node_id": "qauwmifceyvqs0sipvzu8oslu", "__meta_dockerswarm_node_platform_architecture": "x86_64", "__meta_dockerswarm_node_platform_os": "linux", diff --git a/lib/promscrape/discovery/dockerswarm/services.go b/lib/promscrape/discovery/dockerswarm/services.go index 708b4d2ec6..147c610cce 100644 --- a/lib/promscrape/discovery/dockerswarm/services.go +++ b/lib/promscrape/discovery/dockerswarm/services.go @@ -27,7 +27,7 @@ type service struct { Replicated interface{} } } - UpdateStatus *struct { + UpdateStatus struct { State string } Endpoint struct { @@ -51,7 +51,7 @@ func getServicesLabels(cfg *apiConfig) ([]map[string]string, error) { if err != nil { return nil, err } - networksLabels, err := getNetworksLabels(cfg) + networksLabels, err := getNetworksLabelsByNetworkID(cfg) if err != nil { return nil, err } @@ -84,56 +84,58 @@ func getServiceMode(svc service) string { return "" } -func addServicesLabels(services []service, networksLabels []map[string]string, port int) []map[string]string { +func addServicesLabels(services []service, networksLabels map[string]map[string]string, port int) []map[string]string { var ms []map[string]string for _, service := range services { - m := map[string]string{ + commonLabels := map[string]string{ "__meta_dockerswarm_service_id": service.ID, "__meta_dockerswarm_service_name": service.Spec.Name, + "__meta_dockerswarm_service_mode": getServiceMode(service), "__meta_dockerswarm_service_task_container_hostname": service.Spec.TaskTemplate.ContainerSpec.Hostname, "__meta_dockerswarm_service_task_container_image": service.Spec.TaskTemplate.ContainerSpec.Image, - "__meta_dockerswarm_service_mode": getServiceMode(service), - } - if service.UpdateStatus != nil { - m["__meta_dockerswarm_service_updating_status"] = service.UpdateStatus.State 
+ "__meta_dockerswarm_service_updating_status": service.UpdateStatus.State, } for k, v := range service.Spec.Labels { - m["__meta_dockerswarm_service_label_"+discoveryutils.SanitizeLabelName(k)] = v + commonLabels["__meta_dockerswarm_service_label_"+discoveryutils.SanitizeLabelName(k)] = v } for _, vip := range service.Endpoint.VirtualIPs { - var added bool ip, _, err := net.ParseCIDR(vip.Addr) if err != nil { logger.Errorf("cannot parse: %q as cidr for service label add, err: %v", vip.Addr, err) continue } + added := false for _, ep := range service.Endpoint.Ports { if ep.Protocol != "tcp" { continue } - lbls := map[string]string{ + m := map[string]string{ + "__address__": discoveryutils.JoinHostPort(ip.String(), ep.PublishedPort), "__meta_dockerswarm_service_endpoint_port_name": ep.Name, "__meta_dockerswarm_service_endpoint_port_publish_mode": ep.PublishMode, - "__address__": discoveryutils.JoinHostPort(ip.String(), ep.PublishedPort), } - for k, v := range m { - lbls[k] = v + for k, v := range commonLabels { + m[k] = v + } + for k, v := range networksLabels[vip.NetworkID] { + m[k] = v } - lbls = joinLabels(networksLabels, lbls, "__meta_dockerswarm_network_id", vip.NetworkID) added = true - ms = append(ms, lbls) + ms = append(ms, m) } if !added { - lbls := make(map[string]string, len(m)) - for k, v := range m { - lbls[k] = v + m := map[string]string{ + "__address__": discoveryutils.JoinHostPort(ip.String(), port), } - lbls = joinLabels(networksLabels, lbls, "__meta_dockerswarm_network_id", vip.NetworkID) - lbls["__address__"] = discoveryutils.JoinHostPort(ip.String(), port) - ms = append(ms, lbls) + for k, v := range commonLabels { + m[k] = v + } + for k, v := range networksLabels[vip.NetworkID] { + m[k] = v + } + ms = append(ms, m) } } - } return ms } diff --git a/lib/promscrape/discovery/dockerswarm/services_test.go b/lib/promscrape/discovery/dockerswarm/services_test.go index 615b21af20..0706f12d02 100644 --- a/lib/promscrape/discovery/dockerswarm/services_test.go +++ b/lib/promscrape/discovery/dockerswarm/services_test.go @@ -172,7 +172,7 @@ func Test_parseServicesResponse(t *testing.T) { func Test_addServicesLabels(t *testing.T) { type args struct { services []service - networksLabels []map[string]string + networksLabels map[string]map[string]string port int } tests := []struct { @@ -184,8 +184,8 @@ func Test_addServicesLabels(t *testing.T) { name: "add 2 services with network labels join", args: args{ port: 9100, - networksLabels: []map[string]string{ - { + networksLabels: map[string]map[string]string{ + "qs0hog6ldlei9ct11pr3c77v1": { "__meta_dockerswarm_network_id": "qs0hog6ldlei9ct11pr3c77v1", "__meta_dockerswarm_network_ingress": "true", "__meta_dockerswarm_network_internal": "false", @@ -275,6 +275,7 @@ func Test_addServicesLabels(t *testing.T) { "__meta_dockerswarm_service_name": "redis2", "__meta_dockerswarm_service_task_container_hostname": "node1", "__meta_dockerswarm_service_task_container_image": "redis:3.0.6@sha256:6a692a76c2081888b589e26e6ec835743119fe453d67ecf03df7de5b73d69842", + "__meta_dockerswarm_service_updating_status": "", })}, }, } diff --git a/lib/promscrape/discovery/dockerswarm/tasks.go b/lib/promscrape/discovery/dockerswarm/tasks.go index cd0821d901..e8724c2cba 100644 --- a/lib/promscrape/discovery/dockerswarm/tasks.go +++ b/lib/promscrape/discovery/dockerswarm/tasks.go @@ -26,7 +26,7 @@ type task struct { } Status struct { State string - ContainerStatus *struct { + ContainerStatus struct { ContainerID string } PortStatus struct { @@ -45,7 +45,7 @@ func 
getTasksLabels(cfg *apiConfig) ([]map[string]string, error) { if err != nil { return nil, err } - networkLabels, err := getNetworksLabels(cfg) + networkLabels, err := getNetworksLabelsByNetworkID(cfg) if err != nil { return nil, err } @@ -73,20 +73,18 @@ func parseTasks(data []byte) ([]task, error) { return tasks, nil } -func addTasksLabels(tasks []task, nodesLabels, servicesLabels, networksLabels []map[string]string, services []service, port int) []map[string]string { +func addTasksLabels(tasks []task, nodesLabels, servicesLabels []map[string]string, networksLabels map[string]map[string]string, services []service, port int) []map[string]string { var ms []map[string]string for _, task := range tasks { - m := map[string]string{ + commonLabels := map[string]string{ "__meta_dockerswarm_task_id": task.ID, + "__meta_dockerswarm_task_container_id": task.Status.ContainerStatus.ContainerID, "__meta_dockerswarm_task_desired_state": task.DesiredState, - "__meta_dockerswarm_task_state": task.Status.State, "__meta_dockerswarm_task_slot": strconv.Itoa(task.Slot), - } - if task.Status.ContainerStatus != nil { - m["__meta_dockerswarm_task_container_id"] = task.Status.ContainerStatus.ContainerID + "__meta_dockerswarm_task_state": task.Status.State, } for k, v := range task.Labels { - m["__meta_dockerswarm_task_label_"+discoveryutils.SanitizeLabelName(k)] = v + commonLabels["__meta_dockerswarm_task_label_"+discoveryutils.SanitizeLabelName(k)] = v } var svcPorts []portConfig for i, v := range services { @@ -95,20 +93,21 @@ func addTasksLabels(tasks []task, nodesLabels, servicesLabels, networksLabels [] break } } - m = joinLabels(servicesLabels, m, "__meta_dockerswarm_service_id", task.ServiceID) - m = joinLabels(nodesLabels, m, "__meta_dockerswarm_node_id", task.NodeID) + addLabels(commonLabels, servicesLabels, "__meta_dockerswarm_service_id", task.ServiceID) + addLabels(commonLabels, nodesLabels, "__meta_dockerswarm_node_id", task.NodeID) for _, port := range task.Status.PortStatus.Ports { if port.Protocol != "tcp" { continue } - lbls := make(map[string]string, len(m)) - lbls["__meta_dockerswarm_task_port_publish_mode"] = port.PublishMode - lbls["__address__"] = discoveryutils.JoinHostPort(m["__meta_dockerswarm_node_address"], port.PublishedPort) - for k, v := range m { - lbls[k] = v + m := map[string]string{ + "__address__": discoveryutils.JoinHostPort(commonLabels["__meta_dockerswarm_node_address"], port.PublishedPort), + "__meta_dockerswarm_task_port_publish_mode": port.PublishMode, } - ms = append(ms, lbls) + for k, v := range commonLabels { + m[k] = v + } + ms = append(ms, m) } for _, na := range task.NetworksAttachments { for _, address := range na.Addresses { @@ -117,33 +116,51 @@ func addTasksLabels(tasks []task, nodesLabels, servicesLabels, networksLabels [] logger.Errorf("cannot parse task network attachments address: %s as net CIDR: %v", address, err) continue } - var added bool - for _, v := range svcPorts { - if v.Protocol != "tcp" { + added := false + for _, ep := range svcPorts { + if ep.Protocol != "tcp" { continue } - lbls := make(map[string]string, len(m)) - for k, v := range m { - lbls[k] = v + m := map[string]string{ + "__address": discoveryutils.JoinHostPort(ip.String(), ep.PublishedPort), + "__meta_dockerswarm_task_port_publish_mode": ep.PublishMode, } - lbls = joinLabels(networksLabels, lbls, "__meta_dockerswarm_network_id", na.Network.ID) - lbls["__address"] = discoveryutils.JoinHostPort(ip.String(), v.PublishedPort) - lbls["__meta_dockerswarm_task_port_publish_mode"] = 
v.PublishMode - ms = append(ms, lbls) + for k, v := range commonLabels { + m[k] = v + } + for k, v := range networksLabels[na.Network.ID] { + m[k] = v + } + ms = append(ms, m) added = true } - if !added { - lbls := make(map[string]string, len(m)) - for k, v := range m { - lbls[k] = v + m := map[string]string{ + "__address__": discoveryutils.JoinHostPort(ip.String(), port), } - lbls = joinLabels(networksLabels, lbls, "__meta_dockerswarm_network_id", na.Network.ID) - lbls["__address__"] = discoveryutils.JoinHostPort(ip.String(), port) - ms = append(ms, lbls) + for k, v := range commonLabels { + m[k] = v + } + for k, v := range networksLabels[na.Network.ID] { + m[k] = v + } + ms = append(ms, m) } } } } return ms } + +// addLabels adds lables from src to dst if they contain the given `key: value` pair. +func addLabels(dst map[string]string, src []map[string]string, key, value string) { + for _, m := range src { + if m[key] != value { + continue + } + for k, v := range m { + dst[k] = v + } + return + } +} diff --git a/lib/promscrape/discovery/dockerswarm/tasks_test.go b/lib/promscrape/discovery/dockerswarm/tasks_test.go index 0d32c4cfba..7831229391 100644 --- a/lib/promscrape/discovery/dockerswarm/tasks_test.go +++ b/lib/promscrape/discovery/dockerswarm/tasks_test.go @@ -27,7 +27,9 @@ func Test_parseTasks(t *testing.T) { "Version": { "Index": 23 }, - "Labels": {}, + "Labels": { + "label1": "value1" + }, "Spec": { "ContainerSpec": { "Image": "redis:3.0.6@sha256:6a692a76c2081888b589e26e6ec835743119fe453d67ecf03df7de5b73d69842", @@ -65,19 +67,21 @@ func Test_parseTasks(t *testing.T) { }, want: []task{ { - ID: "t4rdm7j2y9yctbrksiwvsgpu5", - ServiceID: "t91nf284wzle1ya09lqvyjgnq", - NodeID: "qauwmifceyvqs0sipvzu8oslu", - Labels: map[string]string{}, + ID: "t4rdm7j2y9yctbrksiwvsgpu5", + ServiceID: "t91nf284wzle1ya09lqvyjgnq", + NodeID: "qauwmifceyvqs0sipvzu8oslu", + Labels: map[string]string{ + "label1": "value1", + }, DesiredState: "running", Slot: 1, Status: struct { State string - ContainerStatus *struct{ ContainerID string } + ContainerStatus struct{ ContainerID string } PortStatus struct{ Ports []portConfig } }{ State: "running", - ContainerStatus: &struct{ ContainerID string }{ + ContainerStatus: struct{ ContainerID string }{ ContainerID: "33034b69f6fa5f808098208752fd1fe4e0e1ca86311988cea6a73b998cdc62e8", }, PortStatus: struct{ Ports []portConfig }{}}, @@ -104,7 +108,7 @@ func Test_addTasksLabels(t *testing.T) { tasks []task nodesLabels []map[string]string servicesLabels []map[string]string - networksLabels []map[string]string + networksLabels map[string]map[string]string services []service port int } @@ -127,11 +131,11 @@ func Test_addTasksLabels(t *testing.T) { Slot: 1, Status: struct { State string - ContainerStatus *struct{ ContainerID string } + ContainerStatus struct{ ContainerID string } PortStatus struct{ Ports []portConfig } }{ State: "running", - ContainerStatus: &struct{ ContainerID string }{ + ContainerStatus: struct{ ContainerID string }{ ContainerID: "33034b69f6fa5f808098208752fd1fe4e0e1ca86311988cea6a73b998cdc62e8", }, PortStatus: struct{ Ports []portConfig }{ @@ -208,18 +212,18 @@ func Test_addTasksLabels(t *testing.T) { }, Status: struct { State string - ContainerStatus *struct{ ContainerID string } + ContainerStatus struct{ ContainerID string } PortStatus struct{ Ports []portConfig } }{ State: "running", - ContainerStatus: &struct{ ContainerID string }{ + ContainerStatus: struct{ ContainerID string }{ ContainerID: 
"33034b69f6fa5f808098208752fd1fe4e0e1ca86311988cea6a73b998cdc62e8", }, PortStatus: struct{ Ports []portConfig }{}}, }, }, - networksLabels: []map[string]string{ - { + networksLabels: map[string]map[string]string{ + "qs0hog6ldlei9ct11pr3c77v1": { "__meta_dockerswarm_network_id": "qs0hog6ldlei9ct11pr3c77v1", "__meta_dockerswarm_network_ingress": "true", "__meta_dockerswarm_network_internal": "false", @@ -288,22 +292,23 @@ func Test_addTasksLabels(t *testing.T) { NetworkID string Addr string } - }{Ports: []portConfig{ - { - Protocol: "tcp", - Name: "redis", - PublishMode: "ingress", - }, - }, VirtualIPs: []struct { - NetworkID string - Addr string }{ - { - NetworkID: "qs0hog6ldlei9ct11pr3c77v1", - Addr: "10.0.0.3/24", + Ports: []portConfig{ + { + Protocol: "tcp", + Name: "redis", + PublishMode: "ingress", + }, + }, VirtualIPs: []struct { + NetworkID string + Addr string + }{ + { + NetworkID: "qs0hog6ldlei9ct11pr3c77v1", + Addr: "10.0.0.3/24", + }, }, }, - }, }, }, servicesLabels: []map[string]string{}, diff --git a/lib/promscrape/discovery/openstack/api.go b/lib/promscrape/discovery/openstack/api.go index 21e0e0745f..8c15d3187d 100644 --- a/lib/promscrape/discovery/openstack/api.go +++ b/lib/promscrape/discovery/openstack/api.go @@ -75,12 +75,12 @@ func newAPIConfig(sdc *SDConfig, baseDir string) (*apiConfig, error) { port: sdc.Port, } if sdc.TLSConfig != nil { - config, err := promauth.NewConfig(baseDir, nil, "", "", sdc.TLSConfig) + ac, err := promauth.NewConfig(baseDir, nil, "", "", sdc.TLSConfig) if err != nil { return nil, err } cfg.client.Transport = &http.Transport{ - TLSClientConfig: config.NewTLSConfig(), + TLSClientConfig: ac.NewTLSConfig(), } } // use public compute endpoint by default From 4cc6574cea6295f85eca81747a7b640a3b974126 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin <valyala@gmail.com> Date: Mon, 12 Oct 2020 16:17:42 +0300 Subject: [PATCH 15/24] CHANGELOG.md: mention about added Docker Swarm service discovery --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ec54550c1..2a83529085 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ ``` node_filesystem_files{ host="$host", mountpoint="/" } - node_filesystem_files_free ``` +* FEATURE: vmagent: add Docker Swarm service discovery (aka [dockerswarm_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dockerswarm_sd_config)). + See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/656 * FEATURE: vmagent: add `-promscrape.suppressDuplicateScrapeTargetErrors` command-line flag for suppressing `duplicate scrape target` errors. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/651 and https://victoriametrics.github.io/vmagent.html#troubleshooting . * FEATURE: vmagent: show original labels before relabeling is applied on `duplicate scrape target` errors. This should simplify debugging for incorrect relabeling. 
From 4f16a964e3b0e59e5ef6ecd02ab8766f40975344 Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin <valyala@gmail.com>
Date: Mon, 12 Oct 2020 20:01:51 +0300
Subject: [PATCH 16/24] app/vmselect: add ability to export data in CSV format
 via `/api/v1/export/csv`

---
 CHANGELOG.md                           |   1 +
 README.md                              |  26 +
 app/vmselect/main.go                   |  11 +
 app/vmselect/prometheus/export.qtpl    |  73 +++
 app/vmselect/prometheus/export.qtpl.go | 703 ++++++++++++++++---------
 app/vmselect/prometheus/prometheus.go  |  85 +++
 docs/Single-server-VictoriaMetrics.md  |  26 +
 7 files changed, 667 insertions(+), 258 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2a83529085..cf37cfcad0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@
 ```
 * FEATURE: vmagent: add Docker Swarm service discovery (aka [dockerswarm_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dockerswarm_sd_config)).
   See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/656
+* FEATURE: add ability to export data in CSV format. See [these docs](https://victoriametrics.github.io/#how-to-export-csv-data) for details.
 * FEATURE: vmagent: add `-promscrape.suppressDuplicateScrapeTargetErrors` command-line flag for suppressing `duplicate scrape target` errors. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/651 and https://victoriametrics.github.io/vmagent.html#troubleshooting .
 * FEATURE: vmagent: show original labels before relabeling is applied on `duplicate scrape target` errors. This should simplify debugging for incorrect relabeling.
diff --git a/README.md b/README.md
index b32a5ad7f1..7e379bfdbf 100644
--- a/README.md
+++ b/README.md
@@ -118,6 +118,7 @@ See [features available for enterprise customers](https://github.com/VictoriaMet
 * [How to export time series](#how-to-export-time-series)
   * [How to export data in native format](#how-to-export-data-in-native-format)
   * [How to export data in JSON line format](#how-to-export-data-in-json-line-format)
+  * [How to export CSV data](#how-to-export-csv-data)
 * [How to import time series data](#how-to-import-time-series-data)
   * [How to import data in native format](#how-to-import-data-in-native-format)
   * [How to import data in json line format](#how-to-import-data-in-json-line-format)
@@ -683,6 +684,7 @@ VictoriaMetrics provides the following handlers for exporting data:
 * `/api/v1/export/native` for exporting data in native binary format. This is the most efficient format for data export. See [these docs](#how-to-export-data-in-native-format) for details.
 * `/api/v1/export` for exporting data in JSON line format. See [these docs](#how-to-export-data-in-json-line-format) for details.
+* `/api/v1/export/csv` for exporting data in CSV format. See [these docs](#how-to-export-csv-data) for details.
 
 #### How to export data in native format
 
@@ -732,6 +734,30 @@ The maximum duration for each request to `/api/v1/export` is limited by `-search
 
 Exported data can be imported via POST'ing it to [/api/v1/import](#how-to-import-data-in-json-line-format).
 
+#### How to export CSV data
+
+Send a request to `http://<victoriametrics-addr>:8428/api/v1/export/csv?format=<format>&match=<timeseries_selector_for_export>`,
+where:
+
+* `<format>` must contain comma-delimited label names for the exported CSV. The following special label names are supported:
+  * `__name__` - metric name
+  * `__value__` - sample value
+  * `__timestamp__:<ts_format>` - sample timestamp. 
`<ts_format>` can have the following values: + * `unix_s` - unix seconds + * `unix_ms` - unix milliseconds + * `unix_ns` - unix nanoseconds + * `rfc3339` - [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) time + * `custom:<layout>` - custom layout for time that is supported by [time.Format](https://golang.org/pkg/time/#Time.Format) function from Go. + +* `<timeseries_selector_for_export>` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors) +for metrics to export. + +Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data. These args may contain either +unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values. + +The exported CSV data can be imported to VictoriaMetrics via [/api/v1/import/csv](#how-to-import-csv-data). + + ### How to import time series data Time series data can be imported via any supported ingestion protocol: diff --git a/app/vmselect/main.go b/app/vmselect/main.go index a1dfd3375a..761e75f397 100644 --- a/app/vmselect/main.go +++ b/app/vmselect/main.go @@ -203,6 +203,14 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool { return true } return true + case "/api/v1/export/csv": + exportCSVRequests.Inc() + if err := prometheus.ExportCSVHandler(startTime, w, r); err != nil { + exportCSVErrors.Inc() + httpserver.Errorf(w, r, "error in %q: %s", r.URL.Path, err) + return true + } + return true case "/api/v1/export/native": exportNativeRequests.Inc() if err := prometheus.ExportNativeHandler(startTime, w, r); err != nil { @@ -329,6 +337,9 @@ var ( exportRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/export"}`) exportErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/export"}`) + exportCSVRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/export/csv"}`) + exportCSVErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/export/csv"}`) + exportNativeRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/export/native"}`) exportNativeErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/export/native"}`) diff --git a/app/vmselect/prometheus/export.qtpl b/app/vmselect/prometheus/export.qtpl index 990120e406..cdfd3f6a39 100644 --- a/app/vmselect/prometheus/export.qtpl +++ b/app/vmselect/prometheus/export.qtpl @@ -1,10 +1,83 @@ {% import ( + "bytes" + "strings" + "time" + "github.com/valyala/quicktemplate" "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage" ) %} {% stripspace %} +{% func ExportCSVLine(xb *exportBlock, fieldNames []string) %} + {% if len(xb.timestamps) == 0 || len(fieldNames) == 0 %}{% return %}{% endif %} + {% for i, timestamp := range xb.timestamps %} + {% code value := xb.values[i] %} + {%= exportCSVField(xb.mn, fieldNames[0], timestamp, value) %} + {% for _, fieldName := range fieldNames[1:] %} + , + {%= exportCSVField(xb.mn, fieldName, timestamp, value) %} + {% endfor %} + {% newline %} + {% endfor %} +{% endfunc %} + +{% func exportCSVField(mn *storage.MetricName, fieldName string, timestamp int64, value float64) %} + {% if fieldName == "__value__" %} + {%f= value %} + {% return %} + {% endif %} + {% if fieldName == "__timestamp__" %} + {%dl timestamp %} + {% return %} + {% endif %} + {% if strings.HasPrefix(fieldName, "__timestamp__:") %} + {% code timeFormat := fieldName[len("__timestamp__:"):] %} + {% switch timeFormat %} + {% case "unix_s" %} + {%dl= timestamp/1000 %} + {% case 
"unix_ms" %} + {%dl= timestamp %} + {% case "unix_ns" %} + {%dl= timestamp*1e6 %} + {% case "rfc3339" %} + {% code + bb := quicktemplate.AcquireByteBuffer() + bb.B = time.Unix(timestamp/1000, (timestamp%1000)*1e6).AppendFormat(bb.B[:0], time.RFC3339) + %} + {%z= bb.B %} + {% code + quicktemplate.ReleaseByteBuffer(bb) + %} + {% default %} + {% if strings.HasPrefix(timeFormat, "custom:") %} + {% code + layout := timeFormat[len("custom:"):] + bb := quicktemplate.AcquireByteBuffer() + bb.B = time.Unix(timestamp/1000, (timestamp%1000)*1e6).AppendFormat(bb.B[:0], layout) + %} + {% if bytes.ContainsAny(bb.B, `"`+",\n") %} + {%qz bb.B %} + {% else %} + {%z= bb.B %} + {% endif %} + {% code + quicktemplate.ReleaseByteBuffer(bb) + %} + {% else %} + Unsupported timeFormat={%s= timeFormat %} + {% endif %} + {% endswitch %} + {% return %} + {% endif %} + {% code v := mn.GetTagValue(fieldName) %} + {% if bytes.ContainsAny(v, `"`+",\n") %} + {%qz= v %} + {% else %} + {%z= v %} + {% endif %} +{% endfunc %} + {% func ExportPrometheusLine(xb *exportBlock) %} {% if len(xb.timestamps) == 0 %}{% return %}{% endif %} {% code bb := quicktemplate.AcquireByteBuffer() %} diff --git a/app/vmselect/prometheus/export.qtpl.go b/app/vmselect/prometheus/export.qtpl.go index 2082b9020d..903bd2a9f5 100644 --- a/app/vmselect/prometheus/export.qtpl.go +++ b/app/vmselect/prometheus/export.qtpl.go @@ -6,306 +6,201 @@ package prometheus //line app/vmselect/prometheus/export.qtpl:1 import ( + "bytes" + "strings" + "time" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage" "github.com/valyala/quicktemplate" ) -//line app/vmselect/prometheus/export.qtpl:8 +//line app/vmselect/prometheus/export.qtpl:12 import ( qtio422016 "io" qt422016 "github.com/valyala/quicktemplate" ) -//line app/vmselect/prometheus/export.qtpl:8 +//line app/vmselect/prometheus/export.qtpl:12 var ( _ = qtio422016.Copy _ = qt422016.AcquireByteBuffer ) -//line app/vmselect/prometheus/export.qtpl:8 -func StreamExportPrometheusLine(qw422016 *qt422016.Writer, xb *exportBlock) { -//line app/vmselect/prometheus/export.qtpl:9 - if len(xb.timestamps) == 0 { -//line app/vmselect/prometheus/export.qtpl:9 - return -//line app/vmselect/prometheus/export.qtpl:9 - } -//line app/vmselect/prometheus/export.qtpl:10 - bb := quicktemplate.AcquireByteBuffer() - -//line app/vmselect/prometheus/export.qtpl:11 - writeprometheusMetricName(bb, xb.mn) - //line app/vmselect/prometheus/export.qtpl:12 - for i, ts := range xb.timestamps { +func StreamExportCSVLine(qw422016 *qt422016.Writer, xb *exportBlock, fieldNames []string) { //line app/vmselect/prometheus/export.qtpl:13 - qw422016.N().Z(bb.B) + if len(xb.timestamps) == 0 || len(fieldNames) == 0 { //line app/vmselect/prometheus/export.qtpl:13 - qw422016.N().S(` `) + return +//line app/vmselect/prometheus/export.qtpl:13 + } //line app/vmselect/prometheus/export.qtpl:14 - qw422016.N().F(xb.values[i]) -//line app/vmselect/prometheus/export.qtpl:14 - qw422016.N().S(` `) -//line app/vmselect/prometheus/export.qtpl:15 - qw422016.N().DL(ts) + for i, timestamp := range xb.timestamps { //line app/vmselect/prometheus/export.qtpl:15 + value := xb.values[i] + +//line app/vmselect/prometheus/export.qtpl:16 + streamexportCSVField(qw422016, xb.mn, fieldNames[0], timestamp, value) +//line app/vmselect/prometheus/export.qtpl:17 + for _, fieldName := range fieldNames[1:] { +//line app/vmselect/prometheus/export.qtpl:17 + qw422016.N().S(`,`) +//line app/vmselect/prometheus/export.qtpl:19 + streamexportCSVField(qw422016, xb.mn, fieldName, timestamp, 
value) +//line app/vmselect/prometheus/export.qtpl:20 + } +//line app/vmselect/prometheus/export.qtpl:21 qw422016.N().S(` `) -//line app/vmselect/prometheus/export.qtpl:16 +//line app/vmselect/prometheus/export.qtpl:22 } -//line app/vmselect/prometheus/export.qtpl:17 - quicktemplate.ReleaseByteBuffer(bb) - -//line app/vmselect/prometheus/export.qtpl:18 +//line app/vmselect/prometheus/export.qtpl:23 } -//line app/vmselect/prometheus/export.qtpl:18 -func WriteExportPrometheusLine(qq422016 qtio422016.Writer, xb *exportBlock) { -//line app/vmselect/prometheus/export.qtpl:18 +//line app/vmselect/prometheus/export.qtpl:23 +func WriteExportCSVLine(qq422016 qtio422016.Writer, xb *exportBlock, fieldNames []string) { +//line app/vmselect/prometheus/export.qtpl:23 qw422016 := qt422016.AcquireWriter(qq422016) -//line app/vmselect/prometheus/export.qtpl:18 - StreamExportPrometheusLine(qw422016, xb) -//line app/vmselect/prometheus/export.qtpl:18 +//line app/vmselect/prometheus/export.qtpl:23 + StreamExportCSVLine(qw422016, xb, fieldNames) +//line app/vmselect/prometheus/export.qtpl:23 qt422016.ReleaseWriter(qw422016) -//line app/vmselect/prometheus/export.qtpl:18 +//line app/vmselect/prometheus/export.qtpl:23 } -//line app/vmselect/prometheus/export.qtpl:18 -func ExportPrometheusLine(xb *exportBlock) string { -//line app/vmselect/prometheus/export.qtpl:18 +//line app/vmselect/prometheus/export.qtpl:23 +func ExportCSVLine(xb *exportBlock, fieldNames []string) string { +//line app/vmselect/prometheus/export.qtpl:23 qb422016 := qt422016.AcquireByteBuffer() -//line app/vmselect/prometheus/export.qtpl:18 - WriteExportPrometheusLine(qb422016, xb) -//line app/vmselect/prometheus/export.qtpl:18 +//line app/vmselect/prometheus/export.qtpl:23 + WriteExportCSVLine(qb422016, xb, fieldNames) +//line app/vmselect/prometheus/export.qtpl:23 qs422016 := string(qb422016.B) -//line app/vmselect/prometheus/export.qtpl:18 +//line app/vmselect/prometheus/export.qtpl:23 qt422016.ReleaseByteBuffer(qb422016) -//line app/vmselect/prometheus/export.qtpl:18 +//line app/vmselect/prometheus/export.qtpl:23 return qs422016 -//line app/vmselect/prometheus/export.qtpl:18 +//line app/vmselect/prometheus/export.qtpl:23 } -//line app/vmselect/prometheus/export.qtpl:20 -func StreamExportJSONLine(qw422016 *qt422016.Writer, xb *exportBlock) { -//line app/vmselect/prometheus/export.qtpl:21 - if len(xb.timestamps) == 0 { -//line app/vmselect/prometheus/export.qtpl:21 - return -//line app/vmselect/prometheus/export.qtpl:21 - } -//line app/vmselect/prometheus/export.qtpl:21 - qw422016.N().S(`{"metric":`) -//line app/vmselect/prometheus/export.qtpl:23 - streammetricNameObject(qw422016, xb.mn) -//line app/vmselect/prometheus/export.qtpl:23 - qw422016.N().S(`,"values":[`) //line app/vmselect/prometheus/export.qtpl:25 - if len(xb.values) > 0 { +func streamexportCSVField(qw422016 *qt422016.Writer, mn *storage.MetricName, fieldName string, timestamp int64, value float64) { //line app/vmselect/prometheus/export.qtpl:26 - values := xb.values - + if fieldName == "__value__" { //line app/vmselect/prometheus/export.qtpl:27 - qw422016.N().F(values[0]) + qw422016.N().F(value) //line app/vmselect/prometheus/export.qtpl:28 - values = values[1:] - + return //line app/vmselect/prometheus/export.qtpl:29 - for _, v := range values { -//line app/vmselect/prometheus/export.qtpl:29 - qw422016.N().S(`,`) + } //line app/vmselect/prometheus/export.qtpl:30 - qw422016.N().F(v) + if fieldName == "__timestamp__" { //line app/vmselect/prometheus/export.qtpl:31 - } + 
qw422016.N().DL(timestamp) //line app/vmselect/prometheus/export.qtpl:32 + return +//line app/vmselect/prometheus/export.qtpl:33 } -//line app/vmselect/prometheus/export.qtpl:32 - qw422016.N().S(`],"timestamps":[`) +//line app/vmselect/prometheus/export.qtpl:34 + if strings.HasPrefix(fieldName, "__timestamp__:") { //line app/vmselect/prometheus/export.qtpl:35 - if len(xb.timestamps) > 0 { + timeFormat := fieldName[len("__timestamp__:"):] + //line app/vmselect/prometheus/export.qtpl:36 - timestamps := xb.timestamps - + switch timeFormat { //line app/vmselect/prometheus/export.qtpl:37 - qw422016.N().DL(timestamps[0]) + case "unix_s": //line app/vmselect/prometheus/export.qtpl:38 - timestamps = timestamps[1:] - + qw422016.N().DL(timestamp / 1000) //line app/vmselect/prometheus/export.qtpl:39 - for _, ts := range timestamps { -//line app/vmselect/prometheus/export.qtpl:39 - qw422016.N().S(`,`) + case "unix_ms": //line app/vmselect/prometheus/export.qtpl:40 - qw422016.N().DL(ts) + qw422016.N().DL(timestamp) //line app/vmselect/prometheus/export.qtpl:41 - } + case "unix_ns": //line app/vmselect/prometheus/export.qtpl:42 - } -//line app/vmselect/prometheus/export.qtpl:42 - qw422016.N().S(`]}`) -//line app/vmselect/prometheus/export.qtpl:44 - qw422016.N().S(` -`) + qw422016.N().DL(timestamp * 1e6) +//line app/vmselect/prometheus/export.qtpl:43 + case "rfc3339": //line app/vmselect/prometheus/export.qtpl:45 -} + bb := quicktemplate.AcquireByteBuffer() + bb.B = time.Unix(timestamp/1000, (timestamp%1000)*1e6).AppendFormat(bb.B[:0], time.RFC3339) -//line app/vmselect/prometheus/export.qtpl:45 -func WriteExportJSONLine(qq422016 qtio422016.Writer, xb *exportBlock) { -//line app/vmselect/prometheus/export.qtpl:45 - qw422016 := qt422016.AcquireWriter(qq422016) -//line app/vmselect/prometheus/export.qtpl:45 - StreamExportJSONLine(qw422016, xb) -//line app/vmselect/prometheus/export.qtpl:45 - qt422016.ReleaseWriter(qw422016) -//line app/vmselect/prometheus/export.qtpl:45 -} - -//line app/vmselect/prometheus/export.qtpl:45 -func ExportJSONLine(xb *exportBlock) string { -//line app/vmselect/prometheus/export.qtpl:45 - qb422016 := qt422016.AcquireByteBuffer() -//line app/vmselect/prometheus/export.qtpl:45 - WriteExportJSONLine(qb422016, xb) -//line app/vmselect/prometheus/export.qtpl:45 - qs422016 := string(qb422016.B) -//line app/vmselect/prometheus/export.qtpl:45 - qt422016.ReleaseByteBuffer(qb422016) -//line app/vmselect/prometheus/export.qtpl:45 - return qs422016 -//line app/vmselect/prometheus/export.qtpl:45 -} - -//line app/vmselect/prometheus/export.qtpl:47 -func StreamExportPromAPILine(qw422016 *qt422016.Writer, xb *exportBlock) { -//line app/vmselect/prometheus/export.qtpl:47 - qw422016.N().S(`{"metric":`) -//line app/vmselect/prometheus/export.qtpl:49 - streammetricNameObject(qw422016, xb.mn) -//line app/vmselect/prometheus/export.qtpl:49 - qw422016.N().S(`,"values":`) -//line app/vmselect/prometheus/export.qtpl:50 - streamvaluesWithTimestamps(qw422016, xb.values, xb.timestamps) -//line app/vmselect/prometheus/export.qtpl:50 - qw422016.N().S(`}`) -//line app/vmselect/prometheus/export.qtpl:52 -} - -//line app/vmselect/prometheus/export.qtpl:52 -func WriteExportPromAPILine(qq422016 qtio422016.Writer, xb *exportBlock) { -//line app/vmselect/prometheus/export.qtpl:52 - qw422016 := qt422016.AcquireWriter(qq422016) -//line app/vmselect/prometheus/export.qtpl:52 - StreamExportPromAPILine(qw422016, xb) -//line app/vmselect/prometheus/export.qtpl:52 - qt422016.ReleaseWriter(qw422016) -//line 
app/vmselect/prometheus/export.qtpl:52 -} - -//line app/vmselect/prometheus/export.qtpl:52 -func ExportPromAPILine(xb *exportBlock) string { -//line app/vmselect/prometheus/export.qtpl:52 - qb422016 := qt422016.AcquireByteBuffer() -//line app/vmselect/prometheus/export.qtpl:52 - WriteExportPromAPILine(qb422016, xb) -//line app/vmselect/prometheus/export.qtpl:52 - qs422016 := string(qb422016.B) -//line app/vmselect/prometheus/export.qtpl:52 - qt422016.ReleaseByteBuffer(qb422016) -//line app/vmselect/prometheus/export.qtpl:52 - return qs422016 -//line app/vmselect/prometheus/export.qtpl:52 -} - -//line app/vmselect/prometheus/export.qtpl:54 -func StreamExportPromAPIResponse(qw422016 *qt422016.Writer, resultsCh <-chan *quicktemplate.ByteBuffer) { -//line app/vmselect/prometheus/export.qtpl:54 - qw422016.N().S(`{"status":"success","data":{"resultType":"matrix","result":[`) -//line app/vmselect/prometheus/export.qtpl:60 - bb, ok := <-resultsCh - -//line app/vmselect/prometheus/export.qtpl:61 - if ok { -//line app/vmselect/prometheus/export.qtpl:62 - qw422016.N().Z(bb.B) -//line app/vmselect/prometheus/export.qtpl:63 - quicktemplate.ReleaseByteBuffer(bb) - -//line app/vmselect/prometheus/export.qtpl:64 - for bb := range resultsCh { -//line app/vmselect/prometheus/export.qtpl:64 - qw422016.N().S(`,`) -//line app/vmselect/prometheus/export.qtpl:65 +//line app/vmselect/prometheus/export.qtpl:48 qw422016.N().Z(bb.B) -//line app/vmselect/prometheus/export.qtpl:66 +//line app/vmselect/prometheus/export.qtpl:50 quicktemplate.ReleaseByteBuffer(bb) +//line app/vmselect/prometheus/export.qtpl:52 + default: +//line app/vmselect/prometheus/export.qtpl:53 + if strings.HasPrefix(timeFormat, "custom:") { +//line app/vmselect/prometheus/export.qtpl:55 + layout := timeFormat[len("custom:"):] + bb := quicktemplate.AcquireByteBuffer() + bb.B = time.Unix(timestamp/1000, (timestamp%1000)*1e6).AppendFormat(bb.B[:0], layout) + +//line app/vmselect/prometheus/export.qtpl:59 + if bytes.ContainsAny(bb.B, `"`+",\n") { +//line app/vmselect/prometheus/export.qtpl:60 + qw422016.E().QZ(bb.B) +//line app/vmselect/prometheus/export.qtpl:61 + } else { +//line app/vmselect/prometheus/export.qtpl:62 + qw422016.N().Z(bb.B) +//line app/vmselect/prometheus/export.qtpl:63 + } +//line app/vmselect/prometheus/export.qtpl:65 + quicktemplate.ReleaseByteBuffer(bb) + //line app/vmselect/prometheus/export.qtpl:67 + } else { +//line app/vmselect/prometheus/export.qtpl:67 + qw422016.N().S(`Unsupported timeFormat=`) +//line app/vmselect/prometheus/export.qtpl:68 + qw422016.N().S(timeFormat) +//line app/vmselect/prometheus/export.qtpl:69 + } +//line app/vmselect/prometheus/export.qtpl:70 } -//line app/vmselect/prometheus/export.qtpl:68 +//line app/vmselect/prometheus/export.qtpl:71 + return +//line app/vmselect/prometheus/export.qtpl:72 } -//line app/vmselect/prometheus/export.qtpl:68 - qw422016.N().S(`]}}`) -//line app/vmselect/prometheus/export.qtpl:72 -} - -//line app/vmselect/prometheus/export.qtpl:72 -func WriteExportPromAPIResponse(qq422016 qtio422016.Writer, resultsCh <-chan *quicktemplate.ByteBuffer) { -//line app/vmselect/prometheus/export.qtpl:72 - qw422016 := qt422016.AcquireWriter(qq422016) -//line app/vmselect/prometheus/export.qtpl:72 - StreamExportPromAPIResponse(qw422016, resultsCh) -//line app/vmselect/prometheus/export.qtpl:72 - qt422016.ReleaseWriter(qw422016) -//line app/vmselect/prometheus/export.qtpl:72 -} - -//line app/vmselect/prometheus/export.qtpl:72 -func ExportPromAPIResponse(resultsCh <-chan 
*quicktemplate.ByteBuffer) string { -//line app/vmselect/prometheus/export.qtpl:72 - qb422016 := qt422016.AcquireByteBuffer() -//line app/vmselect/prometheus/export.qtpl:72 - WriteExportPromAPIResponse(qb422016, resultsCh) -//line app/vmselect/prometheus/export.qtpl:72 - qs422016 := string(qb422016.B) -//line app/vmselect/prometheus/export.qtpl:72 - qt422016.ReleaseByteBuffer(qb422016) -//line app/vmselect/prometheus/export.qtpl:72 - return qs422016 -//line app/vmselect/prometheus/export.qtpl:72 -} +//line app/vmselect/prometheus/export.qtpl:73 + v := mn.GetTagValue(fieldName) //line app/vmselect/prometheus/export.qtpl:74 -func StreamExportStdResponse(qw422016 *qt422016.Writer, resultsCh <-chan *quicktemplate.ByteBuffer) { + if bytes.ContainsAny(v, `"`+",\n") { //line app/vmselect/prometheus/export.qtpl:75 - for bb := range resultsCh { + qw422016.N().QZ(v) //line app/vmselect/prometheus/export.qtpl:76 - qw422016.N().Z(bb.B) + } else { //line app/vmselect/prometheus/export.qtpl:77 - quicktemplate.ReleaseByteBuffer(bb) - + qw422016.N().Z(v) //line app/vmselect/prometheus/export.qtpl:78 } //line app/vmselect/prometheus/export.qtpl:79 } //line app/vmselect/prometheus/export.qtpl:79 -func WriteExportStdResponse(qq422016 qtio422016.Writer, resultsCh <-chan *quicktemplate.ByteBuffer) { +func writeexportCSVField(qq422016 qtio422016.Writer, mn *storage.MetricName, fieldName string, timestamp int64, value float64) { //line app/vmselect/prometheus/export.qtpl:79 qw422016 := qt422016.AcquireWriter(qq422016) //line app/vmselect/prometheus/export.qtpl:79 - StreamExportStdResponse(qw422016, resultsCh) + streamexportCSVField(qw422016, mn, fieldName, timestamp, value) //line app/vmselect/prometheus/export.qtpl:79 qt422016.ReleaseWriter(qw422016) //line app/vmselect/prometheus/export.qtpl:79 } //line app/vmselect/prometheus/export.qtpl:79 -func ExportStdResponse(resultsCh <-chan *quicktemplate.ByteBuffer) string { +func exportCSVField(mn *storage.MetricName, fieldName string, timestamp int64, value float64) string { //line app/vmselect/prometheus/export.qtpl:79 qb422016 := qt422016.AcquireByteBuffer() //line app/vmselect/prometheus/export.qtpl:79 - WriteExportStdResponse(qb422016, resultsCh) + writeexportCSVField(qb422016, mn, fieldName, timestamp, value) //line app/vmselect/prometheus/export.qtpl:79 qs422016 := string(qb422016.B) //line app/vmselect/prometheus/export.qtpl:79 @@ -316,69 +211,361 @@ func ExportStdResponse(resultsCh <-chan *quicktemplate.ByteBuffer) string { } //line app/vmselect/prometheus/export.qtpl:81 -func streamprometheusMetricName(qw422016 *qt422016.Writer, mn *storage.MetricName) { +func StreamExportPrometheusLine(qw422016 *qt422016.Writer, xb *exportBlock) { //line app/vmselect/prometheus/export.qtpl:82 - qw422016.N().Z(mn.MetricGroup) + if len(xb.timestamps) == 0 { +//line app/vmselect/prometheus/export.qtpl:82 + return +//line app/vmselect/prometheus/export.qtpl:82 + } //line app/vmselect/prometheus/export.qtpl:83 - if len(mn.Tags) > 0 { -//line app/vmselect/prometheus/export.qtpl:83 - qw422016.N().S(`{`) + bb := quicktemplate.AcquireByteBuffer() + +//line app/vmselect/prometheus/export.qtpl:84 + writeprometheusMetricName(bb, xb.mn) + //line app/vmselect/prometheus/export.qtpl:85 - tags := mn.Tags - + for i, ts := range xb.timestamps { //line app/vmselect/prometheus/export.qtpl:86 - qw422016.N().Z(tags[0].Key) + qw422016.N().Z(bb.B) //line app/vmselect/prometheus/export.qtpl:86 - qw422016.N().S(`=`) -//line app/vmselect/prometheus/export.qtpl:86 - qw422016.N().QZ(tags[0].Value) + 
qw422016.N().S(` `) //line app/vmselect/prometheus/export.qtpl:87 - tags = tags[1:] - + qw422016.N().F(xb.values[i]) +//line app/vmselect/prometheus/export.qtpl:87 + qw422016.N().S(` `) //line app/vmselect/prometheus/export.qtpl:88 - for i := range tags { + qw422016.N().DL(ts) +//line app/vmselect/prometheus/export.qtpl:88 + qw422016.N().S(` +`) //line app/vmselect/prometheus/export.qtpl:89 - tag := &tags[i] + } +//line app/vmselect/prometheus/export.qtpl:90 + quicktemplate.ReleaseByteBuffer(bb) -//line app/vmselect/prometheus/export.qtpl:89 - qw422016.N().S(`,`) -//line app/vmselect/prometheus/export.qtpl:90 - qw422016.N().Z(tag.Key) -//line app/vmselect/prometheus/export.qtpl:90 - qw422016.N().S(`=`) -//line app/vmselect/prometheus/export.qtpl:90 - qw422016.N().QZ(tag.Value) //line app/vmselect/prometheus/export.qtpl:91 - } +} + //line app/vmselect/prometheus/export.qtpl:91 - qw422016.N().S(`}`) +func WriteExportPrometheusLine(qq422016 qtio422016.Writer, xb *exportBlock) { +//line app/vmselect/prometheus/export.qtpl:91 + qw422016 := qt422016.AcquireWriter(qq422016) +//line app/vmselect/prometheus/export.qtpl:91 + StreamExportPrometheusLine(qw422016, xb) +//line app/vmselect/prometheus/export.qtpl:91 + qt422016.ReleaseWriter(qw422016) +//line app/vmselect/prometheus/export.qtpl:91 +} + +//line app/vmselect/prometheus/export.qtpl:91 +func ExportPrometheusLine(xb *exportBlock) string { +//line app/vmselect/prometheus/export.qtpl:91 + qb422016 := qt422016.AcquireByteBuffer() +//line app/vmselect/prometheus/export.qtpl:91 + WriteExportPrometheusLine(qb422016, xb) +//line app/vmselect/prometheus/export.qtpl:91 + qs422016 := string(qb422016.B) +//line app/vmselect/prometheus/export.qtpl:91 + qt422016.ReleaseByteBuffer(qb422016) +//line app/vmselect/prometheus/export.qtpl:91 + return qs422016 +//line app/vmselect/prometheus/export.qtpl:91 +} + //line app/vmselect/prometheus/export.qtpl:93 +func StreamExportJSONLine(qw422016 *qt422016.Writer, xb *exportBlock) { +//line app/vmselect/prometheus/export.qtpl:94 + if len(xb.timestamps) == 0 { +//line app/vmselect/prometheus/export.qtpl:94 + return +//line app/vmselect/prometheus/export.qtpl:94 } //line app/vmselect/prometheus/export.qtpl:94 + qw422016.N().S(`{"metric":`) +//line app/vmselect/prometheus/export.qtpl:96 + streammetricNameObject(qw422016, xb.mn) +//line app/vmselect/prometheus/export.qtpl:96 + qw422016.N().S(`,"values":[`) +//line app/vmselect/prometheus/export.qtpl:98 + if len(xb.values) > 0 { +//line app/vmselect/prometheus/export.qtpl:99 + values := xb.values + +//line app/vmselect/prometheus/export.qtpl:100 + qw422016.N().F(values[0]) +//line app/vmselect/prometheus/export.qtpl:101 + values = values[1:] + +//line app/vmselect/prometheus/export.qtpl:102 + for _, v := range values { +//line app/vmselect/prometheus/export.qtpl:102 + qw422016.N().S(`,`) +//line app/vmselect/prometheus/export.qtpl:103 + qw422016.N().F(v) +//line app/vmselect/prometheus/export.qtpl:104 + } +//line app/vmselect/prometheus/export.qtpl:105 + } +//line app/vmselect/prometheus/export.qtpl:105 + qw422016.N().S(`],"timestamps":[`) +//line app/vmselect/prometheus/export.qtpl:108 + if len(xb.timestamps) > 0 { +//line app/vmselect/prometheus/export.qtpl:109 + timestamps := xb.timestamps + +//line app/vmselect/prometheus/export.qtpl:110 + qw422016.N().DL(timestamps[0]) +//line app/vmselect/prometheus/export.qtpl:111 + timestamps = timestamps[1:] + +//line app/vmselect/prometheus/export.qtpl:112 + for _, ts := range timestamps { +//line 
app/vmselect/prometheus/export.qtpl:112 + qw422016.N().S(`,`) +//line app/vmselect/prometheus/export.qtpl:113 + qw422016.N().DL(ts) +//line app/vmselect/prometheus/export.qtpl:114 + } +//line app/vmselect/prometheus/export.qtpl:115 + } +//line app/vmselect/prometheus/export.qtpl:115 + qw422016.N().S(`]}`) +//line app/vmselect/prometheus/export.qtpl:117 + qw422016.N().S(` +`) +//line app/vmselect/prometheus/export.qtpl:118 } -//line app/vmselect/prometheus/export.qtpl:94 -func writeprometheusMetricName(qq422016 qtio422016.Writer, mn *storage.MetricName) { -//line app/vmselect/prometheus/export.qtpl:94 +//line app/vmselect/prometheus/export.qtpl:118 +func WriteExportJSONLine(qq422016 qtio422016.Writer, xb *exportBlock) { +//line app/vmselect/prometheus/export.qtpl:118 qw422016 := qt422016.AcquireWriter(qq422016) -//line app/vmselect/prometheus/export.qtpl:94 - streamprometheusMetricName(qw422016, mn) -//line app/vmselect/prometheus/export.qtpl:94 +//line app/vmselect/prometheus/export.qtpl:118 + StreamExportJSONLine(qw422016, xb) +//line app/vmselect/prometheus/export.qtpl:118 qt422016.ReleaseWriter(qw422016) -//line app/vmselect/prometheus/export.qtpl:94 +//line app/vmselect/prometheus/export.qtpl:118 } -//line app/vmselect/prometheus/export.qtpl:94 -func prometheusMetricName(mn *storage.MetricName) string { -//line app/vmselect/prometheus/export.qtpl:94 +//line app/vmselect/prometheus/export.qtpl:118 +func ExportJSONLine(xb *exportBlock) string { +//line app/vmselect/prometheus/export.qtpl:118 qb422016 := qt422016.AcquireByteBuffer() -//line app/vmselect/prometheus/export.qtpl:94 - writeprometheusMetricName(qb422016, mn) -//line app/vmselect/prometheus/export.qtpl:94 +//line app/vmselect/prometheus/export.qtpl:118 + WriteExportJSONLine(qb422016, xb) +//line app/vmselect/prometheus/export.qtpl:118 qs422016 := string(qb422016.B) -//line app/vmselect/prometheus/export.qtpl:94 +//line app/vmselect/prometheus/export.qtpl:118 qt422016.ReleaseByteBuffer(qb422016) -//line app/vmselect/prometheus/export.qtpl:94 +//line app/vmselect/prometheus/export.qtpl:118 return qs422016 -//line app/vmselect/prometheus/export.qtpl:94 +//line app/vmselect/prometheus/export.qtpl:118 +} + +//line app/vmselect/prometheus/export.qtpl:120 +func StreamExportPromAPILine(qw422016 *qt422016.Writer, xb *exportBlock) { +//line app/vmselect/prometheus/export.qtpl:120 + qw422016.N().S(`{"metric":`) +//line app/vmselect/prometheus/export.qtpl:122 + streammetricNameObject(qw422016, xb.mn) +//line app/vmselect/prometheus/export.qtpl:122 + qw422016.N().S(`,"values":`) +//line app/vmselect/prometheus/export.qtpl:123 + streamvaluesWithTimestamps(qw422016, xb.values, xb.timestamps) +//line app/vmselect/prometheus/export.qtpl:123 + qw422016.N().S(`}`) +//line app/vmselect/prometheus/export.qtpl:125 +} + +//line app/vmselect/prometheus/export.qtpl:125 +func WriteExportPromAPILine(qq422016 qtio422016.Writer, xb *exportBlock) { +//line app/vmselect/prometheus/export.qtpl:125 + qw422016 := qt422016.AcquireWriter(qq422016) +//line app/vmselect/prometheus/export.qtpl:125 + StreamExportPromAPILine(qw422016, xb) +//line app/vmselect/prometheus/export.qtpl:125 + qt422016.ReleaseWriter(qw422016) +//line app/vmselect/prometheus/export.qtpl:125 +} + +//line app/vmselect/prometheus/export.qtpl:125 +func ExportPromAPILine(xb *exportBlock) string { +//line app/vmselect/prometheus/export.qtpl:125 + qb422016 := qt422016.AcquireByteBuffer() +//line app/vmselect/prometheus/export.qtpl:125 + WriteExportPromAPILine(qb422016, xb) +//line 
app/vmselect/prometheus/export.qtpl:125 + qs422016 := string(qb422016.B) +//line app/vmselect/prometheus/export.qtpl:125 + qt422016.ReleaseByteBuffer(qb422016) +//line app/vmselect/prometheus/export.qtpl:125 + return qs422016 +//line app/vmselect/prometheus/export.qtpl:125 +} + +//line app/vmselect/prometheus/export.qtpl:127 +func StreamExportPromAPIResponse(qw422016 *qt422016.Writer, resultsCh <-chan *quicktemplate.ByteBuffer) { +//line app/vmselect/prometheus/export.qtpl:127 + qw422016.N().S(`{"status":"success","data":{"resultType":"matrix","result":[`) +//line app/vmselect/prometheus/export.qtpl:133 + bb, ok := <-resultsCh + +//line app/vmselect/prometheus/export.qtpl:134 + if ok { +//line app/vmselect/prometheus/export.qtpl:135 + qw422016.N().Z(bb.B) +//line app/vmselect/prometheus/export.qtpl:136 + quicktemplate.ReleaseByteBuffer(bb) + +//line app/vmselect/prometheus/export.qtpl:137 + for bb := range resultsCh { +//line app/vmselect/prometheus/export.qtpl:137 + qw422016.N().S(`,`) +//line app/vmselect/prometheus/export.qtpl:138 + qw422016.N().Z(bb.B) +//line app/vmselect/prometheus/export.qtpl:139 + quicktemplate.ReleaseByteBuffer(bb) + +//line app/vmselect/prometheus/export.qtpl:140 + } +//line app/vmselect/prometheus/export.qtpl:141 + } +//line app/vmselect/prometheus/export.qtpl:141 + qw422016.N().S(`]}}`) +//line app/vmselect/prometheus/export.qtpl:145 +} + +//line app/vmselect/prometheus/export.qtpl:145 +func WriteExportPromAPIResponse(qq422016 qtio422016.Writer, resultsCh <-chan *quicktemplate.ByteBuffer) { +//line app/vmselect/prometheus/export.qtpl:145 + qw422016 := qt422016.AcquireWriter(qq422016) +//line app/vmselect/prometheus/export.qtpl:145 + StreamExportPromAPIResponse(qw422016, resultsCh) +//line app/vmselect/prometheus/export.qtpl:145 + qt422016.ReleaseWriter(qw422016) +//line app/vmselect/prometheus/export.qtpl:145 +} + +//line app/vmselect/prometheus/export.qtpl:145 +func ExportPromAPIResponse(resultsCh <-chan *quicktemplate.ByteBuffer) string { +//line app/vmselect/prometheus/export.qtpl:145 + qb422016 := qt422016.AcquireByteBuffer() +//line app/vmselect/prometheus/export.qtpl:145 + WriteExportPromAPIResponse(qb422016, resultsCh) +//line app/vmselect/prometheus/export.qtpl:145 + qs422016 := string(qb422016.B) +//line app/vmselect/prometheus/export.qtpl:145 + qt422016.ReleaseByteBuffer(qb422016) +//line app/vmselect/prometheus/export.qtpl:145 + return qs422016 +//line app/vmselect/prometheus/export.qtpl:145 +} + +//line app/vmselect/prometheus/export.qtpl:147 +func StreamExportStdResponse(qw422016 *qt422016.Writer, resultsCh <-chan *quicktemplate.ByteBuffer) { +//line app/vmselect/prometheus/export.qtpl:148 + for bb := range resultsCh { +//line app/vmselect/prometheus/export.qtpl:149 + qw422016.N().Z(bb.B) +//line app/vmselect/prometheus/export.qtpl:150 + quicktemplate.ReleaseByteBuffer(bb) + +//line app/vmselect/prometheus/export.qtpl:151 + } +//line app/vmselect/prometheus/export.qtpl:152 +} + +//line app/vmselect/prometheus/export.qtpl:152 +func WriteExportStdResponse(qq422016 qtio422016.Writer, resultsCh <-chan *quicktemplate.ByteBuffer) { +//line app/vmselect/prometheus/export.qtpl:152 + qw422016 := qt422016.AcquireWriter(qq422016) +//line app/vmselect/prometheus/export.qtpl:152 + StreamExportStdResponse(qw422016, resultsCh) +//line app/vmselect/prometheus/export.qtpl:152 + qt422016.ReleaseWriter(qw422016) +//line app/vmselect/prometheus/export.qtpl:152 +} + +//line app/vmselect/prometheus/export.qtpl:152 +func ExportStdResponse(resultsCh <-chan 
*quicktemplate.ByteBuffer) string { +//line app/vmselect/prometheus/export.qtpl:152 + qb422016 := qt422016.AcquireByteBuffer() +//line app/vmselect/prometheus/export.qtpl:152 + WriteExportStdResponse(qb422016, resultsCh) +//line app/vmselect/prometheus/export.qtpl:152 + qs422016 := string(qb422016.B) +//line app/vmselect/prometheus/export.qtpl:152 + qt422016.ReleaseByteBuffer(qb422016) +//line app/vmselect/prometheus/export.qtpl:152 + return qs422016 +//line app/vmselect/prometheus/export.qtpl:152 +} + +//line app/vmselect/prometheus/export.qtpl:154 +func streamprometheusMetricName(qw422016 *qt422016.Writer, mn *storage.MetricName) { +//line app/vmselect/prometheus/export.qtpl:155 + qw422016.N().Z(mn.MetricGroup) +//line app/vmselect/prometheus/export.qtpl:156 + if len(mn.Tags) > 0 { +//line app/vmselect/prometheus/export.qtpl:156 + qw422016.N().S(`{`) +//line app/vmselect/prometheus/export.qtpl:158 + tags := mn.Tags + +//line app/vmselect/prometheus/export.qtpl:159 + qw422016.N().Z(tags[0].Key) +//line app/vmselect/prometheus/export.qtpl:159 + qw422016.N().S(`=`) +//line app/vmselect/prometheus/export.qtpl:159 + qw422016.N().QZ(tags[0].Value) +//line app/vmselect/prometheus/export.qtpl:160 + tags = tags[1:] + +//line app/vmselect/prometheus/export.qtpl:161 + for i := range tags { +//line app/vmselect/prometheus/export.qtpl:162 + tag := &tags[i] + +//line app/vmselect/prometheus/export.qtpl:162 + qw422016.N().S(`,`) +//line app/vmselect/prometheus/export.qtpl:163 + qw422016.N().Z(tag.Key) +//line app/vmselect/prometheus/export.qtpl:163 + qw422016.N().S(`=`) +//line app/vmselect/prometheus/export.qtpl:163 + qw422016.N().QZ(tag.Value) +//line app/vmselect/prometheus/export.qtpl:164 + } +//line app/vmselect/prometheus/export.qtpl:164 + qw422016.N().S(`}`) +//line app/vmselect/prometheus/export.qtpl:166 + } +//line app/vmselect/prometheus/export.qtpl:167 +} + +//line app/vmselect/prometheus/export.qtpl:167 +func writeprometheusMetricName(qq422016 qtio422016.Writer, mn *storage.MetricName) { +//line app/vmselect/prometheus/export.qtpl:167 + qw422016 := qt422016.AcquireWriter(qq422016) +//line app/vmselect/prometheus/export.qtpl:167 + streamprometheusMetricName(qw422016, mn) +//line app/vmselect/prometheus/export.qtpl:167 + qt422016.ReleaseWriter(qw422016) +//line app/vmselect/prometheus/export.qtpl:167 +} + +//line app/vmselect/prometheus/export.qtpl:167 +func prometheusMetricName(mn *storage.MetricName) string { +//line app/vmselect/prometheus/export.qtpl:167 + qb422016 := qt422016.AcquireByteBuffer() +//line app/vmselect/prometheus/export.qtpl:167 + writeprometheusMetricName(qb422016, mn) +//line app/vmselect/prometheus/export.qtpl:167 + qs422016 := string(qb422016.B) +//line app/vmselect/prometheus/export.qtpl:167 + qt422016.ReleaseByteBuffer(qb422016) +//line app/vmselect/prometheus/export.qtpl:167 + return qs422016 +//line app/vmselect/prometheus/export.qtpl:167 } diff --git a/app/vmselect/prometheus/prometheus.go b/app/vmselect/prometheus/prometheus.go index 6f7714c2ee..db1f89e764 100644 --- a/app/vmselect/prometheus/prometheus.go +++ b/app/vmselect/prometheus/prometheus.go @@ -8,6 +8,7 @@ import ( "runtime" "sort" "strconv" + "strings" "sync" "time" @@ -112,6 +113,90 @@ func FederateHandler(startTime time.Time, w http.ResponseWriter, r *http.Request var federateDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/federate"}`) +// ExportCSVHandler exports data in CSV format from /api/v1/export/csv +func ExportCSVHandler(startTime time.Time, w http.ResponseWriter, r 
*http.Request) error { + ct := startTime.UnixNano() / 1e6 + if err := r.ParseForm(); err != nil { + return fmt.Errorf("cannot parse request form values: %w", err) + } + format := r.FormValue("format") + if len(format) == 0 { + return fmt.Errorf("missing `format` arg; see https://victoriametrics.github.io/#how-to-export-csv-data") + } + fieldNames := strings.Split(format, ",") + matches := r.Form["match[]"] + if len(matches) == 0 { + // Maintain backwards compatibility + match := r.FormValue("match") + if len(match) == 0 { + return fmt.Errorf("missing `match[]` arg") + } + matches = []string{match} + } + start, err := searchutils.GetTime(r, "start", 0) + if err != nil { + return err + } + end, err := searchutils.GetTime(r, "end", ct) + if err != nil { + return err + } + deadline := searchutils.GetDeadlineForExport(r, startTime) + tagFilterss, err := getTagFilterssFromMatches(matches) + if err != nil { + return err + } + sq := &storage.SearchQuery{ + MinTimestamp: start, + MaxTimestamp: end, + TagFilterss: tagFilterss, + } + w.Header().Set("Content-Type", "text/csv") + bw := bufferedwriter.Get(w) + defer bufferedwriter.Put(bw) + + resultsCh := make(chan *quicktemplate.ByteBuffer, runtime.GOMAXPROCS(-1)) + doneCh := make(chan error) + go func() { + err := netstorage.ExportBlocks(sq, deadline, func(mn *storage.MetricName, b *storage.Block, tr storage.TimeRange) error { + if err := bw.Error(); err != nil { + return err + } + if err := b.UnmarshalData(); err != nil { + return fmt.Errorf("cannot unmarshal block during export: %s", err) + } + xb := exportBlockPool.Get().(*exportBlock) + xb.mn = mn + xb.timestamps, xb.values = b.AppendRowsWithTimeRangeFilter(xb.timestamps[:0], xb.values[:0], tr) + if len(xb.timestamps) > 0 { + bb := quicktemplate.AcquireByteBuffer() + WriteExportCSVLine(bb, xb, fieldNames) + resultsCh <- bb + } + xb.reset() + exportBlockPool.Put(xb) + return nil + }) + close(resultsCh) + doneCh <- err + }() + for bb := range resultsCh { + bw.Write(bb.B) + quicktemplate.ReleaseByteBuffer(bb) + } + if err := bw.Flush(); err != nil { + return err + } + err = <-doneCh + if err != nil { + return fmt.Errorf("error during exporting data to csv: %w", err) + } + exportCSVDuration.UpdateDuration(startTime) + return nil +} + +var exportCSVDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/export/csv"}`) + // ExportNativeHandler exports data in native format from /api/v1/export/native. func ExportNativeHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error { ct := startTime.UnixNano() / 1e6 diff --git a/docs/Single-server-VictoriaMetrics.md b/docs/Single-server-VictoriaMetrics.md index b32a5ad7f1..7e379bfdbf 100644 --- a/docs/Single-server-VictoriaMetrics.md +++ b/docs/Single-server-VictoriaMetrics.md @@ -118,6 +118,7 @@ See [features available for enterprise customers](https://github.com/VictoriaMet * [How to export time series](#how-to-export-time-series) * [How to export data in native format](#how-to-export-data-in-native-format) * [How to export data in JSON line format](#how-to-export-data-in-json-line-format) + * [How to export CSV data](#how-to-export-csv-data) * [How to import time series data](#how-to-import-time-series-data) * [How to import data in native format](#how-to-import-data-in-native-format) * [How to import data in json line format](#how-to-import-data-in-json-line-format) @@ -683,6 +684,7 @@ VictoriaMetrics provides the following handlers for exporting data: * `/api/v1/export/native` for exporting data in native binary format. 
This is the most efficient format for data export. See [these docs](#how-to-export-data-in-native-format) for details.
 * `/api/v1/export` for exporting data in JSON line format. See [these docs](#how-to-export-data-in-json-line-format) for details.
+* `/api/v1/export/csv` for exporting data in CSV format. See [these docs](#how-to-export-csv-data) for details.
 
 #### How to export data in native format
 
@@ -732,6 +734,30 @@ The maximum duration for each request to `/api/v1/export` is limited by `-search
 
 Exported data can be imported via POST'ing it to [/api/v1/import](#how-to-import-data-in-json-line-format).
 
+#### How to export CSV data
+
+Send a request to `http://<victoriametrics-addr>:8428/api/v1/export/csv?format=<format>&match=<timeseries_selector_for_export>`,
+where:
+
+* `<format>` must contain comma-delimited label names for the exported CSV. The following special label names are supported:
+  * `__name__` - metric name
+  * `__value__` - sample value
+  * `__timestamp__:<ts_format>` - sample timestamp. `<ts_format>` can have the following values:
+    * `unix_s` - unix seconds
+    * `unix_ms` - unix milliseconds
+    * `unix_ns` - unix nanoseconds
+    * `rfc3339` - [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) time
+    * `custom:<layout>` - a custom time layout supported by the [time.Format](https://golang.org/pkg/time/#Time.Format) function from Go.
+
+* `<timeseries_selector_for_export>` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
+for metrics to export.
+
+Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data. These args may contain either
+a unix timestamp in seconds or an [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) value.
+
+The exported CSV data can be imported to VictoriaMetrics via [/api/v1/import/csv](#how-to-import-csv-data).
+
+
 ### How to import time series data
 
 Time series data can be imported via any supported ingestion protocol:

From 1e274202431cc9b6199b8c33bd41cc2617560394 Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin <valyala@gmail.com>
Date: Tue, 13 Oct 2020 09:35:13 +0300
Subject: [PATCH 17/24] app/vmselect/prometheus: fix golangci-lint warning

---
 app/vmselect/prometheus/prometheus.go | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/app/vmselect/prometheus/prometheus.go b/app/vmselect/prometheus/prometheus.go
index db1f89e764..f5eebc0353 100644
--- a/app/vmselect/prometheus/prometheus.go
+++ b/app/vmselect/prometheus/prometheus.go
@@ -180,8 +180,10 @@ func ExportCSVHandler(startTime time.Time, w http.ResponseWriter, r *http.Reques
 		close(resultsCh)
 		doneCh <- err
 	}()
+	// Consume all the data from resultsCh.
 	for bb := range resultsCh {
-		bw.Write(bb.B)
+		// Do not check for error in bw.Write, since this error is checked inside netstorage.ExportBlocks above.
+		_, _ = bw.Write(bb.B)
 		quicktemplate.ReleaseByteBuffer(bb)
 	}
 	if err := bw.Flush(); err != nil {

From d8af290947cd488f99f74737b70929b2a1a20c51 Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin <valyala@gmail.com>
Date: Tue, 13 Oct 2020 11:56:53 +0300
Subject: [PATCH 18/24] app/vmselect/promql: fix `mode_over_time` calculations

Previously `mode_over_time` could return garbage due to improper shuffling
of input data points.
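
The fix is to pass a private copy of the input to `modeNoNaNs`, since
`modeNoNaNs` modifies the contents of the passed slice, while `rfa.values`
may be shared with other rollup calculations. A minimal sketch of the idea
(illustrative only; the actual change below re-uses buffers via a sync.Pool
in order to avoid extra allocations):

	// Work on a private copy, since modeNoNaNs modifies the passed slice.
	a := append([]float64{}, rfa.values...)
	return modeNoNaNs(rfa.prevValue, a)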
--- CHANGELOG.md | 1 + app/vmselect/promql/aggr.go | 2 ++ app/vmselect/promql/rollup.go | 18 +++++++++++++++++- app/vmselect/promql/rollup_test.go | 6 +++--- 4 files changed, 23 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cf37cfcad0..d2f92b3804 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,6 +52,7 @@ See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/674 * BUGFIX: vmalert: accept days, weeks and years in `for: ` part of config like Prometheus does. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/817 +* BUGFIX: fix `mode_over_time(m[d])` calculations. Previously the function could return incorrect results. # [v1.43.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.43.0) diff --git a/app/vmselect/promql/aggr.go b/app/vmselect/promql/aggr.go index e38b8dd77a..b87b34a64a 100644 --- a/app/vmselect/promql/aggr.go +++ b/app/vmselect/promql/aggr.go @@ -494,6 +494,8 @@ func aggrFuncZScore(afa *aggrFuncArg) ([]*timeseries, error) { // // It is expected that a doesn't contain NaNs. // +// The function modifies contents for a, so the caller must prepare it accordingly. +// // See https://en.wikipedia.org/wiki/Mode_(statistics) func modeNoNaNs(prevValue float64, a []float64) float64 { if len(a) == 0 { diff --git a/app/vmselect/promql/rollup.go b/app/vmselect/promql/rollup.go index 3c34d43013..56d26e2c53 100644 --- a/app/vmselect/promql/rollup.go +++ b/app/vmselect/promql/rollup.go @@ -1587,7 +1587,23 @@ func rollupTimestamp(rfa *rollupFuncArg) float64 { func rollupModeOverTime(rfa *rollupFuncArg) float64 { // There is no need in handling NaNs here, since they must be cleaned up // before calling rollup funcs. - return modeNoNaNs(rfa.prevValue, rfa.values) + + // Copy rfa.values to a.A, since modeNoNaNs modifies a.A contents. + a := float64sPool.Get().(*float64s) + a.A = append(a.A[:0], rfa.values...) 
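+	// NOTE: a.A now holds a private copy of rfa.values, so modeNoNaNs
+	// may modify it freely without corrupting the shared input values.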
+ result := modeNoNaNs(rfa.prevValue, a.A) + float64sPool.Put(a) + return result +} + +var float64sPool = &sync.Pool{ + New: func() interface{} { + return &float64s{} + }, +} + +type float64s struct { + A []float64 } func rollupAscentOverTime(rfa *rollupFuncArg) float64 { diff --git a/app/vmselect/promql/rollup_test.go b/app/vmselect/promql/rollup_test.go index d698d8c8eb..b66136317d 100644 --- a/app/vmselect/promql/rollup_test.go +++ b/app/vmselect/promql/rollup_test.go @@ -1012,7 +1012,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{nan, nan, 34, 44, 44} + valuesExpected := []float64{nan, 21, 34, 34, 34} timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) @@ -1026,7 +1026,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{nan, 1262.5, 3187.5, 4059.523809523809, 6200} + valuesExpected := []float64{nan, 2775, 5262.5, 3678.5714285714284, 2880} timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) @@ -1040,7 +1040,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{nan, 0.9397878236968458, 1.1969836716333457, 2.3112921116373175, nan} + valuesExpected := []float64{nan, -0.86650328627136, -1.1200838283548589, -0.40035755084856683, nan} timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) From 9aa3b6576648ca23dba1c11ef213e91123203399 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin <valyala@gmail.com> Date: Tue, 13 Oct 2020 12:07:59 +0300 Subject: [PATCH 19/24] app/vmselect/promql: improve time series staleness detection This should prevent from double counting for time series at the time when it changes label. The most common case is in K8S, which changes pod uid label with each new deployment. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/748 --- CHANGELOG.md | 2 ++ app/vmselect/promql/exec_test.go | 4 +-- app/vmselect/promql/rollup.go | 11 ++++++- app/vmselect/promql/rollup_test.go | 48 +++++++++++++++--------------- 4 files changed, 38 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d2f92b3804..b645b6b446 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,6 +51,8 @@ * `predict_linear` See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/674 +* BUGFIX: properly handle stale time series after K8S deployment. Previously such time series could be double-counted. + See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/748 * BUGFIX: vmalert: accept days, weeks and years in `for: ` part of config like Prometheus does. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/817 * BUGFIX: fix `mode_over_time(m[d])` calculations. Previously the function could return incorrect results. 
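
The staleness rule added in the `app/vmselect/promql/rollup.go` hunk below drops
the trailing data point of a time series from windows ending more than ~0.9 of a
scrape interval after that point, so such windows yield no value instead of
re-counting a stale sample. A hedged sketch of the core predicate (an
illustrative helper, not part of the patch; the real code additionally checks
that the sample is the final one in the series; timestamps are in milliseconds):

	// lastPointIsStale reports whether the last raw sample of a series
	// should be ignored for the window ending at tEnd.
	func lastPointIsStale(lastTimestamp, tEnd, scrapeInterval int64) bool {
		stalenessInterval := int64(float64(scrapeInterval) * 0.9)
		return tEnd-lastTimestamp > stalenessInterval
	}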
diff --git a/app/vmselect/promql/exec_test.go b/app/vmselect/promql/exec_test.go index cf4e1c6e3b..9104656e6e 100644 --- a/app/vmselect/promql/exec_test.go +++ b/app/vmselect/promql/exec_test.go @@ -4453,7 +4453,7 @@ func TestExecSuccess(t *testing.T) { q := `distinct_over_time((time() < 1700)[500s])` r1 := netstorage.Result{ MetricName: metricNameExpected, - Values: []float64{3, 3, 3, 3, 2, 1}, + Values: []float64{3, 3, 3, 3, nan, nan}, Timestamps: timestampsExpected, } resultExpected := []netstorage.Result{r1} @@ -4464,7 +4464,7 @@ func TestExecSuccess(t *testing.T) { q := `distinct_over_time((time() < 1700)[2.5i])` r1 := netstorage.Result{ MetricName: metricNameExpected, - Values: []float64{3, 3, 3, 3, 2, 1}, + Values: []float64{3, 3, 3, 3, nan, nan}, Timestamps: timestampsExpected, } resultExpected := []netstorage.Result{r1} diff --git a/app/vmselect/promql/rollup.go b/app/vmselect/promql/rollup.go index 56d26e2c53..a9718f008c 100644 --- a/app/vmselect/promql/rollup.go +++ b/app/vmselect/promql/rollup.go @@ -500,6 +500,7 @@ func (rc *rollupConfig) doInternal(dstValues []float64, tsm *timeseriesMap, valu j := 0 ni := 0 nj := 0 + stalenessInterval := int64(float64(scrapeInterval) * 0.9) for _, tEnd := range rc.Timestamps { tStart := tEnd - window ni = seekFirstTimestampIdxAfter(timestamps[i:], tStart, ni) @@ -516,9 +517,17 @@ func (rc *rollupConfig) doInternal(dstValues []float64, tsm *timeseriesMap, valu rfa.prevValue = values[i-1] rfa.prevTimestamp = timestamps[i-1] } - rfa.values = values[i:j] rfa.timestamps = timestamps[i:j] + if j == len(timestamps) && i < j && tEnd-timestamps[j-1] > stalenessInterval { + // Do not take into account the last data point in time series if the distance between this data point + // and tEnd exceeds stalenessInterval. + // This should prevent from double counting when a label changes in time series (for instance, + // during new deployment in K8S). 
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/748 + rfa.prevValue = nan + rfa.values = nil + rfa.timestamps = nil + } rfa.currTimestamp = tEnd value := rc.Func(rfa) rfa.idx++ diff --git a/app/vmselect/promql/rollup_test.go b/app/vmselect/promql/rollup_test.go index b66136317d..fb7083d809 100644 --- a/app/vmselect/promql/rollup_test.go +++ b/app/vmselect/promql/rollup_test.go @@ -583,7 +583,7 @@ func TestRollupNoWindowPartialPoints(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{nan, nan, 123, 34, 32} + valuesExpected := []float64{nan, nan, 123, 34, nan} timestampsExpected := []int64{-50, 0, 50, 100, 150} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) @@ -690,7 +690,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{nan, 123, 54, 44, 34} + valuesExpected := []float64{nan, 123, 54, 44, nan} timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) @@ -704,7 +704,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{nan, 4, 4, 3, 1} + valuesExpected := []float64{nan, 4, 4, 3, nan} timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) @@ -718,7 +718,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{nan, 21, 12, 32, 34} + valuesExpected := []float64{nan, 21, 12, 32, nan} timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) @@ -732,7 +732,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{nan, 123, 99, 44, 34} + valuesExpected := []float64{nan, 123, 99, 44, nan} timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) @@ -746,7 +746,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{nan, 222, 199, 110, 34} + valuesExpected := []float64{nan, 222, 199, 110, nan} timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) @@ -760,7 +760,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{nan, nan, -9, 22, 0} + valuesExpected := []float64{nan, nan, -9, 22, nan} timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) @@ -788,7 +788,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{nan, 0.004, 0, 0, 0.03} + valuesExpected := []float64{nan, 0.004, 0, 0, nan} timestampsExpected := 
[]int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) @@ -802,7 +802,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{nan, 0.031, 0.044, 0.04, 0.01} + valuesExpected := []float64{nan, 0.031, 0.044, 0.04, nan} timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) @@ -816,7 +816,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{nan, 0.031, 0.075, 0.115, 0.125} + valuesExpected := []float64{nan, 0.031, 0.075, 0.115, nan} timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) @@ -830,7 +830,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{nan, 0.010333333333333333, 0.011, 0.013333333333333334, 0.01} + valuesExpected := []float64{nan, 0.010333333333333333, 0.011, 0.013333333333333334, nan} timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) @@ -844,7 +844,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{nan, 0.010333333333333333, 0.010714285714285714, 0.012, 0.0125} + valuesExpected := []float64{nan, 0.010333333333333333, 0.010714285714285714, 0.012, nan} timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) @@ -858,7 +858,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{nan, 4, 4, 3, 0} + valuesExpected := []float64{nan, 4, 4, 3, nan} timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) @@ -886,7 +886,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{nan, 2, 2, 1, 0} + valuesExpected := []float64{nan, 2, 2, 1, nan} timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) @@ -900,7 +900,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{nan, 55.5, 49.75, 36.666666666666664, 34} + valuesExpected := []float64{nan, 55.5, 49.75, 36.666666666666664, nan} timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) @@ -914,7 +914,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{0, -2879.310344827587, 558.0608793686595, 422.84569138276544, 0} + valuesExpected := []float64{0, -2879.310344827587, 558.0608793686595, 422.84569138276544, nan} 
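+		// The trailing window now yields nan, since its last data point is treated as stale.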
timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) @@ -942,7 +942,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{nan, -1916.6666666666665, -43500, 400, 0} + valuesExpected := []float64{nan, -1916.6666666666665, -43500, 400, nan} timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) @@ -956,7 +956,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{nan, 39.81519810323691, 32.080952292598795, 5.2493385826745405, 5.830951894845301} + valuesExpected := []float64{nan, 39.81519810323691, 32.080952292598795, 5.2493385826745405, nan} timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) @@ -970,7 +970,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{nan, 2.148, 1.593, 1.156, 1.36} + valuesExpected := []float64{nan, 2.148, 1.593, 1.156, nan} timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) @@ -984,7 +984,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{nan, 4, 4, 3, 1} + valuesExpected := []float64{nan, 4, 4, 3, nan} timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) @@ -998,7 +998,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{nan, 4, 7, 6, 3} + valuesExpected := []float64{nan, 4, 7, 6, nan} timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) @@ -1012,7 +1012,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{nan, 21, 34, 34, 34} + valuesExpected := []float64{nan, 21, 34, 34, nan} timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) @@ -1026,7 +1026,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{nan, 2775, 5262.5, 3678.5714285714284, 2880} + valuesExpected := []float64{nan, 2775, 5262.5, 3678.5714285714284, nan} timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) @@ -1062,7 +1062,7 @@ func TestRollupBigNumberOfValues(t *testing.T) { srcTimestamps[i] = int64(i / 2) } values := rc.Do(nil, srcValues, srcTimestamps) - valuesExpected := []float64{1, 4001, 8001, 9999, nan, nan} + valuesExpected := []float64{1, 4001, 8001, nan, nan, nan} timestampsExpected := []int64{0, 2000, 4000, 6000, 8000, 10000} testRowsEqual(t, values, 
rc.Timestamps, valuesExpected, timestampsExpected) } From a2b9476897ce241ed090040ea2e952050cf10a5c Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin <valyala@gmail.com> Date: Tue, 13 Oct 2020 15:53:31 +0300 Subject: [PATCH 20/24] app/vmselect/promql: return a single time series at max from `absent()` function like Prometheus does --- CHANGELOG.md | 1 + app/vmselect/promql/exec_test.go | 4 ---- app/vmselect/promql/transform.go | 25 +++++++++++++------------ 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b645b6b446..aba39bad1f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -53,6 +53,7 @@ * BUGFIX: properly handle stale time series after K8S deployment. Previously such time series could be double-counted. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/748 +* BUGFIX: return a single time series at max from `absent()` function like Prometheus does. * BUGFIX: vmalert: accept days, weeks and years in `for: ` part of config like Prometheus does. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/817 * BUGFIX: fix `mode_over_time(m[d])` calculations. Previously the function could return incorrect results. diff --git a/app/vmselect/promql/exec_test.go b/app/vmselect/promql/exec_test.go index 9104656e6e..3c5c5501dd 100644 --- a/app/vmselect/promql/exec_test.go +++ b/app/vmselect/promql/exec_test.go @@ -582,10 +582,6 @@ func TestExecSuccess(t *testing.T) { Values: []float64{1, 1, 1, 1, 1, 1}, Timestamps: timestampsExpected, } - r.MetricName.Tags = []storage.Tag{{ - Key: []byte("yy"), - Value: []byte("foo"), - }} resultExpected := []netstorage.Result{r} f(q, resultExpected) }) diff --git a/app/vmselect/promql/transform.go b/app/vmselect/promql/transform.go index bc39a112f0..660f8bb8e9 100644 --- a/app/vmselect/promql/transform.go +++ b/app/vmselect/promql/transform.go @@ -167,23 +167,24 @@ func transformAbsent(tfa *transformFuncArg) ([]*timeseries, error) { if err := expectTransformArgsNum(args, 1); err != nil { return nil, err } - arg := args[0] - if len(arg) == 0 { - rvs := getAbsentTimeseries(tfa.ec, tfa.fe.Args[0]) + tss := args[0] + rvs := getAbsentTimeseries(tfa.ec, tfa.fe.Args[0]) + if len(tss) == 0 { return rvs, nil } - for _, ts := range arg { - ts.MetricName.ResetMetricGroup() - for i, v := range ts.Values { - if !math.IsNaN(v) { - v = nan - } else { - v = 1 + for i := range tss[0].Values { + isAbsent := true + for _, ts := range tss { + if !math.IsNaN(ts.Values[i]) { + isAbsent = false + break } - ts.Values[i] = v + } + if !isAbsent { + rvs[0].Values[i] = nan } } - return arg, nil + return rvs, nil } func getAbsentTimeseries(ec *EvalConfig, arg metricsql.Expr) []*timeseries { From 8e20bc7b53312d9078a0576d7443437d3dbbae74 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin <valyala@gmail.com> Date: Tue, 13 Oct 2020 16:47:19 +0300 Subject: [PATCH 21/24] docs/Cluster-VictoriaMetrics.md: clarify RAM requirements for `vmstorage` nodes --- docs/Cluster-VictoriaMetrics.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/Cluster-VictoriaMetrics.md b/docs/Cluster-VictoriaMetrics.md index 9705548da4..8c7668dec0 100644 --- a/docs/Cluster-VictoriaMetrics.md +++ b/docs/Cluster-VictoriaMetrics.md @@ -192,6 +192,7 @@ or [an alternative dashboard for VictoriaMetrics cluster](https://grafana.com/gr - `federate` - returns [federated metrics](https://prometheus.io/docs/prometheus/latest/federation/). - `api/v1/export` - exports raw data in JSON line format. 
See [this article](https://medium.com/@valyala/analyzing-prometheus-data-with-external-tools-5f3e5e147639) for details.
   - `api/v1/export/native` - exports raw data in native binary format. It may be imported into another VictoriaMetrics via `api/v1/import/native` (see above).
+  - `api/v1/export/csv` - exports data in CSV format. It may be imported into another VictoriaMetrics via `api/v1/import/csv` (see above).
   - `api/v1/status/tsdb` - for time series stats. See [these docs](https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats) for details.
   - `api/v1/status/active_queries` - for currently executed active queries. Note that every `vmselect` maintains an independent list of active queries,
     which is returned in the response.
@@ -279,6 +280,12 @@ Each instance type - `vminsert`, `vmselect` and `vmstorage` - can run on the mos
 * The recommended total number of vCPU cores for all the `vmstorage` instances can be calculated from the ingestion rate: `vCPUs = ingestion_rate / 150K`.
 * The recommended total amount of RAM for all the `vmstorage` instances can be calculated from the number of active time series: `RAM = active_time_series * 1KB`.
   A time series is active if it received at least a single data point during the last hour or if it has been queried during the last hour.
+  The required RAM for each `vmstorage` should be multiplied by `-replicationFactor` if [replication](#replication-and-data-safety) is enabled.
+  Additional RAM can be required for query processing.
+  Calculated RAM requirements may differ from actual RAM requirements due to various factors:
+  * The average number of labels per time series. More labels require more RAM.
+  * The average length of label names and label values. Longer labels require more RAM.
+  * The type of queries. Heavy queries that scan a big number of time series over long time ranges require more RAM.
 * The recommended total amount of storage space for all the `vmstorage` instances can be calculated from the ingestion rate and retention: `storage_space = ingestion_rate * retention_seconds`.

From 94978af9bcf76c73041d08aeda9d6693b8446e65 Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin <valyala@gmail.com>
Date: Tue, 13 Oct 2020 16:59:33 +0300
Subject: [PATCH 22/24] CHANGELOG.md: cut v1.44.0 release

---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index aba39bad1f..8115671c47 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
 # tip
 
+
+# [v1.44.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.44.0)
+
 * FEATURE: automatically add missing label filters to binary operands as described at https://utcc.utoronto.ca/~cks/space/blog/sysadmin/PrometheusLabelNonOptimization .
   This should improve performance for queries with missing label filters in binary operands.
   For example, the following query should work faster now, because it shouldn't fetch and discard time series for `node_filesystem_files_free` metric
   without matching labels for the left side of the expression:

From bc42b5598f0468cca7a39ab79a07021973c46003 Mon Sep 17 00:00:00 2001
From: Roman Khavronenko <hagen1778@gmail.com>
Date: Tue, 13 Oct 2020 18:32:43 +0300
Subject: [PATCH 23/24] vmalert: update docs to highlight the state restore requirements; (#833)

Address https://github.com/VictoriaMetrics/VictoriaMetrics/issues/830

---
 app/vmalert/README.md | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/app/vmalert/README.md b/app/vmalert/README.md
index 77d01a160b..6601c5b9c7 100644
--- a/app/vmalert/README.md
+++ b/app/vmalert/README.md
@@ -11,6 +11,7 @@ rules against configured address.
 * Prometheus [alerting rules definition format](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/#defining-alerting-rules) support;
 * Integration with [Alertmanager](https://github.com/prometheus/alertmanager);
+* Keeps the alerts [state on restarts](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/app/vmalert#alerts-state-on-restarts);
 * Lightweight without extra dependencies.
 
 ### Limitations:
@@ -121,14 +122,6 @@ annotations:
   [ <labelname>: <tmpl_string> ]
 ```
 
-`vmalert` has no local storage and alerts state is stored in process memory. Hence, after reloading of `vmalert` process
-alerts state will be lost. To avoid this situation, `vmalert` may be configured via following flags:
-* `-remoteWrite.url` - URL to Victoria Metrics or VMInsert. `vmalert` will persist alerts state into the configured
-address in form of timeseries with name `ALERTS` via remote-write protocol.
-* `-remoteRead.url` - URL to Victoria Metrics or VMSelect. `vmalert` will try to restore alerts state from configured
-address by querying `ALERTS` timeseries.
-
-
 ##### Recording rules
 
 The syntax for recording rules is the following:
@@ -147,6 +140,22 @@ labels:
 
 For recording rules to work `-remoteWrite.url` must be specified.
 
+#### Alerts state on restarts
+
+`vmalert` has no local storage, so the alerts state is stored in the process memory. Hence, after restarting the `vmalert`
+process, the alerts state will be lost. To avoid this situation, `vmalert` should be configured via the following flags:
+* `-remoteWrite.url` - URL to VictoriaMetrics (Single) or VMInsert (Cluster). `vmalert` will persist the alerts state
+into the configured address in the form of time series named `ALERTS` and `ALERTS_FOR_STATE` via the remote-write protocol.
+These are regular time series and may be queried from VM just like any other time series.
+The state is stored to the configured address on every rule evaluation.
+* `-remoteRead.url` - URL to VictoriaMetrics (Single) or VMSelect (Cluster). `vmalert` will try to restore the alerts state
+from the configured address by querying time series with the name `ALERTS_FOR_STATE`.
+
+Both flags are required for proper state restoration. The restore process may fail if the time series are missing
+in the configured `-remoteRead.url`, weren't updated during the last `1h`, or the received state doesn't match the current
+`vmalert` rules configuration.
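+
+For illustration, a minimal `vmalert` invocation with both state persistence and state restore enabled
+may look like the following sketch (the rule file name and all URLs below are placeholders; point them
+at your own VictoriaMetrics and Alertmanager instances):
+
+```
+# Hypothetical setup: replace alerts.yml and the URLs with real addresses.
+./vmalert -rule=alerts.yml \
+    -datasource.url=http://victoriametrics:8428 \
+    -notifier.url=http://alertmanager:9093 \
+    -remoteWrite.url=http://victoriametrics:8428 \
+    -remoteRead.url=http://victoriametrics:8428
+```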
+
+
 #### WEB
 
 `vmalert` runs a web-server (`-httpListenAddr`) for serving metrics and alerts endpoints:

From 590d8d537fa0efcd2059c75f34ab1f6b6f1e3ff1 Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin <valyala@gmail.com>
Date: Tue, 13 Oct 2020 18:34:25 +0300
Subject: [PATCH 24/24] docs/vmalert.md: `make docs-sync`

---
 docs/vmalert.md | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/docs/vmalert.md b/docs/vmalert.md
index 77d01a160b..6601c5b9c7 100644
--- a/docs/vmalert.md
+++ b/docs/vmalert.md
@@ -11,6 +11,7 @@ rules against configured address.
 * Prometheus [alerting rules definition format](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/#defining-alerting-rules) support;
 * Integration with [Alertmanager](https://github.com/prometheus/alertmanager);
+* Keeps the alerts [state on restarts](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/app/vmalert#alerts-state-on-restarts);
 * Lightweight without extra dependencies.
 
 ### Limitations:
@@ -121,14 +122,6 @@ annotations:
   [ <labelname>: <tmpl_string> ]
 ```
 
-`vmalert` has no local storage and alerts state is stored in process memory. Hence, after reloading of `vmalert` process
-alerts state will be lost. To avoid this situation, `vmalert` may be configured via following flags:
-* `-remoteWrite.url` - URL to Victoria Metrics or VMInsert. `vmalert` will persist alerts state into the configured
-address in form of timeseries with name `ALERTS` via remote-write protocol.
-* `-remoteRead.url` - URL to Victoria Metrics or VMSelect. `vmalert` will try to restore alerts state from configured
-address by querying `ALERTS` timeseries.
-
-
 ##### Recording rules
 
 The syntax for recording rules is the following:
@@ -147,6 +140,22 @@ labels:
 
 For recording rules to work `-remoteWrite.url` must be specified.
 
+#### Alerts state on restarts
+
+`vmalert` has no local storage, so the alerts state is stored in the process memory. Hence, after restarting the `vmalert`
+process, the alerts state will be lost. To avoid this situation, `vmalert` should be configured via the following flags:
+* `-remoteWrite.url` - URL to VictoriaMetrics (Single) or VMInsert (Cluster). `vmalert` will persist the alerts state
+into the configured address in the form of time series named `ALERTS` and `ALERTS_FOR_STATE` via the remote-write protocol.
+These are regular time series and may be queried from VM just like any other time series.
+The state is stored to the configured address on every rule evaluation.
+* `-remoteRead.url` - URL to VictoriaMetrics (Single) or VMSelect (Cluster). `vmalert` will try to restore the alerts state
+from the configured address by querying time series with the name `ALERTS_FOR_STATE`.
+
+Both flags are required for proper state restoration. The restore process may fail if the time series are missing
+in the configured `-remoteRead.url`, weren't updated during the last `1h`, or the received state doesn't match the current
+`vmalert` rules configuration.
+
+
 #### WEB
 
 `vmalert` runs a web-server (`-httpListenAddr`) for serving metrics and alerts endpoints: