Adds ready probe (#874)

* adds leading forward slash check for scrapeURL path
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/835

* adds ready probe for scrape config initialization;
it should prevent metrics loss during vmagent rolling update.
The /ready API returns HTTP 425 status code if some scrape configs are still waiting for initialization.

* updates docs

* Update app/vmagent/README.md

* renames var

* Update app/vmagent/README.md

Co-authored-by: Aliaksandr Valialkin <valyala@gmail.com>
Nikolay 2020-11-04 21:29:18 +03:00 committed by Aliaksandr Valialkin
parent d8a7186019
commit 5b235b902b
4 changed files with 27 additions and 1 deletion

app/vmagent/README.md

@@ -219,6 +219,8 @@ It accepts optional `show_original_labels=1` query arg, which shows the original
This information may be useful for debugging target relabeling.
* `http://vmagent-host:8429/api/v1/targets`. This handler returns data compatible with [the corresponding page from Prometheus API](https://prometheus.io/docs/prometheus/latest/querying/api/#targets).
* `http://vmagent-host:8429/ready` - this handler returns HTTP 200 status code when `vmagent` finishes initialization for all service_discovery configs.
It may be useful when you have many entries in `-promscrape.config` and want to perform a `vmagent` rolling update without scrape loss.
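For example, a deployment script can hold off on the next step of a rolling update until `/ready` stops returning 425. The snippet below is a minimal illustration, not part of this commit; the address and timeout are assumptions:

```go
package main

import (
	"fmt"
	"net/http"
	"time"
)

// waitForReady polls the vmagent /ready endpoint until it returns 200 OK
// or the timeout expires. The endpoint address is an assumption for this example.
func waitForReady(baseURL string, timeout time.Duration) error {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		resp, err := http.Get(baseURL + "/ready")
		if err == nil {
			resp.Body.Close()
			if resp.StatusCode == http.StatusOK {
				return nil
			}
		}
		time.Sleep(time.Second)
	}
	return fmt.Errorf("vmagent at %s not ready after %s", baseURL, timeout)
}

func main() {
	if err := waitForReady("http://vmagent-host:8429", 2*time.Minute); err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println("vmagent is ready")
}
```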
### Troubleshooting

app/vmagent/main.go

@@ -7,6 +7,7 @@ import (
	"os"
	"strconv"
	"strings"
	"sync/atomic"
	"time"

	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/csvimport"
@@ -222,6 +223,16 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
		procutil.SelfSIGHUP()
		w.WriteHeader(http.StatusOK)
		return true
	case "/ready":
		if rdy := atomic.LoadInt32(&promscrape.PendingScrapeConfigs); rdy > 0 {
			errMsg := fmt.Sprintf("waiting for scrapes to init, left: %d", rdy)
			http.Error(w, errMsg, http.StatusTooEarly)
		} else {
			w.Header().Set("Content-Type", "text/plain")
			w.WriteHeader(http.StatusOK)
			w.Write([]byte("OK"))
		}
		return true
	}
	return false
}
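The handler boils down to a single atomic counter check: 425 Too Early while initialization is pending, 200 OK afterwards. The following standalone sketch, which is not vmagent code (the names `pending` and `readyHandler` are illustrative), shows the same pattern with `net/http/httptest`:

```go
package main

import (
	"fmt"
	"net/http"
	"net/http/httptest"
	"sync/atomic"
)

// pending mimics promscrape.PendingScrapeConfigs: the number of scrape
// configs which haven't finished initialization yet.
var pending int32 = 2

// readyHandler returns 425 Too Early while pending > 0 and 200 OK afterwards.
func readyHandler(w http.ResponseWriter, r *http.Request) {
	if n := atomic.LoadInt32(&pending); n > 0 {
		http.Error(w, fmt.Sprintf("waiting for scrapes to init, left: %d", n), http.StatusTooEarly)
		return
	}
	w.Header().Set("Content-Type", "text/plain")
	w.WriteHeader(http.StatusOK)
	w.Write([]byte("OK"))
}

func main() {
	srv := httptest.NewServer(http.HandlerFunc(readyHandler))
	defer srv.Close()

	check := func() {
		resp, err := http.Get(srv.URL)
		if err != nil {
			panic(err)
		}
		fmt.Println(resp.StatusCode)
		resp.Body.Close()
	}
	check()                       // prints 425 while pending > 0
	atomic.AddInt32(&pending, -2) // pretend both scrape configs finished init
	check()                       // prints 200 once the counter reaches zero
}
```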

docs/vmagent.md

@@ -219,6 +219,8 @@ It accepts optional `show_original_labels=1` query arg, which shows the original
This information may be useful for debugging target relabeling.
* `http://vmagent-host:8429/api/v1/targets`. This handler returns data compatible with [the corresponding page from Prometheus API](https://prometheus.io/docs/prometheus/latest/querying/api/#targets).
* `http://vmagent-host:8429/ready` - this handler returns HTTP 200 status code when `vmagent` finishes initialization for all service_discovery configs.
It may be useful when you have a lot of entries in `-promscrape.config` and want to perform a `vmagent` rolling update without metrics loss.
### Troubleshooting

lib/promscrape/scraper.go

@@ -5,6 +5,7 @@ import (
	"flag"
	"fmt"
	"sync"
	"sync/atomic"
	"time"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
@@ -75,6 +76,9 @@ func Stop() {
var (
	globalStopCh chan struct{}
	scraperWG    sync.WaitGroup
	// PendingScrapeConfigs is the number of scrape configs which haven't been initialized yet.
	// Zero value means that all scrapeConfigs are initialized and ready for work.
	PendingScrapeConfigs int32
)

func runScraper(configFile string, pushData func(wr *prompbmarshal.WriteRequest), globalStopCh <-chan struct{}) {
@@ -166,6 +170,7 @@ func newScrapeConfigs(pushData func(wr *prompbmarshal.WriteRequest)) *scrapeConf
}

func (scs *scrapeConfigs) add(name string, checkInterval time.Duration, getScrapeWork func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork) {
	atomic.AddInt32(&PendingScrapeConfigs, 1)
	scfg := &scrapeConfig{
		name:     name,
		pushData: scs.pushData,
@@ -216,10 +221,15 @@ func (scfg *scrapeConfig) run() {
	cfg := <-scfg.cfgCh
	var swsPrev []ScrapeWork
	updateScrapeWork := func(cfg *Config) {
		sws := scfg.getScrapeWork(cfg, swsPrev)
		sg.update(sws)
		swsPrev = sws
	}
	updateScrapeWork(cfg)
	atomic.AddInt32(&PendingScrapeConfigs, -1)

	for {
		select {
		case <-scfg.stopCh:
@@ -227,6 +237,7 @@ func (scfg *scrapeConfig) run() {
		case cfg = <-scfg.cfgCh:
		case <-tickerCh:
		}
		updateScrapeWork(cfg)
	}
}
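The counter's lifecycle is the key detail: `add()` increments `PendingScrapeConfigs` for every registered scrape config, and `run()` decrements it only after the first `updateScrapeWork` call completes, so `/ready` flips to 200 only once every config has produced its initial scrape targets. A rough standalone sketch of that lifecycle, with illustrative names rather than the actual scraper code:

```go
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
	"time"
)

// pendingConfigs mirrors the PendingScrapeConfigs idea: every registered
// config increments it and decrements it after its first successful update.
var pendingConfigs int32

// startConfig is an illustrative stand-in for scrapeConfigs.add + scrapeConfig.run:
// the counter is bumped at registration time and released after the initial update.
func startConfig(name string, initDelay time.Duration, wg *sync.WaitGroup) {
	atomic.AddInt32(&pendingConfigs, 1)
	wg.Add(1)
	go func() {
		defer wg.Done()
		time.Sleep(initDelay) // pretend service discovery / the first getScrapeWork call takes a while
		atomic.AddInt32(&pendingConfigs, -1)
	}()
}

func main() {
	var wg sync.WaitGroup
	startConfig("static_configs", 50*time.Millisecond, &wg)
	startConfig("kubernetes_sd_configs", 200*time.Millisecond, &wg)

	// /ready would report 425 here, since some configs are still initializing.
	fmt.Println("pending:", atomic.LoadInt32(&pendingConfigs))

	wg.Wait()
	// The counter is now zero, so /ready would report 200.
	fmt.Println("pending:", atomic.LoadInt32(&pendingConfigs))
}
```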