Extend metric vm_promscrape_targets with status label (#615)

The change to the `vm_promscrape_targets` metric is supposed to improve observability
for `vmagent`, making it possible to track how many targets are up or down
for each scrape group:
```
vm_promscrape_targets{type="static_configs", status="down"} 1
vm_promscrape_targets{type="static_configs", status="up"} 2
```
This commit is contained in:
Roman Khavronenko 2020-07-13 19:52:03 +01:00 committed by GitHub
parent 55d83e777d
commit 829ec4f9cf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 30 additions and 9 deletions

View file

@ -236,11 +236,11 @@ func newScraperGroup(name string, pushData func(wr *prompbmarshal.WriteRequest))
pushData: pushData,
changesCount: metrics.NewCounter(fmt.Sprintf(`vm_promscrape_config_changes_total{type=%q}`, name)),
}
metrics.NewGauge(fmt.Sprintf(`vm_promscrape_targets{type=%q}`, name), func() float64 {
sg.mLock.Lock()
n := len(sg.m)
sg.mLock.Unlock()
return float64(n)
metrics.NewGauge(fmt.Sprintf(`vm_promscrape_targets{type=%q, status="up"}`, name), func() float64 {
return float64(tsmGlobal.StatusByGroup(sg.name, true))
})
metrics.NewGauge(fmt.Sprintf(`vm_promscrape_targets{type=%q, status="down"}`, name), func() float64 {
return float64(tsmGlobal.StatusByGroup(sg.name, false))
})
return sg
}
@ -277,7 +277,7 @@ func (sg *scraperGroup) update(sws []ScrapeWork) {
}
// Start a scraper for the missing key.
sc := newScraper(sw, sg.pushData)
sc := newScraper(sw, sg.name, sg.pushData)
sg.wg.Add(1)
go func() {
defer sg.wg.Done()
@ -309,12 +309,13 @@ type scraper struct {
stopCh chan struct{}
}
func newScraper(sw *ScrapeWork, pushData func(wr *prompbmarshal.WriteRequest)) *scraper {
func newScraper(sw *ScrapeWork, group string, pushData func(wr *prompbmarshal.WriteRequest)) *scraper {
sc := &scraper{
stopCh: make(chan struct{}),
}
c := newClient(sw)
sc.sw.Config = *sw
sc.sw.ScrapeGroup = group
sc.sw.ReadData = c.ReadData
sc.sw.PushData = pushData
return sc

View file

@ -120,6 +120,10 @@ type scrapeWork struct {
// PushData is called for pushing collected data.
PushData func(wr *prompbmarshal.WriteRequest)
// ScrapeGroup is the name of the scrape group
// that this scrapeWork belongs to.
ScrapeGroup string
bodyBuf []byte
rows parser.Rows
tmpRow parser.Row
@ -232,7 +236,7 @@ func (sw *scrapeWork) scrapeInternal(timestamp int64) error {
prompbmarshal.ResetWriteRequest(&sw.writeRequest)
sw.labels = sw.labels[:0]
sw.samples = sw.samples[:0]
tsmGlobal.Update(&sw.Config, up == 1, timestamp, int64(duration*1000), err)
tsmGlobal.Update(&sw.Config, sw.ScrapeGroup, up == 1, timestamp, int64(duration*1000), err)
return err
}

View file

@ -46,11 +46,12 @@ func (tsm *targetStatusMap) Unregister(sw *ScrapeWork) {
tsm.mu.Unlock()
}
func (tsm *targetStatusMap) Update(sw *ScrapeWork, up bool, scrapeTime, scrapeDuration int64, err error) {
func (tsm *targetStatusMap) Update(sw *ScrapeWork, group string, up bool, scrapeTime, scrapeDuration int64, err error) {
tsm.mu.Lock()
tsm.m[sw.ID] = targetStatus{
sw: sw,
up: up,
scrapeGroup: group,
scrapeTime: scrapeTime,
scrapeDuration: scrapeDuration,
err: err,
@ -58,6 +59,20 @@ func (tsm *targetStatusMap) Update(sw *ScrapeWork, up bool, scrapeTime, scrapeDu
tsm.mu.Unlock()
}
// StatusByGroup counts the targets registered under the given scrape group
// whose recorded up flag equals the up argument: pass true to count targets
// that are up, false to count targets that are down.
//
// The status map is locked for the duration of the scan.
func (tsm *targetStatusMap) StatusByGroup(group string, up bool) int {
	tsm.mu.Lock()
	defer tsm.mu.Unlock()

	matched := 0
	for _, status := range tsm.m {
		// Skip entries from other groups or with the opposite up state.
		if status.scrapeGroup != group || status.up != up {
			continue
		}
		matched++
	}
	return matched
}
func (tsm *targetStatusMap) WriteHumanReadable(w io.Writer) {
byJob := make(map[string][]targetStatus)
tsm.mu.Lock()
@ -116,6 +131,7 @@ type jobStatus struct {
type targetStatus struct {
sw *ScrapeWork
up bool
scrapeGroup string
scrapeTime int64
scrapeDuration int64
err error