Extend metric vm_promscrape_targets with status label (#615)

The change to the `vm_promscrape_targets` metric is supposed to improve observability
for `vmagent`, making it possible to track how many targets are up or down
for each specific scrape group:
```
vm_promscrape_targets{type="static_configs", status="down"} 1
vm_promscrape_targets{type="static_configs", status="up"} 2
```
This commit is contained in:
Roman Khavronenko 2020-07-13 19:52:03 +01:00 committed by Aliaksandr Valialkin
parent 3898cc0285
commit a02097e657
3 changed files with 30 additions and 9 deletions

View file

@ -236,11 +236,11 @@ func newScraperGroup(name string, pushData func(wr *prompbmarshal.WriteRequest))
pushData: pushData, pushData: pushData,
changesCount: metrics.NewCounter(fmt.Sprintf(`vm_promscrape_config_changes_total{type=%q}`, name)), changesCount: metrics.NewCounter(fmt.Sprintf(`vm_promscrape_config_changes_total{type=%q}`, name)),
} }
metrics.NewGauge(fmt.Sprintf(`vm_promscrape_targets{type=%q}`, name), func() float64 { metrics.NewGauge(fmt.Sprintf(`vm_promscrape_targets{type=%q, status="up"}`, name), func() float64 {
sg.mLock.Lock() return float64(tsmGlobal.StatusByGroup(sg.name, true))
n := len(sg.m) })
sg.mLock.Unlock() metrics.NewGauge(fmt.Sprintf(`vm_promscrape_targets{type=%q, status="down"}`, name), func() float64 {
return float64(n) return float64(tsmGlobal.StatusByGroup(sg.name, false))
}) })
return sg return sg
} }
@ -277,7 +277,7 @@ func (sg *scraperGroup) update(sws []ScrapeWork) {
} }
// Start a scraper for the missing key. // Start a scraper for the missing key.
sc := newScraper(sw, sg.pushData) sc := newScraper(sw, sg.name, sg.pushData)
sg.wg.Add(1) sg.wg.Add(1)
go func() { go func() {
defer sg.wg.Done() defer sg.wg.Done()
@ -309,12 +309,13 @@ type scraper struct {
stopCh chan struct{} stopCh chan struct{}
} }
func newScraper(sw *ScrapeWork, pushData func(wr *prompbmarshal.WriteRequest)) *scraper { func newScraper(sw *ScrapeWork, group string, pushData func(wr *prompbmarshal.WriteRequest)) *scraper {
sc := &scraper{ sc := &scraper{
stopCh: make(chan struct{}), stopCh: make(chan struct{}),
} }
c := newClient(sw) c := newClient(sw)
sc.sw.Config = *sw sc.sw.Config = *sw
sc.sw.ScrapeGroup = group
sc.sw.ReadData = c.ReadData sc.sw.ReadData = c.ReadData
sc.sw.PushData = pushData sc.sw.PushData = pushData
return sc return sc

View file

@ -120,6 +120,10 @@ type scrapeWork struct {
// PushData is called for pushing collected data. // PushData is called for pushing collected data.
PushData func(wr *prompbmarshal.WriteRequest) PushData func(wr *prompbmarshal.WriteRequest)
// ScrapeGroup is the name of the scrape group that
// this scrapeWork belongs to.
ScrapeGroup string
bodyBuf []byte bodyBuf []byte
rows parser.Rows rows parser.Rows
tmpRow parser.Row tmpRow parser.Row
@ -232,7 +236,7 @@ func (sw *scrapeWork) scrapeInternal(timestamp int64) error {
prompbmarshal.ResetWriteRequest(&sw.writeRequest) prompbmarshal.ResetWriteRequest(&sw.writeRequest)
sw.labels = sw.labels[:0] sw.labels = sw.labels[:0]
sw.samples = sw.samples[:0] sw.samples = sw.samples[:0]
tsmGlobal.Update(&sw.Config, up == 1, timestamp, int64(duration*1000), err) tsmGlobal.Update(&sw.Config, sw.ScrapeGroup, up == 1, timestamp, int64(duration*1000), err)
return err return err
} }

View file

@ -46,11 +46,12 @@ func (tsm *targetStatusMap) Unregister(sw *ScrapeWork) {
tsm.mu.Unlock() tsm.mu.Unlock()
} }
func (tsm *targetStatusMap) Update(sw *ScrapeWork, up bool, scrapeTime, scrapeDuration int64, err error) { func (tsm *targetStatusMap) Update(sw *ScrapeWork, group string, up bool, scrapeTime, scrapeDuration int64, err error) {
tsm.mu.Lock() tsm.mu.Lock()
tsm.m[sw.ID] = targetStatus{ tsm.m[sw.ID] = targetStatus{
sw: sw, sw: sw,
up: up, up: up,
scrapeGroup: group,
scrapeTime: scrapeTime, scrapeTime: scrapeTime,
scrapeDuration: scrapeDuration, scrapeDuration: scrapeDuration,
err: err, err: err,
@ -58,6 +59,20 @@ func (tsm *targetStatusMap) Update(sw *ScrapeWork, up bool, scrapeTime, scrapeDu
tsm.mu.Unlock() tsm.mu.Unlock()
} }
// StatusByGroup returns the number of registered targets that belong to
// the given scrape group and whose up flag equals the up argument.
//
// Pass up=true to count healthy targets and up=false to count failing ones.
// The status map is scanned while holding tsm.mu.
func (tsm *targetStatusMap) StatusByGroup(group string, up bool) int {
	tsm.mu.Lock()
	defer tsm.mu.Unlock()

	n := 0
	for _, ts := range tsm.m {
		// Skip entries from other groups or with the opposite status.
		if ts.scrapeGroup != group || ts.up != up {
			continue
		}
		n++
	}
	return n
}
func (tsm *targetStatusMap) WriteHumanReadable(w io.Writer) { func (tsm *targetStatusMap) WriteHumanReadable(w io.Writer) {
byJob := make(map[string][]targetStatus) byJob := make(map[string][]targetStatus)
tsm.mu.Lock() tsm.mu.Lock()
@ -116,6 +131,7 @@ type jobStatus struct {
type targetStatus struct { type targetStatus struct {
sw *ScrapeWork sw *ScrapeWork
up bool up bool
scrapeGroup string
scrapeTime int64 scrapeTime int64
scrapeDuration int64 scrapeDuration int64
err error err error