mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
Extend metric vm_promscrape_targets
with status
label (#615)
The change to the `vm_promscrape_targets` metric is supposed to improve observability for `vmagent`, making it possible to track how many targets are up or down for each specific scrape group: ``` vm_promscrape_targets{type="static_configs", status="down"} 1 vm_promscrape_targets{type="static_configs", status="up"} 2 ```
This commit is contained in:
parent
55d83e777d
commit
829ec4f9cf
3 changed files with 30 additions and 9 deletions
|
@ -236,11 +236,11 @@ func newScraperGroup(name string, pushData func(wr *prompbmarshal.WriteRequest))
|
|||
pushData: pushData,
|
||||
changesCount: metrics.NewCounter(fmt.Sprintf(`vm_promscrape_config_changes_total{type=%q}`, name)),
|
||||
}
|
||||
metrics.NewGauge(fmt.Sprintf(`vm_promscrape_targets{type=%q}`, name), func() float64 {
|
||||
sg.mLock.Lock()
|
||||
n := len(sg.m)
|
||||
sg.mLock.Unlock()
|
||||
return float64(n)
|
||||
metrics.NewGauge(fmt.Sprintf(`vm_promscrape_targets{type=%q, status="up"}`, name), func() float64 {
|
||||
return float64(tsmGlobal.StatusByGroup(sg.name, true))
|
||||
})
|
||||
metrics.NewGauge(fmt.Sprintf(`vm_promscrape_targets{type=%q, status="down"}`, name), func() float64 {
|
||||
return float64(tsmGlobal.StatusByGroup(sg.name, false))
|
||||
})
|
||||
return sg
|
||||
}
|
||||
|
@ -277,7 +277,7 @@ func (sg *scraperGroup) update(sws []ScrapeWork) {
|
|||
}
|
||||
|
||||
// Start a scraper for the missing key.
|
||||
sc := newScraper(sw, sg.pushData)
|
||||
sc := newScraper(sw, sg.name, sg.pushData)
|
||||
sg.wg.Add(1)
|
||||
go func() {
|
||||
defer sg.wg.Done()
|
||||
|
@ -309,12 +309,13 @@ type scraper struct {
|
|||
stopCh chan struct{}
|
||||
}
|
||||
|
||||
func newScraper(sw *ScrapeWork, pushData func(wr *prompbmarshal.WriteRequest)) *scraper {
|
||||
func newScraper(sw *ScrapeWork, group string, pushData func(wr *prompbmarshal.WriteRequest)) *scraper {
|
||||
sc := &scraper{
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
c := newClient(sw)
|
||||
sc.sw.Config = *sw
|
||||
sc.sw.ScrapeGroup = group
|
||||
sc.sw.ReadData = c.ReadData
|
||||
sc.sw.PushData = pushData
|
||||
return sc
|
||||
|
|
|
@ -120,6 +120,10 @@ type scrapeWork struct {
|
|||
// PushData is called for pushing collected data.
|
||||
PushData func(wr *prompbmarshal.WriteRequest)
|
||||
|
||||
// ScrapeGroup is name of ScrapeGroup that
|
||||
// scrapeWork belongs to
|
||||
ScrapeGroup string
|
||||
|
||||
bodyBuf []byte
|
||||
rows parser.Rows
|
||||
tmpRow parser.Row
|
||||
|
@ -232,7 +236,7 @@ func (sw *scrapeWork) scrapeInternal(timestamp int64) error {
|
|||
prompbmarshal.ResetWriteRequest(&sw.writeRequest)
|
||||
sw.labels = sw.labels[:0]
|
||||
sw.samples = sw.samples[:0]
|
||||
tsmGlobal.Update(&sw.Config, up == 1, timestamp, int64(duration*1000), err)
|
||||
tsmGlobal.Update(&sw.Config, sw.ScrapeGroup, up == 1, timestamp, int64(duration*1000), err)
|
||||
return err
|
||||
}
|
||||
|
||||
|
|
|
@ -46,11 +46,12 @@ func (tsm *targetStatusMap) Unregister(sw *ScrapeWork) {
|
|||
tsm.mu.Unlock()
|
||||
}
|
||||
|
||||
func (tsm *targetStatusMap) Update(sw *ScrapeWork, up bool, scrapeTime, scrapeDuration int64, err error) {
|
||||
func (tsm *targetStatusMap) Update(sw *ScrapeWork, group string, up bool, scrapeTime, scrapeDuration int64, err error) {
|
||||
tsm.mu.Lock()
|
||||
tsm.m[sw.ID] = targetStatus{
|
||||
sw: sw,
|
||||
up: up,
|
||||
scrapeGroup: group,
|
||||
scrapeTime: scrapeTime,
|
||||
scrapeDuration: scrapeDuration,
|
||||
err: err,
|
||||
|
@ -58,6 +59,20 @@ func (tsm *targetStatusMap) Update(sw *ScrapeWork, up bool, scrapeTime, scrapeDu
|
|||
tsm.mu.Unlock()
|
||||
}
|
||||
|
||||
// StatusByGroup returns the number of targets in the given group whose
|
||||
// up status equals the up argument; tsm.m is scanned while holding tsm.mu.
|
||||
func (tsm *targetStatusMap) StatusByGroup(group string, up bool) int {
|
||||
var count int
|
||||
tsm.mu.Lock()
|
||||
for _, st := range tsm.m {
|
||||
if st.scrapeGroup == group && st.up == up {
|
||||
count++
|
||||
}
|
||||
}
|
||||
tsm.mu.Unlock()
|
||||
return count
|
||||
|
||||
}
|
||||
|
||||
func (tsm *targetStatusMap) WriteHumanReadable(w io.Writer) {
|
||||
byJob := make(map[string][]targetStatus)
|
||||
tsm.mu.Lock()
|
||||
|
@ -116,6 +131,7 @@ type jobStatus struct {
|
|||
type targetStatus struct {
|
||||
sw *ScrapeWork
|
||||
up bool
|
||||
scrapeGroup string
|
||||
scrapeTime int64
|
||||
scrapeDuration int64
|
||||
err error
|
||||
|
|
Loading…
Reference in a new issue