mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-02-09 15:27:11 +00:00
Extend metric vm_promscrape_targets
with status
label (#615)
The change to the `vm_promscrape_targets` metric is supposed to improve observability for `vmagent`, making it possible to track how many targets are up or down for each scrape group: ``` vm_promscrape_targets{type="static_configs", status="down"} 1 vm_promscrape_targets{type="static_configs", status="up"} 2 ```
This commit is contained in:
parent
3898cc0285
commit
a02097e657
3 changed files with 30 additions and 9 deletions
|
@ -236,11 +236,11 @@ func newScraperGroup(name string, pushData func(wr *prompbmarshal.WriteRequest))
|
||||||
pushData: pushData,
|
pushData: pushData,
|
||||||
changesCount: metrics.NewCounter(fmt.Sprintf(`vm_promscrape_config_changes_total{type=%q}`, name)),
|
changesCount: metrics.NewCounter(fmt.Sprintf(`vm_promscrape_config_changes_total{type=%q}`, name)),
|
||||||
}
|
}
|
||||||
metrics.NewGauge(fmt.Sprintf(`vm_promscrape_targets{type=%q}`, name), func() float64 {
|
metrics.NewGauge(fmt.Sprintf(`vm_promscrape_targets{type=%q, status="up"}`, name), func() float64 {
|
||||||
sg.mLock.Lock()
|
return float64(tsmGlobal.StatusByGroup(sg.name, true))
|
||||||
n := len(sg.m)
|
})
|
||||||
sg.mLock.Unlock()
|
metrics.NewGauge(fmt.Sprintf(`vm_promscrape_targets{type=%q, status="down"}`, name), func() float64 {
|
||||||
return float64(n)
|
return float64(tsmGlobal.StatusByGroup(sg.name, false))
|
||||||
})
|
})
|
||||||
return sg
|
return sg
|
||||||
}
|
}
|
||||||
|
@ -277,7 +277,7 @@ func (sg *scraperGroup) update(sws []ScrapeWork) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start a scraper for the missing key.
|
// Start a scraper for the missing key.
|
||||||
sc := newScraper(sw, sg.pushData)
|
sc := newScraper(sw, sg.name, sg.pushData)
|
||||||
sg.wg.Add(1)
|
sg.wg.Add(1)
|
||||||
go func() {
|
go func() {
|
||||||
defer sg.wg.Done()
|
defer sg.wg.Done()
|
||||||
|
@ -309,12 +309,13 @@ type scraper struct {
|
||||||
stopCh chan struct{}
|
stopCh chan struct{}
|
||||||
}
|
}
|
||||||
|
|
||||||
func newScraper(sw *ScrapeWork, pushData func(wr *prompbmarshal.WriteRequest)) *scraper {
|
func newScraper(sw *ScrapeWork, group string, pushData func(wr *prompbmarshal.WriteRequest)) *scraper {
|
||||||
sc := &scraper{
|
sc := &scraper{
|
||||||
stopCh: make(chan struct{}),
|
stopCh: make(chan struct{}),
|
||||||
}
|
}
|
||||||
c := newClient(sw)
|
c := newClient(sw)
|
||||||
sc.sw.Config = *sw
|
sc.sw.Config = *sw
|
||||||
|
sc.sw.ScrapeGroup = group
|
||||||
sc.sw.ReadData = c.ReadData
|
sc.sw.ReadData = c.ReadData
|
||||||
sc.sw.PushData = pushData
|
sc.sw.PushData = pushData
|
||||||
return sc
|
return sc
|
||||||
|
|
|
@ -120,6 +120,10 @@ type scrapeWork struct {
|
||||||
// PushData is called for pushing collected data.
|
// PushData is called for pushing collected data.
|
||||||
PushData func(wr *prompbmarshal.WriteRequest)
|
PushData func(wr *prompbmarshal.WriteRequest)
|
||||||
|
|
||||||
|
// ScrapeGroup is the name of the scrape group that
|
||||||
|
// scrapeWork belongs to
|
||||||
|
ScrapeGroup string
|
||||||
|
|
||||||
bodyBuf []byte
|
bodyBuf []byte
|
||||||
rows parser.Rows
|
rows parser.Rows
|
||||||
tmpRow parser.Row
|
tmpRow parser.Row
|
||||||
|
@ -232,7 +236,7 @@ func (sw *scrapeWork) scrapeInternal(timestamp int64) error {
|
||||||
prompbmarshal.ResetWriteRequest(&sw.writeRequest)
|
prompbmarshal.ResetWriteRequest(&sw.writeRequest)
|
||||||
sw.labels = sw.labels[:0]
|
sw.labels = sw.labels[:0]
|
||||||
sw.samples = sw.samples[:0]
|
sw.samples = sw.samples[:0]
|
||||||
tsmGlobal.Update(&sw.Config, up == 1, timestamp, int64(duration*1000), err)
|
tsmGlobal.Update(&sw.Config, sw.ScrapeGroup, up == 1, timestamp, int64(duration*1000), err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -46,11 +46,12 @@ func (tsm *targetStatusMap) Unregister(sw *ScrapeWork) {
|
||||||
tsm.mu.Unlock()
|
tsm.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (tsm *targetStatusMap) Update(sw *ScrapeWork, up bool, scrapeTime, scrapeDuration int64, err error) {
|
func (tsm *targetStatusMap) Update(sw *ScrapeWork, group string, up bool, scrapeTime, scrapeDuration int64, err error) {
|
||||||
tsm.mu.Lock()
|
tsm.mu.Lock()
|
||||||
tsm.m[sw.ID] = targetStatus{
|
tsm.m[sw.ID] = targetStatus{
|
||||||
sw: sw,
|
sw: sw,
|
||||||
up: up,
|
up: up,
|
||||||
|
scrapeGroup: group,
|
||||||
scrapeTime: scrapeTime,
|
scrapeTime: scrapeTime,
|
||||||
scrapeDuration: scrapeDuration,
|
scrapeDuration: scrapeDuration,
|
||||||
err: err,
|
err: err,
|
||||||
|
@ -58,6 +59,20 @@ func (tsm *targetStatusMap) Update(sw *ScrapeWork, up bool, scrapeTime, scrapeDu
|
||||||
tsm.mu.Unlock()
|
tsm.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// StatusByGroup returns the number of targets in the given group
|
||||||
|
// whose up state matches the up argument.
|
||||||
|
func (tsm *targetStatusMap) StatusByGroup(group string, up bool) int {
|
||||||
|
var count int
|
||||||
|
tsm.mu.Lock()
|
||||||
|
for _, st := range tsm.m {
|
||||||
|
if st.scrapeGroup == group && st.up == up {
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tsm.mu.Unlock()
|
||||||
|
return count
|
||||||
|
}
|
||||||
|
|
||||||
func (tsm *targetStatusMap) WriteHumanReadable(w io.Writer) {
|
func (tsm *targetStatusMap) WriteHumanReadable(w io.Writer) {
|
||||||
byJob := make(map[string][]targetStatus)
|
byJob := make(map[string][]targetStatus)
|
||||||
tsm.mu.Lock()
|
tsm.mu.Lock()
|
||||||
|
@ -116,6 +131,7 @@ type jobStatus struct {
|
||||||
type targetStatus struct {
|
type targetStatus struct {
|
||||||
sw *ScrapeWork
|
sw *ScrapeWork
|
||||||
up bool
|
up bool
|
||||||
|
scrapeGroup string
|
||||||
scrapeTime int64
|
scrapeTime int64
|
||||||
scrapeDuration int64
|
scrapeDuration int64
|
||||||
err error
|
err error
|
||||||
|
|
Loading…
Reference in a new issue