2020-02-23 11:35:47 +00:00
|
|
|
package promscrape
|
|
|
|
|
|
|
|
import (
|
2020-11-04 15:12:05 +00:00
|
|
|
"flag"
|
2020-02-23 11:35:47 +00:00
|
|
|
"fmt"
|
|
|
|
"io"
|
2020-12-14 11:36:48 +00:00
|
|
|
"net/http"
|
|
|
|
"path"
|
2020-02-23 11:35:47 +00:00
|
|
|
"sort"
|
|
|
|
"sync"
|
|
|
|
"time"
|
2020-10-20 18:44:59 +00:00
|
|
|
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
2020-12-14 11:36:48 +00:00
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
2020-10-20 18:44:59 +00:00
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
2020-02-23 11:35:47 +00:00
|
|
|
)
|
|
|
|
|
2020-11-04 15:12:05 +00:00
|
|
|
// maxDroppedTargets is the upper bound on the number of entries droppedTargets.Register
// keeps in its map at the same time.
var maxDroppedTargets = flag.Int(
	"promscrape.maxDroppedTargets",
	1000,
	"The maximum number of `droppedTargets` shown at /api/v1/targets page. Increase this value if your setup drops more scrape targets during relabeling "+
		"and you need investigating labels for all the dropped targets. Note that the increased number of tracked dropped targets may result in increased memory usage",
)
|
|
|
|
|
2020-02-23 11:35:47 +00:00
|
|
|
// tsmGlobal is the package-level targetStatusMap, which backs WriteHumanReadableTargetsStatus
// and the `activeTargets` part of WriteAPIV1Targets.
var tsmGlobal = newTargetStatusMap()
|
|
|
|
|
2020-12-14 11:36:48 +00:00
|
|
|
// WriteHumanReadableTargetsStatus writes human-readable status for all the scrape targets to w with given format and options.
|
|
|
|
func WriteHumanReadableTargetsStatus(w http.ResponseWriter, showOriginalLabels, showOnlyUnhealthy bool, format string) {
|
|
|
|
switch format {
|
|
|
|
case "plain":
|
|
|
|
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
|
|
|
tsmGlobal.WriteTargetsPlain(w, showOriginalLabels)
|
|
|
|
case "html":
|
|
|
|
w.Header().Set("Content-Type", "text/html; charset=utf-8")
|
|
|
|
tsmGlobal.WriteTargetsHTML(w, showOnlyUnhealthy)
|
|
|
|
}
|
2020-02-23 11:35:47 +00:00
|
|
|
}
|
|
|
|
|
2020-10-20 18:44:59 +00:00
|
|
|
// WriteAPIV1Targets writes /api/v1/targets to w according to https://prometheus.io/docs/prometheus/latest/querying/api/#targets
|
|
|
|
func WriteAPIV1Targets(w io.Writer, state string) {
|
|
|
|
if state == "" {
|
|
|
|
state = "any"
|
|
|
|
}
|
|
|
|
fmt.Fprintf(w, `{"status":"success","data":{"activeTargets":`)
|
|
|
|
if state == "active" || state == "any" {
|
|
|
|
tsmGlobal.WriteActiveTargetsJSON(w)
|
|
|
|
} else {
|
|
|
|
fmt.Fprintf(w, `[]`)
|
|
|
|
}
|
|
|
|
fmt.Fprintf(w, `,"droppedTargets":`)
|
|
|
|
if state == "dropped" || state == "any" {
|
|
|
|
droppedTargetsMap.WriteDroppedTargetsJSON(w)
|
|
|
|
} else {
|
|
|
|
fmt.Fprintf(w, `[]`)
|
|
|
|
}
|
|
|
|
fmt.Fprintf(w, `}}`)
|
|
|
|
}
|
|
|
|
|
2020-02-23 11:35:47 +00:00
|
|
|
type targetStatusMap struct {
|
|
|
|
mu sync.Mutex
|
2020-12-08 09:50:46 +00:00
|
|
|
m map[uint64]*targetStatus
|
2020-02-23 11:35:47 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func newTargetStatusMap() *targetStatusMap {
|
|
|
|
return &targetStatusMap{
|
2020-12-08 09:50:46 +00:00
|
|
|
m: make(map[uint64]*targetStatus),
|
2020-02-23 11:35:47 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (tsm *targetStatusMap) Reset() {
|
|
|
|
tsm.mu.Lock()
|
2020-12-08 09:50:46 +00:00
|
|
|
tsm.m = make(map[uint64]*targetStatus)
|
2020-02-23 11:35:47 +00:00
|
|
|
tsm.mu.Unlock()
|
|
|
|
}
|
|
|
|
|
2020-05-03 09:41:13 +00:00
|
|
|
func (tsm *targetStatusMap) Register(sw *ScrapeWork) {
|
2020-03-11 01:19:56 +00:00
|
|
|
tsm.mu.Lock()
|
2020-12-08 09:50:46 +00:00
|
|
|
tsm.m[sw.ID] = &targetStatus{
|
2020-11-10 14:11:16 +00:00
|
|
|
sw: *sw,
|
2020-03-11 01:19:56 +00:00
|
|
|
}
|
|
|
|
tsm.mu.Unlock()
|
|
|
|
}
|
|
|
|
|
2020-05-03 09:41:13 +00:00
|
|
|
func (tsm *targetStatusMap) Unregister(sw *ScrapeWork) {
|
2020-03-11 01:19:56 +00:00
|
|
|
tsm.mu.Lock()
|
2020-05-03 09:41:13 +00:00
|
|
|
delete(tsm.m, sw.ID)
|
2020-03-11 01:19:56 +00:00
|
|
|
tsm.mu.Unlock()
|
|
|
|
}
|
|
|
|
|
2020-07-13 18:52:03 +00:00
|
|
|
func (tsm *targetStatusMap) Update(sw *ScrapeWork, group string, up bool, scrapeTime, scrapeDuration int64, err error) {
|
2020-02-23 11:35:47 +00:00
|
|
|
tsm.mu.Lock()
|
2020-12-08 09:50:46 +00:00
|
|
|
tsm.m[sw.ID] = &targetStatus{
|
2020-11-10 14:11:16 +00:00
|
|
|
sw: *sw,
|
2020-02-23 11:35:47 +00:00
|
|
|
up: up,
|
2020-07-13 18:52:03 +00:00
|
|
|
scrapeGroup: group,
|
2020-02-23 11:35:47 +00:00
|
|
|
scrapeTime: scrapeTime,
|
|
|
|
scrapeDuration: scrapeDuration,
|
|
|
|
err: err,
|
|
|
|
}
|
|
|
|
tsm.mu.Unlock()
|
|
|
|
}
|
|
|
|
|
2020-07-13 18:52:03 +00:00
|
|
|
// StatusByGroup returns the number of targets with status==up
|
|
|
|
// for the given group name
|
|
|
|
func (tsm *targetStatusMap) StatusByGroup(group string, up bool) int {
|
|
|
|
var count int
|
|
|
|
tsm.mu.Lock()
|
|
|
|
for _, st := range tsm.m {
|
|
|
|
if st.scrapeGroup == group && st.up == up {
|
|
|
|
count++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
tsm.mu.Unlock()
|
|
|
|
return count
|
|
|
|
}
|
|
|
|
|
2020-10-20 18:44:59 +00:00
|
|
|
// WriteActiveTargetsJSON writes `activeTargets` contents to w according to https://prometheus.io/docs/prometheus/latest/querying/api/#targets
|
|
|
|
func (tsm *targetStatusMap) WriteActiveTargetsJSON(w io.Writer) {
|
|
|
|
tsm.mu.Lock()
|
|
|
|
type keyStatus struct {
|
|
|
|
key string
|
|
|
|
st targetStatus
|
|
|
|
}
|
|
|
|
kss := make([]keyStatus, 0, len(tsm.m))
|
|
|
|
for _, st := range tsm.m {
|
|
|
|
key := promLabelsString(st.sw.OriginalLabels)
|
|
|
|
kss = append(kss, keyStatus{
|
|
|
|
key: key,
|
2020-12-08 09:50:46 +00:00
|
|
|
st: *st,
|
2020-10-20 18:44:59 +00:00
|
|
|
})
|
|
|
|
}
|
|
|
|
tsm.mu.Unlock()
|
|
|
|
|
|
|
|
sort.Slice(kss, func(i, j int) bool {
|
|
|
|
return kss[i].key < kss[j].key
|
|
|
|
})
|
|
|
|
fmt.Fprintf(w, `[`)
|
|
|
|
for i, ks := range kss {
|
|
|
|
st := ks.st
|
|
|
|
fmt.Fprintf(w, `{"discoveredLabels":`)
|
|
|
|
writeLabelsJSON(w, st.sw.OriginalLabels)
|
|
|
|
fmt.Fprintf(w, `,"labels":`)
|
|
|
|
labelsFinalized := promrelabel.FinalizeLabels(nil, st.sw.Labels)
|
|
|
|
writeLabelsJSON(w, labelsFinalized)
|
|
|
|
fmt.Fprintf(w, `,"scrapePool":%q`, st.sw.Job())
|
|
|
|
fmt.Fprintf(w, `,"scrapeUrl":%q`, st.sw.ScrapeURL)
|
|
|
|
errMsg := ""
|
|
|
|
if st.err != nil {
|
|
|
|
errMsg = st.err.Error()
|
|
|
|
}
|
|
|
|
fmt.Fprintf(w, `,"lastError":%q`, errMsg)
|
|
|
|
fmt.Fprintf(w, `,"lastScrape":%q`, time.Unix(st.scrapeTime/1000, (st.scrapeTime%1000)*1e6).Format(time.RFC3339Nano))
|
|
|
|
fmt.Fprintf(w, `,"lastScrapeDuration":%g`, (time.Millisecond * time.Duration(st.scrapeDuration)).Seconds())
|
|
|
|
state := "up"
|
|
|
|
if !st.up {
|
|
|
|
state = "down"
|
|
|
|
}
|
|
|
|
fmt.Fprintf(w, `,"health":%q}`, state)
|
|
|
|
if i+1 < len(kss) {
|
|
|
|
fmt.Fprintf(w, `,`)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
fmt.Fprintf(w, `]`)
|
|
|
|
}
|
|
|
|
|
|
|
|
func writeLabelsJSON(w io.Writer, labels []prompbmarshal.Label) {
|
|
|
|
fmt.Fprintf(w, `{`)
|
|
|
|
for i, label := range labels {
|
|
|
|
fmt.Fprintf(w, "%q:%q", label.Name, label.Value)
|
|
|
|
if i+1 < len(labels) {
|
|
|
|
fmt.Fprintf(w, `,`)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
fmt.Fprintf(w, `}`)
|
|
|
|
}
|
|
|
|
|
2020-02-23 11:35:47 +00:00
|
|
|
type targetStatus struct {
|
2020-11-10 14:11:16 +00:00
|
|
|
sw ScrapeWork
|
2020-02-23 11:35:47 +00:00
|
|
|
up bool
|
2020-07-13 18:52:03 +00:00
|
|
|
scrapeGroup string
|
2020-02-23 11:35:47 +00:00
|
|
|
scrapeTime int64
|
|
|
|
scrapeDuration int64
|
|
|
|
err error
|
|
|
|
}
|
|
|
|
|
|
|
|
func (st *targetStatus) getDurationFromLastScrape() time.Duration {
|
|
|
|
return time.Since(time.Unix(st.scrapeTime/1000, (st.scrapeTime%1000)*1e6))
|
|
|
|
}
|
2020-10-20 18:44:59 +00:00
|
|
|
|
|
|
|
type droppedTargets struct {
|
|
|
|
mu sync.Mutex
|
|
|
|
m map[string]droppedTarget
|
|
|
|
lastCleanupTime uint64
|
|
|
|
}
|
|
|
|
|
|
|
|
type droppedTarget struct {
|
|
|
|
originalLabels []prompbmarshal.Label
|
|
|
|
deadline uint64
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dt *droppedTargets) Register(originalLabels []prompbmarshal.Label) {
|
|
|
|
key := promLabelsString(originalLabels)
|
|
|
|
currentTime := fasttime.UnixTimestamp()
|
|
|
|
dt.mu.Lock()
|
2020-11-04 15:03:43 +00:00
|
|
|
if k, ok := dt.m[key]; ok {
|
|
|
|
k.deadline = currentTime + 10*60
|
|
|
|
dt.m[key] = k
|
|
|
|
} else if len(dt.m) < *maxDroppedTargets {
|
|
|
|
dt.m[key] = droppedTarget{
|
|
|
|
originalLabels: originalLabels,
|
|
|
|
deadline: currentTime + 10*60,
|
|
|
|
}
|
2020-10-20 18:44:59 +00:00
|
|
|
}
|
|
|
|
if currentTime-dt.lastCleanupTime > 60 {
|
|
|
|
for k, v := range dt.m {
|
|
|
|
if currentTime > v.deadline {
|
|
|
|
delete(dt.m, k)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
dt.lastCleanupTime = currentTime
|
|
|
|
}
|
|
|
|
dt.mu.Unlock()
|
|
|
|
}
|
|
|
|
|
|
|
|
// WriteDroppedTargetsJSON writes `droppedTargets` contents to w according to https://prometheus.io/docs/prometheus/latest/querying/api/#targets
|
|
|
|
func (dt *droppedTargets) WriteDroppedTargetsJSON(w io.Writer) {
|
|
|
|
dt.mu.Lock()
|
|
|
|
type keyStatus struct {
|
|
|
|
key string
|
|
|
|
originalLabels []prompbmarshal.Label
|
|
|
|
}
|
|
|
|
kss := make([]keyStatus, 0, len(dt.m))
|
|
|
|
for _, v := range dt.m {
|
|
|
|
key := promLabelsString(v.originalLabels)
|
|
|
|
kss = append(kss, keyStatus{
|
|
|
|
key: key,
|
|
|
|
originalLabels: v.originalLabels,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
dt.mu.Unlock()
|
|
|
|
|
|
|
|
sort.Slice(kss, func(i, j int) bool {
|
|
|
|
return kss[i].key < kss[j].key
|
|
|
|
})
|
|
|
|
fmt.Fprintf(w, `[`)
|
|
|
|
for i, ks := range kss {
|
|
|
|
fmt.Fprintf(w, `{"discoveredLabels":`)
|
|
|
|
writeLabelsJSON(w, ks.originalLabels)
|
|
|
|
fmt.Fprintf(w, `}`)
|
|
|
|
if i+1 < len(kss) {
|
|
|
|
fmt.Fprintf(w, `,`)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
fmt.Fprintf(w, `]`)
|
|
|
|
}
|
|
|
|
|
|
|
|
var droppedTargetsMap = &droppedTargets{
|
|
|
|
m: make(map[string]droppedTarget),
|
|
|
|
}
|
2020-12-14 11:36:48 +00:00
|
|
|
|
|
|
|
type jobTargetStatus struct {
|
|
|
|
up bool
|
|
|
|
endpoint string
|
|
|
|
labels []prompbmarshal.Label
|
|
|
|
originalLabels []prompbmarshal.Label
|
|
|
|
lastScrapeTime time.Duration
|
|
|
|
scrapeDuration time.Duration
|
|
|
|
error string
|
|
|
|
}
|
|
|
|
|
|
|
|
type jobTargetsStatuses struct {
|
|
|
|
job string
|
|
|
|
upCount int
|
|
|
|
targetsTotal int
|
|
|
|
targetsStatus []jobTargetStatus
|
|
|
|
}
|
|
|
|
|
|
|
|
func (tsm *targetStatusMap) getTargetsStatusByJob() []jobTargetsStatuses {
|
|
|
|
byJob := make(map[string][]targetStatus)
|
|
|
|
tsm.mu.Lock()
|
|
|
|
for _, st := range tsm.m {
|
|
|
|
job := st.sw.Job()
|
|
|
|
byJob[job] = append(byJob[job], *st)
|
|
|
|
}
|
|
|
|
tsm.mu.Unlock()
|
|
|
|
|
|
|
|
var jts []jobTargetsStatuses
|
|
|
|
for job, statuses := range byJob {
|
|
|
|
sort.Slice(statuses, func(i, j int) bool {
|
|
|
|
return statuses[i].sw.ScrapeURL < statuses[j].sw.ScrapeURL
|
|
|
|
})
|
|
|
|
ups := 0
|
|
|
|
var targetsStatuses []jobTargetStatus
|
|
|
|
for _, ts := range statuses {
|
|
|
|
if ts.up {
|
|
|
|
ups++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for _, st := range statuses {
|
|
|
|
errMsg := ""
|
|
|
|
if st.err != nil {
|
|
|
|
errMsg = st.err.Error()
|
|
|
|
}
|
|
|
|
targetsStatuses = append(targetsStatuses, jobTargetStatus{
|
|
|
|
up: st.up,
|
|
|
|
endpoint: st.sw.ScrapeURL,
|
|
|
|
labels: promrelabel.FinalizeLabels(nil, st.sw.Labels),
|
|
|
|
originalLabels: st.sw.OriginalLabels,
|
|
|
|
lastScrapeTime: st.getDurationFromLastScrape(),
|
|
|
|
scrapeDuration: time.Duration(st.scrapeDuration),
|
|
|
|
error: errMsg,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
jts = append(jts, jobTargetsStatuses{
|
|
|
|
job: job,
|
|
|
|
upCount: ups,
|
|
|
|
targetsTotal: len(statuses),
|
|
|
|
targetsStatus: targetsStatuses,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
sort.Slice(jts, func(i, j int) bool {
|
|
|
|
return jts[i].job < jts[j].job
|
|
|
|
})
|
|
|
|
return jts
|
|
|
|
}
|
|
|
|
|
|
|
|
// WriteTargetsHTML writes targets status grouped by job into writer w in html table,
|
|
|
|
// accepts filter to show only unhealthy targets.
|
|
|
|
func (tsm *targetStatusMap) WriteTargetsHTML(w io.Writer, showOnlyUnhealthy bool) {
|
|
|
|
jss := tsm.getTargetsStatusByJob()
|
|
|
|
targetsPath := path.Join(httpserver.GetPathPrefix(), "/targets")
|
|
|
|
WriteTargetsResponseHTML(w, jss, targetsPath, showOnlyUnhealthy)
|
|
|
|
}
|
|
|
|
|
|
|
|
// WriteTargetsPlain writes targets grouped by job into writer w in plain text,
|
|
|
|
// accept filter to show original labels.
|
|
|
|
func (tsm *targetStatusMap) WriteTargetsPlain(w io.Writer, showOriginalLabels bool) {
|
|
|
|
jss := tsm.getTargetsStatusByJob()
|
|
|
|
WriteTargetsResponsePlain(w, jss, showOriginalLabels)
|
|
|
|
}
|