diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 244e7ae68..d9524af55 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -31,6 +31,7 @@ The sandbox cluster installation is running under the constant load generated by * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `-remoteWrite.disableOnDiskQueue` command-line flag, which can be used for disabling data queueing to disk when the remote storage cannot keep up with the data ingestion rate. See [these docs](https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2110). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add support for reading and writing samples via [Google PubSub](https://cloud.google.com/pubsub). See [these docs](https://docs.victoriametrics.com/vmagent.html#google-pubsub-integration). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add support for Datadog `/api/v2/series` and `/api/beta/sketches` ingestion protocols to vmagent/vminsert components. See this [doc](https://docs.victoriametrics.com/#how-to-send-data-from-datadog-agent) for examples. Thanks to @AndrewChubatiuk for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5094). +* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): show all the dropped targets together with the reason why they are dropped at `http://vmagent:8429/service-discovery` page. Previously targets, which were dropped because of [target sharding](https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets), weren't displayed on this page. This could complicate service discovery debugging. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5389). 
* FEATURE: reduce the default value for `-import.maxLineLen` command-line flag from 100MB to 10MB in order to prevent excessive memory usage during data import via [/api/v1/import](https://docs.victoriametrics.com/#how-to-import-data-in-json-line-format). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `keep_if_contains` and `drop_if_contains` relabeling actions. See [these docs](https://docs.victoriametrics.com/vmagent.html#relabeling-enhancements) for details. * FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add [day_of_year()](https://docs.victoriametrics.com/MetricsQL.html#day_of_year) function, which returns the day of the year for each of the given unix timestamps. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5345) for details. Thanks to @luckyxiaoqiang for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5368/). diff --git a/lib/promscrape/config.go b/lib/promscrape/config.go index 84ddf47d2..3830c24e4 100644 --- a/lib/promscrape/config.go +++ b/lib/promscrape/config.go @@ -1049,14 +1049,18 @@ func (swc *scrapeWorkConfig) getScrapeWork(target string, extraLabels, metaLabel defer promutils.PutLabels(labels) mergeLabels(labels, swc, target, extraLabels, metaLabels) - var originalLabels *promutils.Labels - if !*dropOriginalLabels { - originalLabels = labels.Clone() - } + originalLabels := labels.Clone() labels.Labels = swc.relabelConfigs.Apply(labels.Labels, 0) // Remove labels starting from "__meta_" prefix according to https://www.robustperception.io/life-of-a-label/ labels.RemoveMetaLabels() + if labels.Len() == 0 { + // Drop target without labels. + originalLabels = sortOriginalLabelsIfNeeded(originalLabels) + droppedTargetsMap.Register(originalLabels, swc.relabelConfigs, targetDropReasonRelabeling) + return nil, nil + } + // Verify whether the scrape work must be skipped because of `-promscrape.cluster.*` configs. 
// Perform the verification on labels after the relabeling in order to guarantee that targets with the same set of labels // go to the same vmagent shard. @@ -1067,23 +1071,16 @@ func (swc *scrapeWorkConfig) getScrapeWork(target string, extraLabels, metaLabel needSkip := needSkipScrapeWork(bytesutil.ToUnsafeString(bb.B), *clusterMembersCount, *clusterReplicationFactor, clusterMemberID) scrapeWorkKeyBufPool.Put(bb) if needSkip { + originalLabels = sortOriginalLabelsIfNeeded(originalLabels) + droppedTargetsMap.Register(originalLabels, swc.relabelConfigs, targetDropReasonSharding) return nil, nil } } - if !*dropOriginalLabels { - originalLabels.Sort() - // Reduce memory usage by interning all the strings in originalLabels. - originalLabels.InternStrings() - } - if labels.Len() == 0 { - // Drop target without labels. - droppedTargetsMap.Register(originalLabels, swc.relabelConfigs) - return nil, nil - } scrapeURL, address := promrelabel.GetScrapeURL(labels, swc.params) if scrapeURL == "" { // Drop target without URL. - droppedTargetsMap.Register(originalLabels, swc.relabelConfigs) + originalLabels = sortOriginalLabelsIfNeeded(originalLabels) + droppedTargetsMap.Register(originalLabels, swc.relabelConfigs, targetDropReasonMissingScrapeURL) return nil, nil } if _, err := url.Parse(scrapeURL); err != nil { @@ -1155,6 +1152,7 @@ func (swc *scrapeWorkConfig) getScrapeWork(target string, extraLabels, metaLabel // Reduce memory usage by interning all the strings in labels. labelsCopy.InternStrings() + originalLabels = sortOriginalLabelsIfNeeded(originalLabels) sw := &ScrapeWork{ ScrapeURL: scrapeURL, ScrapeInterval: scrapeInterval, @@ -1185,6 +1183,16 @@ func (swc *scrapeWorkConfig) getScrapeWork(target string, extraLabels, metaLabel return sw, nil } +func sortOriginalLabelsIfNeeded(originalLabels *promutils.Labels) *promutils.Labels { + if *dropOriginalLabels { + return nil + } + originalLabels.Sort() + // Reduce memory usage by interning all the strings in originalLabels. 
+ originalLabels.InternStrings() + return originalLabels +} + func mergeLabels(dst *promutils.Labels, swc *scrapeWorkConfig, target string, extraLabels, metaLabels *promutils.Labels) { if n := dst.Len(); n > 0 { logger.Panicf("BUG: len(dst.Labels) must be 0; got %d", n) diff --git a/lib/promscrape/scraper.go b/lib/promscrape/scraper.go index 0880ac204..f61cc4758 100644 --- a/lib/promscrape/scraper.go +++ b/lib/promscrape/scraper.go @@ -370,7 +370,7 @@ func (sg *scraperGroup) update(sws []*ScrapeWork) { "original labels for target1: %s; original labels for target2: %s", sw.ScrapeURL, sw.Labels.String(), originalLabels.String(), sw.OriginalLabels.String()) } - droppedTargetsMap.Register(sw.OriginalLabels, sw.RelabelConfigs) + droppedTargetsMap.Register(sw.OriginalLabels, sw.RelabelConfigs, targetDropReasonDuplicate) continue } swsMap[key] = sw.OriginalLabels diff --git a/lib/promscrape/targetstatus.go b/lib/promscrape/targetstatus.go index de1823364..61845bd94 100644 --- a/lib/promscrape/targetstatus.go +++ b/lib/promscrape/targetstatus.go @@ -13,7 +13,6 @@ import ( "time" "unsafe" - "github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils" "github.com/cespare/xxhash/v2" @@ -248,17 +247,25 @@ func (ts *targetStatus) getDurationFromLastScrape() time.Duration { } type droppedTargets struct { - mu sync.Mutex - m map[uint64]droppedTarget - lastCleanupTime uint64 + mu sync.Mutex + m map[uint64]droppedTarget } type droppedTarget struct { originalLabels *promutils.Labels relabelConfigs *promrelabel.ParsedConfigs - deadline uint64 + dropReason targetDropReason } +type targetDropReason string + +const ( + targetDropReasonRelabeling = targetDropReason("relabeling") // target dropped because of relabeling + targetDropReasonMissingScrapeURL = targetDropReason("missing scrape URL") // target dropped because of missing scrape URL + targetDropReasonDuplicate = 
targetDropReason("duplicate") // target with the given set of labels already exists + targetDropReasonSharding = targetDropReason("sharding") // target is dropped because of sharding https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets +) + func (dt *droppedTargets) getTargetsList() []droppedTarget { dt.mu.Lock() dts := make([]droppedTarget, 0, len(dt.m)) @@ -275,30 +282,30 @@ func (dt *droppedTargets) getTargetsList() []droppedTarget { return dts } -func (dt *droppedTargets) Register(originalLabels *promutils.Labels, relabelConfigs *promrelabel.ParsedConfigs) { - if *dropOriginalLabels { - // The originalLabels must be dropped, so do not register it. +// Register registers dropped target with the given originalLabels. +// +// The relabelConfigs must contain relabel configs, which were applied to originalLabels. +// The reason must contain the reason why the target has been dropped. +func (dt *droppedTargets) Register(originalLabels *promutils.Labels, relabelConfigs *promrelabel.ParsedConfigs, reason targetDropReason) { + if originalLabels == nil { + // Do not register target without originalLabels. This is the case when *dropOriginalLabels is set to true. return } // It is better to have hash collisions instead of spending additional CPU on originalLabels.String() call. 
key := labelsHash(originalLabels) - currentTime := fasttime.UnixTimestamp() dt.mu.Lock() - _, ok := dt.m[key] - if ok || len(dt.m) < *maxDroppedTargets { - dt.m[key] = droppedTarget{ - originalLabels: originalLabels, - relabelConfigs: relabelConfigs, - deadline: currentTime + 10*60, - } + dt.m[key] = droppedTarget{ + originalLabels: originalLabels, + relabelConfigs: relabelConfigs, + dropReason: reason, } - if currentTime-dt.lastCleanupTime > 60 { - for k, v := range dt.m { - if currentTime > v.deadline { - delete(dt.m, k) + if len(dt.m) >= *maxDroppedTargets { + for k := range dt.m { + delete(dt.m, k) + if len(dt.m) < *maxDroppedTargets { + break } } - dt.lastCleanupTime = currentTime } dt.mu.Unlock() } @@ -514,6 +521,7 @@ type targetLabels struct { up bool originalLabels *promutils.Labels labels *promutils.Labels + dropReason targetDropReason } type targetLabelsByJob struct { jobName string @@ -604,6 +612,7 @@ func (tsr *targetsStatusResult) getTargetLabelsByJob() []*targetLabelsByJob { m.droppedTargets++ m.targets = append(m.targets, targetLabels{ originalLabels: dt.originalLabels, + dropReason: dt.dropReason, }) } a := make([]*targetLabelsByJob, 0, len(byJob)) diff --git a/lib/promscrape/targetstatus.qtpl b/lib/promscrape/targetstatus.qtpl index 97f822add..0c767103a 100644 --- a/lib/promscrape/targetstatus.qtpl +++ b/lib/promscrape/targetstatus.qtpl @@ -336,7 +336,7 @@ {% elseif t.labels.Len() > 0 %} DOWN {% else %} - DROPPED + DROPPED ({%s string(t.dropReason) %}) {% endif %}