mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-10 15:14:09 +00:00
lib/promscrape/discovery/kubernetes: do not generate targets for already terminated pods and containers
Already terminated pods and containers cannot be scraped and will never resurrect, so there is zero sense in creating scrape targets for them.
This commit is contained in:
parent
4d961c70f7
commit
ef12598ad4
5 changed files with 79 additions and 12 deletions
|
@ -78,6 +78,7 @@ The sandbox cluster installation is running under the constant load generated by
|
|||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): exit if there is a config syntax error in [`scrape_config_files`](https://docs.victoriametrics.com/vmagent.html#loading-scrape-configs-from-multiple-files) when `-promscrape.config.strictParse=true`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5508).
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly discover targets for `role: endpoints` and `role: endpointslice` in [kubernetes_sd_configs](https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs). Previously some `endpoints` and `endpointslice` targets could be left undiscovered or some targets could have missing `__meta_*` labels when performing service discovery in busy Kubernetes clusters with a large number of pods. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5557).
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): respect explicitly set `series_limit: 0` in [scrape_config](https://docs.victoriametrics.com/sd_configs.html#scrape_configs). This allows removing [`series_limit` restriction](https://docs.victoriametrics.com/vmagent.html#cardinality-limiter) on a per-`scrape_config` basis when global limit is set via `-promscrape.seriesLimitPerTarget`. Previously, `0` value was ignored in favor of `-promscrape.seriesLimitPerTarget`.
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): do not discover scrape targets for already terminated pods and containers in [`kubernetes_sd_configs`](https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs). Such pods and containers cannot be scraped and cannot resurrect, so there is no sense in generating scrape targets for them.
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix a link for the statistic inaccuracy explanation in the cardinality explorer tool. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5460).
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix the display of autocomplete results and cache the results. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5472) and [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5470).
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): send `step` param for instant queries. The change reverts [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3896) due to reasons explained in [this comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3896#issuecomment-1896704401).
|
||||
|
|
|
@ -133,7 +133,7 @@ func (eps *Endpoints) getTargetLabels(gw *groupWatcher) []*promutils.Labels {
|
|||
m := promutils.GetLabels()
|
||||
m.Add("__address__", addr)
|
||||
p.appendCommonLabels(m, gw)
|
||||
p.appendContainerLabels(m, c, &cp)
|
||||
p.appendContainerLabels(m, &c, &cp)
|
||||
|
||||
// Prometheus sets endpoints_name and namespace labels for all endpoints
|
||||
// Even if port is not matching service port.
|
||||
|
@ -189,7 +189,7 @@ func getEndpointLabelsForAddressAndPort(gw *groupWatcher, podPortsSeen map[*Pod]
|
|||
for _, cp := range c.Ports {
|
||||
if cp.ContainerPort == epp.Port {
|
||||
podPortsSeen[p] = append(podPortsSeen[p], cp.ContainerPort)
|
||||
p.appendContainerLabels(m, c, &cp)
|
||||
p.appendContainerLabels(m, &c, &cp)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
|
|
@ -83,7 +83,7 @@ func (eps *EndpointSlice) getTargetLabels(gw *groupWatcher) []*promutils.Labels
|
|||
m := promutils.GetLabels()
|
||||
m.Add("__address__", addr)
|
||||
p.appendCommonLabels(m, gw)
|
||||
p.appendContainerLabels(m, c, &cp)
|
||||
p.appendContainerLabels(m, &c, &cp)
|
||||
|
||||
// Prometheus sets endpoints_name and namespace labels for all endpoints
|
||||
// Even if port is not matching service port.
|
||||
|
@ -127,7 +127,7 @@ func getEndpointSliceLabelsForAddressAndPort(gw *groupWatcher, podPortsSeen map[
|
|||
for _, cp := range c.Ports {
|
||||
if cp.ContainerPort == epp.Port {
|
||||
podPortsSeen[p] = append(podPortsSeen[p], cp.ContainerPort)
|
||||
p.appendContainerLabels(m, c, &cp)
|
||||
p.appendContainerLabels(m, &c, &cp)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
|
|
@ -104,19 +104,51 @@ type PodCondition struct {
|
|||
type ContainerStatus struct {
|
||||
Name string
|
||||
ContainerID string
|
||||
State ContainerState
|
||||
}
|
||||
|
||||
// ContainerState implements k8s container state.
|
||||
//
|
||||
// See https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#containerstate-v1-core
|
||||
type ContainerState struct {
|
||||
Terminated *ContainerStateTerminated
|
||||
}
|
||||
|
||||
// ContainerStateTerminated implements k8s terminated container state.
|
||||
//
|
||||
// See https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#containerstateterminated-v1-core
|
||||
type ContainerStateTerminated struct {
|
||||
ExitCode int
|
||||
}
|
||||
|
||||
func getContainerID(p *Pod, containerName string, isInit bool) string {
|
||||
cs := p.getContainerStatus(containerName, isInit)
|
||||
if cs == nil {
|
||||
return ""
|
||||
}
|
||||
return cs.ContainerID
|
||||
}
|
||||
|
||||
func isContainerTerminated(p *Pod, containerName string, isInit bool) bool {
|
||||
cs := p.getContainerStatus(containerName, isInit)
|
||||
if cs == nil {
|
||||
return false
|
||||
}
|
||||
return cs.State.Terminated != nil
|
||||
}
|
||||
|
||||
func (p *Pod) getContainerStatus(containerName string, isInit bool) *ContainerStatus {
|
||||
css := p.Status.ContainerStatuses
|
||||
if isInit {
|
||||
css = p.Status.InitContainerStatuses
|
||||
}
|
||||
for _, cs := range css {
|
||||
for i := range css {
|
||||
cs := &css[i]
|
||||
if cs.Name == containerName {
|
||||
return cs.ContainerID
|
||||
return cs
|
||||
}
|
||||
}
|
||||
return ""
|
||||
return nil
|
||||
}
|
||||
|
||||
// getTargetLabels returns labels for each port of the given p.
|
||||
|
@ -124,28 +156,42 @@ func getContainerID(p *Pod, containerName string, isInit bool) string {
|
|||
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#pod
|
||||
func (p *Pod) getTargetLabels(gw *groupWatcher) []*promutils.Labels {
|
||||
if len(p.Status.PodIP) == 0 {
|
||||
// Skip pod without IP
|
||||
// Skip pod without IP, since such pods cannot be scraped.
|
||||
return nil
|
||||
}
|
||||
if isPodPhaseFinished(p.Status.Phase) {
|
||||
// Skip already stopped pod, since it cannot be scraped.
|
||||
return nil
|
||||
}
|
||||
|
||||
var ms []*promutils.Labels
|
||||
ms = appendPodLabels(ms, gw, p, p.Spec.Containers, false)
|
||||
ms = appendPodLabels(ms, gw, p, p.Spec.InitContainers, true)
|
||||
return ms
|
||||
}
|
||||
|
||||
func isPodPhaseFinished(phase string) bool {
|
||||
// See https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase
|
||||
return phase == "Succeeded" || phase == "Failed"
|
||||
|
||||
}
|
||||
func appendPodLabels(ms []*promutils.Labels, gw *groupWatcher, p *Pod, cs []Container, isInit bool) []*promutils.Labels {
|
||||
for _, c := range cs {
|
||||
if isContainerTerminated(p, c.Name, isInit) {
|
||||
// Skip terminated containers
|
||||
continue
|
||||
}
|
||||
for _, cp := range c.Ports {
|
||||
ms = appendPodLabelsInternal(ms, gw, p, c, &cp, isInit)
|
||||
ms = appendPodLabelsInternal(ms, gw, p, &c, &cp, isInit)
|
||||
}
|
||||
if len(c.Ports) == 0 {
|
||||
ms = appendPodLabelsInternal(ms, gw, p, c, nil, isInit)
|
||||
ms = appendPodLabelsInternal(ms, gw, p, &c, nil, isInit)
|
||||
}
|
||||
}
|
||||
return ms
|
||||
}
|
||||
|
||||
func appendPodLabelsInternal(ms []*promutils.Labels, gw *groupWatcher, p *Pod, c Container, cp *ContainerPort, isInit bool) []*promutils.Labels {
|
||||
func appendPodLabelsInternal(ms []*promutils.Labels, gw *groupWatcher, p *Pod, c *Container, cp *ContainerPort, isInit bool) []*promutils.Labels {
|
||||
addr := p.Status.PodIP
|
||||
if cp != nil {
|
||||
addr = discoveryutils.JoinHostPort(addr, cp.ContainerPort)
|
||||
|
@ -168,7 +214,7 @@ func appendPodLabelsInternal(ms []*promutils.Labels, gw *groupWatcher, p *Pod, c
|
|||
return append(ms, m)
|
||||
}
|
||||
|
||||
func (p *Pod) appendContainerLabels(m *promutils.Labels, c Container, cp *ContainerPort) {
|
||||
func (p *Pod) appendContainerLabels(m *promutils.Labels, c *Container, cp *ContainerPort) {
|
||||
m.Add("__meta_kubernetes_pod_container_image", c.Image)
|
||||
m.Add("__meta_kubernetes_pod_container_name", c.Name)
|
||||
if cp != nil {
|
||||
|
|
|
@ -80,6 +80,17 @@ const testPodsList = `
|
|||
}
|
||||
],
|
||||
"containers": [
|
||||
{
|
||||
"name": "terminated-container",
|
||||
"image": "terminated-image",
|
||||
"ports": [
|
||||
{
|
||||
"name": "terminated-port",
|
||||
"containerPort": 4321,
|
||||
"protocol": "TCP"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "etcd",
|
||||
"image": "k8s.gcr.io/etcd:3.4.3-0",
|
||||
|
@ -197,6 +208,15 @@ const testPodsList = `
|
|||
],
|
||||
"startTime": "2020-03-20T13:30:29Z",
|
||||
"containerStatuses": [
|
||||
{
|
||||
"name": "terminated-container",
|
||||
"state": {
|
||||
"terminated": {
|
||||
"exitCode": 432
|
||||
}
|
||||
},
|
||||
"containerID": "terminated-container-id"
|
||||
},
|
||||
{
|
||||
"name": "etcd",
|
||||
"state": {
|
||||
|
|
Loading…
Reference in a new issue