lib/promscrape/discovery/kubernetes: do not generate targets for already terminated pods and containers

Already terminated pods and containers cannot be scraped and will never resurrect,
so there is zero sense in creating scrape targets for them.
This commit is contained in:
Aliaksandr Valialkin 2024-01-24 14:52:12 +02:00
parent 4d961c70f7
commit ef12598ad4
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
5 changed files with 79 additions and 12 deletions

View file

@ -78,6 +78,7 @@ The sandbox cluster installation is running under the constant load generated by
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): exit if there is a config syntax error in [`scrape_config_files`](https://docs.victoriametrics.com/vmagent.html#loading-scrape-configs-from-multiple-files) when `-promscrape.config.strictParse=true`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5508).
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly discover targets for `role: endpoints` and `role: endpointslice` in [kubernetes_sd_configs](https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs). Previously some `endpoints` and `endpointslice` targets could be left undiscovered or some targets could have missing `__meta_*` labels when performing service discovery in busy Kubernetes clusters with a large number of pods. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5557).
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): respect explicitly set `series_limit: 0` in [scrape_config](https://docs.victoriametrics.com/sd_configs.html#scrape_configs). This allows removing [`series_limit` restriction](https://docs.victoriametrics.com/vmagent.html#cardinality-limiter) on a per-`scrape_config` basis when global limit is set via `-promscrape.seriesLimitPerTarget`. Previously, `0` value was ignored in favor of `-promscrape.seriesLimitPerTarget`.
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): do not discover scrape targets for already terminated pods and containers in [`kubernetes_sd_configs`](https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs). Such pods and containers cannot be scraped and cannot resurrect, so there is no sense in generating scrape targets for them.
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix a link for the statistic inaccuracy explanation in the cardinality explorer tool. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5460).
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix the display of autocomplete results and cache the results. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5472) and [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5470).
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): send `step` param for instant queries. The change reverts [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3896) due to reasons explained in [this comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3896#issuecomment-1896704401).

View file

@ -133,7 +133,7 @@ func (eps *Endpoints) getTargetLabels(gw *groupWatcher) []*promutils.Labels {
m := promutils.GetLabels()
m.Add("__address__", addr)
p.appendCommonLabels(m, gw)
p.appendContainerLabels(m, c, &cp)
p.appendContainerLabels(m, &c, &cp)
// Prometheus sets endpoints_name and namespace labels for all endpoints
// Even if port is not matching service port.
@ -189,7 +189,7 @@ func getEndpointLabelsForAddressAndPort(gw *groupWatcher, podPortsSeen map[*Pod]
for _, cp := range c.Ports {
if cp.ContainerPort == epp.Port {
podPortsSeen[p] = append(podPortsSeen[p], cp.ContainerPort)
p.appendContainerLabels(m, c, &cp)
p.appendContainerLabels(m, &c, &cp)
break
}
}

View file

@ -83,7 +83,7 @@ func (eps *EndpointSlice) getTargetLabels(gw *groupWatcher) []*promutils.Labels
m := promutils.GetLabels()
m.Add("__address__", addr)
p.appendCommonLabels(m, gw)
p.appendContainerLabels(m, c, &cp)
p.appendContainerLabels(m, &c, &cp)
// Prometheus sets endpoints_name and namespace labels for all endpoints
// Even if port is not matching service port.
@ -127,7 +127,7 @@ func getEndpointSliceLabelsForAddressAndPort(gw *groupWatcher, podPortsSeen map[
for _, cp := range c.Ports {
if cp.ContainerPort == epp.Port {
podPortsSeen[p] = append(podPortsSeen[p], cp.ContainerPort)
p.appendContainerLabels(m, c, &cp)
p.appendContainerLabels(m, &c, &cp)
break
}
}

View file

@ -104,19 +104,51 @@ type PodCondition struct {
type ContainerStatus struct {
// Name is the container name; getContainerStatus compares it with the requested container name.
Name string
// ContainerID is the identifier that getContainerID returns for the matching container.
ContainerID string
// State holds the container state; isContainerTerminated checks State.Terminated.
State ContainerState
}
// ContainerState implements k8s container state.
//
// Only the Terminated field is declared here.
//
// See https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#containerstate-v1-core
type ContainerState struct {
// Terminated is checked against nil by isContainerTerminated:
// a non-nil value marks the container as terminated.
Terminated *ContainerStateTerminated
}
// ContainerStateTerminated implements k8s terminated container state.
//
// See https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#containerstateterminated-v1-core
type ContainerStateTerminated struct {
// ExitCode is presumably the exit status of the terminated container
// (the `exitCode` field of the k8s object) — not read by the visible code.
ExitCode int
}
// getContainerID returns the ContainerID recorded in the status of the container
// with the given containerName in pod p.
//
// isInit is passed through to p.getContainerStatus together with containerName.
// An empty string is returned if no status is found for the container.
func getContainerID(p *Pod, containerName string, isInit bool) string {
	if status := p.getContainerStatus(containerName, isInit); status != nil {
		return status.ContainerID
	}
	return ""
}
// isContainerTerminated reports whether the status of the container with the given
// containerName in pod p carries a non-nil Terminated state.
//
// isInit is passed through to p.getContainerStatus together with containerName.
// A container without any status is treated as not terminated.
func isContainerTerminated(p *Pod, containerName string, isInit bool) bool {
	status := p.getContainerStatus(containerName, isInit)
	return status != nil && status.State.Terminated != nil
}
func (p *Pod) getContainerStatus(containerName string, isInit bool) *ContainerStatus {
css := p.Status.ContainerStatuses
if isInit {
css = p.Status.InitContainerStatuses
}
for _, cs := range css {
for i := range css {
cs := &css[i]
if cs.Name == containerName {
return cs.ContainerID
return cs
}
}
return ""
return nil
}
// getTargetLabels returns labels for each port of the given p.
@ -124,28 +156,42 @@ func getContainerID(p *Pod, containerName string, isInit bool) string {
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#pod
func (p *Pod) getTargetLabels(gw *groupWatcher) []*promutils.Labels {
if len(p.Status.PodIP) == 0 {
// Skip pod without IP
// Skip pod without IP, since such pods cannot be scraped.
return nil
}
if isPodPhaseFinished(p.Status.Phase) {
// Skip already stopped pod, since it cannot be scraped.
return nil
}
var ms []*promutils.Labels
ms = appendPodLabels(ms, gw, p, p.Spec.Containers, false)
ms = appendPodLabels(ms, gw, p, p.Spec.InitContainers, true)
return ms
}
// isPodPhaseFinished reports whether phase is one of the final pod phases,
// i.e. "Succeeded" or "Failed". The comparison is case-sensitive.
//
// See https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase
func isPodPhaseFinished(phase string) bool {
	switch phase {
	case "Succeeded", "Failed":
		return true
	default:
		return false
	}
}
func appendPodLabels(ms []*promutils.Labels, gw *groupWatcher, p *Pod, cs []Container, isInit bool) []*promutils.Labels {
for _, c := range cs {
if isContainerTerminated(p, c.Name, isInit) {
// Skip terminated containers
continue
}
for _, cp := range c.Ports {
ms = appendPodLabelsInternal(ms, gw, p, c, &cp, isInit)
ms = appendPodLabelsInternal(ms, gw, p, &c, &cp, isInit)
}
if len(c.Ports) == 0 {
ms = appendPodLabelsInternal(ms, gw, p, c, nil, isInit)
ms = appendPodLabelsInternal(ms, gw, p, &c, nil, isInit)
}
}
return ms
}
func appendPodLabelsInternal(ms []*promutils.Labels, gw *groupWatcher, p *Pod, c Container, cp *ContainerPort, isInit bool) []*promutils.Labels {
func appendPodLabelsInternal(ms []*promutils.Labels, gw *groupWatcher, p *Pod, c *Container, cp *ContainerPort, isInit bool) []*promutils.Labels {
addr := p.Status.PodIP
if cp != nil {
addr = discoveryutils.JoinHostPort(addr, cp.ContainerPort)
@ -168,7 +214,7 @@ func appendPodLabelsInternal(ms []*promutils.Labels, gw *groupWatcher, p *Pod, c
return append(ms, m)
}
func (p *Pod) appendContainerLabels(m *promutils.Labels, c Container, cp *ContainerPort) {
func (p *Pod) appendContainerLabels(m *promutils.Labels, c *Container, cp *ContainerPort) {
m.Add("__meta_kubernetes_pod_container_image", c.Image)
m.Add("__meta_kubernetes_pod_container_name", c.Name)
if cp != nil {

View file

@ -80,6 +80,17 @@ const testPodsList = `
}
],
"containers": [
{
"name": "terminated-container",
"image": "terminated-image",
"ports": [
{
"name": "terminated-port",
"containerPort": 4321,
"protocol": "TCP"
}
]
},
{
"name": "etcd",
"image": "k8s.gcr.io/etcd:3.4.3-0",
@ -197,6 +208,15 @@ const testPodsList = `
],
"startTime": "2020-03-20T13:30:29Z",
"containerStatuses": [
{
"name": "terminated-container",
"state": {
"terminated": {
"exitCode": 432
}
},
"containerID": "terminated-container-id"
},
{
"name": "etcd",
"state": {