From 01f55bc66b3c5e4e600ffe8754b27041b2beda5b Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Thu, 7 Jul 2022 00:32:24 +0300 Subject: [PATCH] lib/promscrape/discovery/kubernetes: properly populate service-level labels for `role: endpointslice` targets Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2823 --- docs/CHANGELOG.md | 1 + .../discovery/kubernetes/endpoints_test.go | 2 +- .../discovery/kubernetes/endpointslice.go | 5 +- .../kubernetes/endpointslice_test.go | 278 ++++++++++++++++++ 4 files changed, 284 insertions(+), 2 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index b32de5d25..3c9dacbf9 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -58,6 +58,7 @@ scrape_configs: * BUGFIX: limit max memory occupied by the cache, which stores parsed regular expressions. Previously too long regular expressions passed in [MetricsQL queries](https://docs.victoriametrics.com/MetricsQL.html) could result in big amounts of used memory (e.g. multiple of gigabytes). Now the max cache size for parsed regexps is limited to a a few megabytes. * BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly handle partial counter resets when calculating [rate](https://docs.victoriametrics.com/MetricsQL.html#rate), [irate](https://docs.victoriametrics.com/MetricsQL.html#irate) and [increase](https://docs.victoriametrics.com/MetricsQL.html#increase) functions. Previously these functions could return zero values after partial counter resets until the counter increases to the last value before partial counter reset. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2787). * BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly calculate [histogram_quantile](https://docs.victoriametrics.com/MetricsQL.html#histogram_quantile) over Prometheus buckets with unexpected values. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2819). +* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly add service-level labels (`__meta_kubernetes_service_*`) to discovered targets for `role: endpointslice` in [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config). Previously these labels were missing. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2823). * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): make sure that [stale markers](https://docs.victoriametrics.com/vmagent.html#prometheus-staleness-markers) are generated with the actual timestamp when unsuccessful scrape occurs. This should prevent from possible time series overlap on scrape target restart in dynmaic envirnoments such as Kubernetes. * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly reload changed `-promscrape.config` file when `-promscrape.configCheckInterval` option is set. The changed config file wasn't reloaded in this case since [v1.69.0](#v1690). See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2786). Thanks to @ttyv for the fix. * BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): assume that the response is complete if `-search.denyPartialResponse` is enabled and up to `-replicationFactor - 1` `vmstorage` nodes are unavailable. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1767). diff --git a/lib/promscrape/discovery/kubernetes/endpoints_test.go b/lib/promscrape/discovery/kubernetes/endpoints_test.go index 7f13fa75f..bbd7c296b 100644 --- a/lib/promscrape/discovery/kubernetes/endpoints_test.go +++ b/lib/promscrape/discovery/kubernetes/endpoints_test.go @@ -114,7 +114,7 @@ func TestParseEndpointsListSuccess(t *testing.T) { } } -func TestGetEndpointLabels(t *testing.T) { +func TestGetEndpointsLabels(t *testing.T) { type testArgs struct { containerPorts map[string][]ContainerPort endpointPorts []EndpointPort diff --git a/lib/promscrape/discovery/kubernetes/endpointslice.go b/lib/promscrape/discovery/kubernetes/endpointslice.go index 8e2244989..f4a2a07ac 100644 --- a/lib/promscrape/discovery/kubernetes/endpointslice.go +++ b/lib/promscrape/discovery/kubernetes/endpointslice.go @@ -38,8 +38,11 @@ func parseEndpointSlice(data []byte) (object, error) { // // See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#endpointslices func (eps *EndpointSlice) getTargetLabels(gw *groupWatcher) []map[string]string { + // The associated service name is stored in kubernetes.io/service-name label. + // See https://kubernetes.io/docs/reference/labels-annotations-taints/#kubernetesioservice-name + svcName := eps.Metadata.Labels.GetByName("kubernetes.io/service-name") var svc *Service - if o := gw.getObjectByRoleLocked("service", eps.Metadata.Namespace, eps.Metadata.Name); o != nil { + if o := gw.getObjectByRoleLocked("service", eps.Metadata.Namespace, svcName); o != nil { svc = o.(*Service) } podPortsSeen := make(map[*Pod][]int) diff --git a/lib/promscrape/discovery/kubernetes/endpointslice_test.go b/lib/promscrape/discovery/kubernetes/endpointslice_test.go index a09f15841..f6c65a094 100644 --- a/lib/promscrape/discovery/kubernetes/endpointslice_test.go +++ b/lib/promscrape/discovery/kubernetes/endpointslice_test.go @@ -224,3 +224,281 @@ func TestParseEndpointSliceListSuccess(t *testing.T) { } } + +func TestGetEndpointsliceLabels(t *testing.T) { + type testArgs struct { + containerPorts map[string][]ContainerPort + endpointPorts []EndpointPort + } + f := func(t *testing.T, args testArgs, wantLabels [][]prompbmarshal.Label) { + t.Helper() + eps := EndpointSlice{ + Metadata: ObjectMeta{ + Name: "test-eps", + Namespace: "default", + Labels: discoveryutils.GetSortedLabels(map[string]string{ + "kubernetes.io/service-name": "test-svc", + }), + }, + Endpoints: []Endpoint{ + { + Addresses: []string{ + "10.13.15.15", + }, + Conditions: EndpointConditions{ + Ready: true, + }, + Hostname: "foo.bar", + TargetRef: ObjectReference{ + Kind: "Pod", + Namespace: "default", + Name: "test-pod", + }, + Topology: map[string]string{ + "x": "y", + }, + }, + }, + AddressType: "foobar", + Ports: args.endpointPorts, + } + svc := Service{ + Metadata: ObjectMeta{ + Name: "test-svc", + Namespace: "default", + }, + Spec: ServiceSpec{ + ClusterIP: "1.2.3.4", + Type: "service-type", + Ports: []ServicePort{ + { + Name: "test-port", + Port: 8081, + }, + }, + }, + } + pod := Pod{ + Metadata: ObjectMeta{ + UID: "pod-uid", + Name: "test-pod", + Namespace: "default", + }, + Spec: PodSpec{ + NodeName: "test-node", + }, + Status: PodStatus{ + Phase: "abc", + PodIP: "192.168.15.1", + HostIP: "4.5.6.7", + }, + } + node := Node{ + Metadata: ObjectMeta{ + Labels: []prompbmarshal.Label{ + { + Name: "node-label", + Value: "xyz", + }, + }, + }, + } + for cn, ports := range args.containerPorts { + pod.Spec.Containers = append(pod.Spec.Containers, Container{Name: cn, Ports: ports}) + } + var gw groupWatcher + gw.m = map[string]*urlWatcher{ + "pod": { + role: "pod", + objectsByKey: map[string]object{ + "default/test-pod": &pod, + }, + }, + "service": { + role: "service", + objectsByKey: map[string]object{ + "default/test-svc": &svc, + }, + }, + "node": { + role: "node", + objectsByKey: map[string]object{ + "/test-node": &node, + }, + }, + } + gw.attachNodeMetadata = true + var sortedLabelss [][]prompbmarshal.Label + gotLabels := eps.getTargetLabels(&gw) + for _, lbs := range gotLabels { + sortedLabelss = append(sortedLabelss, discoveryutils.GetSortedLabels(lbs)) + } + if !areEqualLabelss(sortedLabelss, wantLabels) { + t.Fatalf("unexpected labels:\ngot\n%v\nwant\n%v", sortedLabelss, wantLabels) + } + } + + t.Run("1 port from endpoint", func(t *testing.T) { + f(t, testArgs{ + endpointPorts: []EndpointPort{ + { + Name: "web", + Port: 8081, + Protocol: "foobar", + }, + }, + }, [][]prompbmarshal.Label{ + discoveryutils.GetSortedLabels(map[string]string{ + "__address__": "10.13.15.15:8081", + "__meta_kubernetes_endpointslice_address_target_kind": "Pod", + "__meta_kubernetes_endpointslice_address_target_name": "test-pod", + "__meta_kubernetes_endpointslice_address_type": "foobar", + "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true", + "__meta_kubernetes_endpointslice_endpoint_hostname": "foo.bar", + "__meta_kubernetes_endpointslice_endpoint_topology_present_x": "true", + "__meta_kubernetes_endpointslice_endpoint_topology_x": "y", + "__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "test-svc", + "__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true", + "__meta_kubernetes_endpointslice_name": "test-eps", + "__meta_kubernetes_endpointslice_port": "8081", + "__meta_kubernetes_endpointslice_port_name": "web", + "__meta_kubernetes_endpointslice_port_protocol": "foobar", + "__meta_kubernetes_namespace": "default", + "__meta_kubernetes_node_label_node_label": "xyz", + "__meta_kubernetes_node_labelpresent_node_label": "true", + "__meta_kubernetes_node_name": "test-node", + "__meta_kubernetes_pod_host_ip": "4.5.6.7", + "__meta_kubernetes_pod_ip": "192.168.15.1", + "__meta_kubernetes_pod_name": "test-pod", + "__meta_kubernetes_pod_node_name": "test-node", + "__meta_kubernetes_pod_phase": "abc", + "__meta_kubernetes_pod_ready": "unknown", + "__meta_kubernetes_pod_uid": "pod-uid", + "__meta_kubernetes_service_cluster_ip": "1.2.3.4", + "__meta_kubernetes_service_name": "test-svc", + "__meta_kubernetes_service_type": "service-type", + }), + }) + }) + + t.Run("1 port from endpoint and 1 from pod", func(t *testing.T) { + f(t, testArgs{ + containerPorts: map[string][]ContainerPort{"metrics": {{ + Name: "http-metrics", + ContainerPort: 8428, + Protocol: "foobar", + }}}, + endpointPorts: []EndpointPort{ + { + Name: "web", + Port: 8081, + Protocol: "https", + }, + }, + }, [][]prompbmarshal.Label{ + discoveryutils.GetSortedLabels(map[string]string{ + "__address__": "10.13.15.15:8081", + "__meta_kubernetes_endpointslice_address_target_kind": "Pod", + "__meta_kubernetes_endpointslice_address_target_name": "test-pod", + "__meta_kubernetes_endpointslice_address_type": "foobar", + "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true", + "__meta_kubernetes_endpointslice_endpoint_hostname": "foo.bar", + "__meta_kubernetes_endpointslice_endpoint_topology_present_x": "true", + "__meta_kubernetes_endpointslice_endpoint_topology_x": "y", + "__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "test-svc", + "__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true", + "__meta_kubernetes_endpointslice_name": "test-eps", + "__meta_kubernetes_endpointslice_port": "8081", + "__meta_kubernetes_endpointslice_port_name": "web", + "__meta_kubernetes_endpointslice_port_protocol": "https", + "__meta_kubernetes_namespace": "default", + "__meta_kubernetes_node_label_node_label": "xyz", + "__meta_kubernetes_node_labelpresent_node_label": "true", + "__meta_kubernetes_node_name": "test-node", + "__meta_kubernetes_pod_host_ip": "4.5.6.7", + "__meta_kubernetes_pod_ip": "192.168.15.1", + "__meta_kubernetes_pod_name": "test-pod", + "__meta_kubernetes_pod_node_name": "test-node", + "__meta_kubernetes_pod_phase": "abc", + "__meta_kubernetes_pod_ready": "unknown", + "__meta_kubernetes_pod_uid": "pod-uid", + "__meta_kubernetes_service_cluster_ip": "1.2.3.4", + "__meta_kubernetes_service_name": "test-svc", + "__meta_kubernetes_service_type": "service-type", + }), + discoveryutils.GetSortedLabels(map[string]string{ + "__address__": "192.168.15.1:8428", + "__meta_kubernetes_namespace": "default", + "__meta_kubernetes_node_label_node_label": "xyz", + "__meta_kubernetes_node_labelpresent_node_label": "true", + "__meta_kubernetes_node_name": "test-node", + "__meta_kubernetes_pod_container_name": "metrics", + "__meta_kubernetes_pod_container_port_name": "http-metrics", + "__meta_kubernetes_pod_container_port_number": "8428", + "__meta_kubernetes_pod_container_port_protocol": "foobar", + "__meta_kubernetes_pod_host_ip": "4.5.6.7", + "__meta_kubernetes_pod_ip": "192.168.15.1", + "__meta_kubernetes_pod_name": "test-pod", + "__meta_kubernetes_pod_node_name": "test-node", + "__meta_kubernetes_pod_phase": "abc", + "__meta_kubernetes_pod_ready": "unknown", + "__meta_kubernetes_pod_uid": "pod-uid", + "__meta_kubernetes_service_cluster_ip": "1.2.3.4", + "__meta_kubernetes_service_name": "test-svc", + "__meta_kubernetes_service_type": "service-type", + }), + }) + }) + + t.Run("1 port from endpoint", func(t *testing.T) { + f(t, testArgs{ + containerPorts: map[string][]ContainerPort{"metrics": {{ + Name: "web", + ContainerPort: 8428, + Protocol: "sdc", + }}}, + endpointPorts: []EndpointPort{ + { + Name: "web", + Port: 8428, + Protocol: "xabc", + }, + }, + }, [][]prompbmarshal.Label{ + discoveryutils.GetSortedLabels(map[string]string{ + "__address__": "10.13.15.15:8428", + "__meta_kubernetes_endpointslice_address_target_kind": "Pod", + "__meta_kubernetes_endpointslice_address_target_name": "test-pod", + "__meta_kubernetes_endpointslice_address_type": "foobar", + "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true", + "__meta_kubernetes_endpointslice_endpoint_hostname": "foo.bar", + "__meta_kubernetes_endpointslice_endpoint_topology_present_x": "true", + "__meta_kubernetes_endpointslice_endpoint_topology_x": "y", + "__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "test-svc", + "__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true", + "__meta_kubernetes_endpointslice_name": "test-eps", + "__meta_kubernetes_endpointslice_port": "8428", + "__meta_kubernetes_endpointslice_port_name": "web", + "__meta_kubernetes_endpointslice_port_protocol": "xabc", + "__meta_kubernetes_namespace": "default", + "__meta_kubernetes_node_label_node_label": "xyz", + "__meta_kubernetes_node_labelpresent_node_label": "true", + "__meta_kubernetes_node_name": "test-node", + "__meta_kubernetes_pod_container_name": "metrics", + "__meta_kubernetes_pod_container_port_name": "web", + "__meta_kubernetes_pod_container_port_number": "8428", + "__meta_kubernetes_pod_container_port_protocol": "sdc", + "__meta_kubernetes_pod_host_ip": "4.5.6.7", + "__meta_kubernetes_pod_ip": "192.168.15.1", + "__meta_kubernetes_pod_name": "test-pod", + "__meta_kubernetes_pod_node_name": "test-node", + "__meta_kubernetes_pod_phase": "abc", + "__meta_kubernetes_pod_ready": "unknown", + "__meta_kubernetes_pod_uid": "pod-uid", + "__meta_kubernetes_service_cluster_ip": "1.2.3.4", + "__meta_kubernetes_service_name": "test-svc", + "__meta_kubernetes_service_type": "service-type", + }), + }) + }) +}