lib/promscrape/discovery/kubernetes: properly populate service-level labels for role: endpointslice targets

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2823
This commit is contained in:
Aliaksandr Valialkin 2022-07-07 00:32:24 +03:00
parent b186b63e07
commit 01f55bc66b
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
4 changed files with 284 additions and 2 deletions

View file

@ -58,6 +58,7 @@ scrape_configs:
* BUGFIX: limit max memory occupied by the cache, which stores parsed regular expressions. Previously too long regular expressions passed in [MetricsQL queries](https://docs.victoriametrics.com/MetricsQL.html) could result in big amounts of used memory (e.g. multiple gigabytes). Now the max cache size for parsed regexps is limited to a few megabytes.
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly handle partial counter resets when calculating [rate](https://docs.victoriametrics.com/MetricsQL.html#rate), [irate](https://docs.victoriametrics.com/MetricsQL.html#irate) and [increase](https://docs.victoriametrics.com/MetricsQL.html#increase) functions. Previously these functions could return zero values after partial counter resets until the counter increases to the last value before partial counter reset. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2787).
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly calculate [histogram_quantile](https://docs.victoriametrics.com/MetricsQL.html#histogram_quantile) over Prometheus buckets with unexpected values. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2819).
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly add service-level labels (`__meta_kubernetes_service_*`) to discovered targets for `role: endpointslice` in [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config). Previously these labels were missing. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2823).
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): make sure that [stale markers](https://docs.victoriametrics.com/vmagent.html#prometheus-staleness-markers) are generated with the actual timestamp when unsuccessful scrape occurs. This should prevent possible time series overlap on scrape target restart in dynamic environments such as Kubernetes.
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly reload changed `-promscrape.config` file when `-promscrape.configCheckInterval` option is set. The changed config file wasn't reloaded in this case since [v1.69.0](#v1690). See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2786). Thanks to @ttyv for the fix.
* BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): assume that the response is complete if `-search.denyPartialResponse` is enabled and up to `-replicationFactor - 1` `vmstorage` nodes are unavailable. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1767).

View file

@ -114,7 +114,7 @@ func TestParseEndpointsListSuccess(t *testing.T) {
}
}
func TestGetEndpointLabels(t *testing.T) {
func TestGetEndpointsLabels(t *testing.T) {
type testArgs struct {
containerPorts map[string][]ContainerPort
endpointPorts []EndpointPort

View file

@ -38,8 +38,11 @@ func parseEndpointSlice(data []byte) (object, error) {
//
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#endpointslices
func (eps *EndpointSlice) getTargetLabels(gw *groupWatcher) []map[string]string {
// The associated service name is stored in kubernetes.io/service-name label.
// See https://kubernetes.io/docs/reference/labels-annotations-taints/#kubernetesioservice-name
svcName := eps.Metadata.Labels.GetByName("kubernetes.io/service-name")
var svc *Service
if o := gw.getObjectByRoleLocked("service", eps.Metadata.Namespace, eps.Metadata.Name); o != nil {
if o := gw.getObjectByRoleLocked("service", eps.Metadata.Namespace, svcName); o != nil {
svc = o.(*Service)
}
podPortsSeen := make(map[*Pod][]int)

View file

@ -224,3 +224,281 @@ func TestParseEndpointSliceListSuccess(t *testing.T) {
}
}
// TestGetEndpointsliceLabels verifies the target labels returned by
// EndpointSlice.getTargetLabels for various combinations of endpoint ports
// and pod container ports.
//
// The EndpointSlice under test is named "test-eps", while the associated
// Service is named "test-svc" and is referenced only via the
// "kubernetes.io/service-name" label. The expected __meta_kubernetes_service_*
// labels therefore can be produced only if the service is looked up
// by that label rather than by the EndpointSlice name.
func TestGetEndpointsliceLabels(t *testing.T) {
	type testArgs struct {
		// containerPorts maps container name to the ports exposed by this container in the test pod.
		containerPorts map[string][]ContainerPort
		// endpointPorts is the list of ports assigned to the EndpointSlice under test.
		endpointPorts []EndpointPort
	}
	// f builds an EndpointSlice, Service, Pod and Node from args, registers them in a groupWatcher,
	// and compares the sorted labels returned by getTargetLabels against wantLabels.
	f := func(t *testing.T, args testArgs, wantLabels [][]prompbmarshal.Label) {
		t.Helper()
		eps := EndpointSlice{
			Metadata: ObjectMeta{
				Name:      "test-eps",
				Namespace: "default",
				// The service name intentionally differs from the EndpointSlice name.
				Labels: discoveryutils.GetSortedLabels(map[string]string{
					"kubernetes.io/service-name": "test-svc",
				}),
			},
			Endpoints: []Endpoint{
				{
					Addresses: []string{
						"10.13.15.15",
					},
					Conditions: EndpointConditions{
						Ready: true,
					},
					Hostname: "foo.bar",
					TargetRef: ObjectReference{
						Kind:      "Pod",
						Namespace: "default",
						Name:      "test-pod",
					},
					Topology: map[string]string{
						"x": "y",
					},
				},
			},
			AddressType: "foobar",
			Ports:       args.endpointPorts,
		}
		svc := Service{
			Metadata: ObjectMeta{
				Name:      "test-svc",
				Namespace: "default",
			},
			Spec: ServiceSpec{
				ClusterIP: "1.2.3.4",
				Type:      "service-type",
				Ports: []ServicePort{
					{
						Name: "test-port",
						Port: 8081,
					},
				},
			},
		}
		pod := Pod{
			Metadata: ObjectMeta{
				UID:       "pod-uid",
				Name:      "test-pod",
				Namespace: "default",
			},
			Spec: PodSpec{
				NodeName: "test-node",
			},
			Status: PodStatus{
				Phase:  "abc",
				PodIP:  "192.168.15.1",
				HostIP: "4.5.6.7",
			},
		}
		node := Node{
			Metadata: ObjectMeta{
				Labels: []prompbmarshal.Label{
					{
						Name:  "node-label",
						Value: "xyz",
					},
				},
			},
		}
		for cn, ports := range args.containerPorts {
			pod.Spec.Containers = append(pod.Spec.Containers, Container{Name: cn, Ports: ports})
		}
		// Register the objects under the "namespace/name" keys, which getTargetLabels is expected to look up.
		var gw groupWatcher
		gw.m = map[string]*urlWatcher{
			"pod": {
				role: "pod",
				objectsByKey: map[string]object{
					"default/test-pod": &pod,
				},
			},
			"service": {
				role: "service",
				objectsByKey: map[string]object{
					// The key contains the service name, not the EndpointSlice name.
					"default/test-svc": &svc,
				},
			},
			"node": {
				role: "node",
				objectsByKey: map[string]object{
					// The node is registered with an empty namespace part of the key.
					"/test-node": &node,
				},
			},
		}
		// NOTE(review): presumably this is what produces the __meta_kubernetes_node_* labels
		// in the expectations below - confirm against getTargetLabels.
		gw.attachNodeMetadata = true
		var sortedLabelss [][]prompbmarshal.Label
		gotLabels := eps.getTargetLabels(&gw)
		for _, lbs := range gotLabels {
			sortedLabelss = append(sortedLabelss, discoveryutils.GetSortedLabels(lbs))
		}
		if !areEqualLabelss(sortedLabelss, wantLabels) {
			t.Fatalf("unexpected labels:\ngot\n%v\nwant\n%v", sortedLabelss, wantLabels)
		}
	}

	// The pod has no containers, so a single target for the endpoint port is expected.
	t.Run("1 port from endpoint", func(t *testing.T) {
		f(t, testArgs{
			endpointPorts: []EndpointPort{
				{
					Name:     "web",
					Port:     8081,
					Protocol: "foobar",
				},
			},
		}, [][]prompbmarshal.Label{
			discoveryutils.GetSortedLabels(map[string]string{
				"__address__": "10.13.15.15:8081",
				"__meta_kubernetes_endpointslice_address_target_kind":                     "Pod",
				"__meta_kubernetes_endpointslice_address_target_name":                     "test-pod",
				"__meta_kubernetes_endpointslice_address_type":                            "foobar",
				"__meta_kubernetes_endpointslice_endpoint_conditions_ready":               "true",
				"__meta_kubernetes_endpointslice_endpoint_hostname":                       "foo.bar",
				"__meta_kubernetes_endpointslice_endpoint_topology_present_x":             "true",
				"__meta_kubernetes_endpointslice_endpoint_topology_x":                     "y",
				"__meta_kubernetes_endpointslice_label_kubernetes_io_service_name":        "test-svc",
				"__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true",
				"__meta_kubernetes_endpointslice_name":                                    "test-eps",
				"__meta_kubernetes_endpointslice_port":                                    "8081",
				"__meta_kubernetes_endpointslice_port_name":                               "web",
				"__meta_kubernetes_endpointslice_port_protocol":                           "foobar",
				"__meta_kubernetes_namespace":                                             "default",
				"__meta_kubernetes_node_label_node_label":                                 "xyz",
				"__meta_kubernetes_node_labelpresent_node_label":                          "true",
				"__meta_kubernetes_node_name":                                             "test-node",
				"__meta_kubernetes_pod_host_ip":                                           "4.5.6.7",
				"__meta_kubernetes_pod_ip":                                                "192.168.15.1",
				"__meta_kubernetes_pod_name":                                              "test-pod",
				"__meta_kubernetes_pod_node_name":                                         "test-node",
				"__meta_kubernetes_pod_phase":                                             "abc",
				"__meta_kubernetes_pod_ready":                                             "unknown",
				"__meta_kubernetes_pod_uid":                                               "pod-uid",
				"__meta_kubernetes_service_cluster_ip":                                    "1.2.3.4",
				"__meta_kubernetes_service_name":                                          "test-svc",
				"__meta_kubernetes_service_type":                                          "service-type",
			}),
		})
	})

	// The pod exposes a container port (8428), which differs from the endpoint port (8081),
	// so two targets are expected: one for the endpoint port and one for the pod container port.
	t.Run("1 port from endpoint and 1 from pod", func(t *testing.T) {
		f(t, testArgs{
			containerPorts: map[string][]ContainerPort{"metrics": {{
				Name:          "http-metrics",
				ContainerPort: 8428,
				Protocol:      "foobar",
			}}},
			endpointPorts: []EndpointPort{
				{
					Name:     "web",
					Port:     8081,
					Protocol: "https",
				},
			},
		}, [][]prompbmarshal.Label{
			discoveryutils.GetSortedLabels(map[string]string{
				"__address__": "10.13.15.15:8081",
				"__meta_kubernetes_endpointslice_address_target_kind":                     "Pod",
				"__meta_kubernetes_endpointslice_address_target_name":                     "test-pod",
				"__meta_kubernetes_endpointslice_address_type":                            "foobar",
				"__meta_kubernetes_endpointslice_endpoint_conditions_ready":               "true",
				"__meta_kubernetes_endpointslice_endpoint_hostname":                       "foo.bar",
				"__meta_kubernetes_endpointslice_endpoint_topology_present_x":             "true",
				"__meta_kubernetes_endpointslice_endpoint_topology_x":                     "y",
				"__meta_kubernetes_endpointslice_label_kubernetes_io_service_name":        "test-svc",
				"__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true",
				"__meta_kubernetes_endpointslice_name":                                    "test-eps",
				"__meta_kubernetes_endpointslice_port":                                    "8081",
				"__meta_kubernetes_endpointslice_port_name":                               "web",
				"__meta_kubernetes_endpointslice_port_protocol":                           "https",
				"__meta_kubernetes_namespace":                                             "default",
				"__meta_kubernetes_node_label_node_label":                                 "xyz",
				"__meta_kubernetes_node_labelpresent_node_label":                          "true",
				"__meta_kubernetes_node_name":                                             "test-node",
				"__meta_kubernetes_pod_host_ip":                                           "4.5.6.7",
				"__meta_kubernetes_pod_ip":                                                "192.168.15.1",
				"__meta_kubernetes_pod_name":                                              "test-pod",
				"__meta_kubernetes_pod_node_name":                                         "test-node",
				"__meta_kubernetes_pod_phase":                                             "abc",
				"__meta_kubernetes_pod_ready":                                             "unknown",
				"__meta_kubernetes_pod_uid":                                               "pod-uid",
				"__meta_kubernetes_service_cluster_ip":                                    "1.2.3.4",
				"__meta_kubernetes_service_name":                                          "test-svc",
				"__meta_kubernetes_service_type":                                          "service-type",
			}),
			discoveryutils.GetSortedLabels(map[string]string{
				"__address__":                                    "192.168.15.1:8428",
				"__meta_kubernetes_namespace":                    "default",
				"__meta_kubernetes_node_label_node_label":        "xyz",
				"__meta_kubernetes_node_labelpresent_node_label": "true",
				"__meta_kubernetes_node_name":                    "test-node",
				"__meta_kubernetes_pod_container_name":           "metrics",
				"__meta_kubernetes_pod_container_port_name":      "http-metrics",
				"__meta_kubernetes_pod_container_port_number":    "8428",
				"__meta_kubernetes_pod_container_port_protocol":  "foobar",
				"__meta_kubernetes_pod_host_ip":                  "4.5.6.7",
				"__meta_kubernetes_pod_ip":                       "192.168.15.1",
				"__meta_kubernetes_pod_name":                     "test-pod",
				"__meta_kubernetes_pod_node_name":                "test-node",
				"__meta_kubernetes_pod_phase":                    "abc",
				"__meta_kubernetes_pod_ready":                    "unknown",
				"__meta_kubernetes_pod_uid":                      "pod-uid",
				"__meta_kubernetes_service_cluster_ip":           "1.2.3.4",
				"__meta_kubernetes_service_name":                 "test-svc",
				"__meta_kubernetes_service_type":                 "service-type",
			}),
		})
	})

	// The endpoint port (8428) equals the pod container port, so a single target is expected,
	// which carries both the endpointslice labels and the pod container labels.
	// The subtest name is distinct from the first subtest, so it can be selected via -run.
	t.Run("1 port from endpoint matching pod container port", func(t *testing.T) {
		f(t, testArgs{
			containerPorts: map[string][]ContainerPort{"metrics": {{
				Name:          "web",
				ContainerPort: 8428,
				Protocol:      "sdc",
			}}},
			endpointPorts: []EndpointPort{
				{
					Name:     "web",
					Port:     8428,
					Protocol: "xabc",
				},
			},
		}, [][]prompbmarshal.Label{
			discoveryutils.GetSortedLabels(map[string]string{
				"__address__": "10.13.15.15:8428",
				"__meta_kubernetes_endpointslice_address_target_kind":                     "Pod",
				"__meta_kubernetes_endpointslice_address_target_name":                     "test-pod",
				"__meta_kubernetes_endpointslice_address_type":                            "foobar",
				"__meta_kubernetes_endpointslice_endpoint_conditions_ready":               "true",
				"__meta_kubernetes_endpointslice_endpoint_hostname":                       "foo.bar",
				"__meta_kubernetes_endpointslice_endpoint_topology_present_x":             "true",
				"__meta_kubernetes_endpointslice_endpoint_topology_x":                     "y",
				"__meta_kubernetes_endpointslice_label_kubernetes_io_service_name":        "test-svc",
				"__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true",
				"__meta_kubernetes_endpointslice_name":                                    "test-eps",
				"__meta_kubernetes_endpointslice_port":                                    "8428",
				"__meta_kubernetes_endpointslice_port_name":                               "web",
				"__meta_kubernetes_endpointslice_port_protocol":                           "xabc",
				"__meta_kubernetes_namespace":                                             "default",
				"__meta_kubernetes_node_label_node_label":                                 "xyz",
				"__meta_kubernetes_node_labelpresent_node_label":                          "true",
				"__meta_kubernetes_node_name":                                             "test-node",
				"__meta_kubernetes_pod_container_name":                                    "metrics",
				"__meta_kubernetes_pod_container_port_name":                               "web",
				"__meta_kubernetes_pod_container_port_number":                             "8428",
				"__meta_kubernetes_pod_container_port_protocol":                           "sdc",
				"__meta_kubernetes_pod_host_ip":                                           "4.5.6.7",
				"__meta_kubernetes_pod_ip":                                                "192.168.15.1",
				"__meta_kubernetes_pod_name":                                              "test-pod",
				"__meta_kubernetes_pod_node_name":                                         "test-node",
				"__meta_kubernetes_pod_phase":                                             "abc",
				"__meta_kubernetes_pod_ready":                                             "unknown",
				"__meta_kubernetes_pod_uid":                                               "pod-uid",
				"__meta_kubernetes_service_cluster_ip":                                    "1.2.3.4",
				"__meta_kubernetes_service_name":                                          "test-svc",
				"__meta_kubernetes_service_type":                                          "service-type",
			}),
		})
	})
}