diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index afecb124c3..c2d858f453 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -7,6 +7,7 @@ sort: 15 ## tip * FEATURE: add `present_over_time(m[d])` function, which returns 1 if `m` has a least a single sample over the previous duration `d`. This function has been added also to [Prometheus 2.29](https://github.com/prometheus/prometheus/releases/tag/v2.29.0-rc.0). +* FEATURE: vmagent: add `__meta_ec2_availability_zone_id` label, which has been introduced in [Prometheus 2.29](https://github.com/prometheus/prometheus/releases/tag/v2.29.0-rc.0). * FEATURE: add `-search.maxSamplesPerSeries` command-line flag for limiting the number of raw samples a single query can process per each time series. This option can protect from out of memory errors when a query processes tens of millions of raw samples per series. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1067). * FEATURE: add `-search.maxSamplesPerQuery` command-line flag for limiting the number of raw samples a single query can process across all the time series. This option can protect from heavy queries, which select too big number of raw samples. Thanks to @jiangxinlingdu for [the initial pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1478). * FEATURE: improve performance for queries that process big number of time series and/or samples on systems with big number of CPU cores. diff --git a/lib/promscrape/discovery/ec2/api.go b/lib/promscrape/discovery/ec2/api.go index 9a13dcb9d5..e72c702239 100644 --- a/lib/promscrape/discovery/ec2/api.go +++ b/lib/promscrape/discovery/ec2/api.go @@ -33,6 +33,10 @@ type apiConfig struct { // Real credentials used for accessing EC2 API. creds *apiCredentials credsLock sync.Mutex + + // A map from AZ name to AZ id. 
+	azMap     map[string]string
+	azMapLock sync.Mutex
 }
 
 // apiCredentials represents aws api credentials
diff --git a/lib/promscrape/discovery/ec2/instance.go b/lib/promscrape/discovery/ec2/instance.go
index 45ccfb0cfe..37425e2d2e 100644
--- a/lib/promscrape/discovery/ec2/instance.go
+++ b/lib/promscrape/discovery/ec2/instance.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"strings"
 
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"
 )
 
@@ -14,10 +15,11 @@ func getInstancesLabels(cfg *apiConfig) ([]map[string]string, error) {
 	if err != nil {
 		return nil, err
 	}
+	azMap := getAZMap(cfg)
 	var ms []map[string]string
 	for _, r := range rs {
 		for _, inst := range r.InstanceSet.Items {
-			ms = inst.appendTargetLabels(ms, r.OwnerID, cfg.port)
+			ms = inst.appendTargetLabels(ms, r.OwnerID, cfg.port, azMap)
 		}
 	}
 	return ms, nil
@@ -25,11 +27,10 @@ func getInstancesLabels(cfg *apiConfig) ([]map[string]string, error) {
 
 func getReservations(cfg *apiConfig) ([]Reservation, error) {
 	// See https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeInstances.html
-	action := "DescribeInstances"
 	var rs []Reservation
 	pageToken := ""
 	for {
-		data, err := getEC2APIResponse(cfg, action, pageToken)
+		data, err := getEC2APIResponse(cfg, "DescribeInstances", pageToken)
 		if err != nil {
 			return nil, fmt.Errorf("cannot obtain instances: %w", err)
 		}
@@ -132,29 +133,95 @@ func parseInstancesResponse(data []byte) (*InstancesResponse, error) {
 	return &v, nil
 }
 
-func (inst *Instance) appendTargetLabels(ms []map[string]string, ownerID string, port int) []map[string]string {
+// getAZMap returns the mapping from availability zone name to availability zone id for cfg.
+//
+// The mapping is loaded via getAvailabilityZones on the first call and stored in cfg.azMap;
+// subsequent calls return the stored map while holding cfg.azMapLock.
+// If loading fails, a warning is logged and an empty map is stored, so the load isn't retried
+// and lookups in the returned map yield an empty zone id.
+func getAZMap(cfg *apiConfig) map[string]string {
+	cfg.azMapLock.Lock()
+	defer cfg.azMapLock.Unlock()
+
+	if cfg.azMap != nil {
+		return cfg.azMap
+	}
+
+	azs, err := getAvailabilityZones(cfg)
+	cfg.azMap = make(map[string]string, len(azs))
+	if err != nil {
+		logger.Warnf("couldn't load availability zones map, so __meta_ec2_availability_zone_id label isn't set: %s", err)
+		return cfg.azMap
+	}
+	for _, az := range azs {
+		cfg.azMap[az.ZoneName] = az.ZoneID
+	}
+	return cfg.azMap
+}
+
+// getAvailabilityZones returns the availability zones from the DescribeAvailabilityZones response for cfg.
+func getAvailabilityZones(cfg *apiConfig) ([]AvailabilityZone, error) {
+	// See https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeAvailabilityZones.html
+	data, err := getEC2APIResponse(cfg, "DescribeAvailabilityZones", "")
+	if err != nil {
+		return nil, fmt.Errorf("cannot obtain availability zones: %w", err)
+	}
+	azr, err := parseAvailabilityZonesResponse(data)
+	if err != nil {
+		return nil, fmt.Errorf("cannot parse availability zones list: %w", err)
+	}
+	return azr.AvailabilityZoneInfo.Items, nil
+}
+
+// AvailabilityZonesResponse represents the response for https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeAvailabilityZones.html
+type AvailabilityZonesResponse struct {
+	AvailabilityZoneInfo AvailabilityZoneInfo `xml:"availabilityZoneInfo"`
+}
+
+// AvailabilityZoneInfo represents availabilityZoneInfo for https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeAvailabilityZones.html
+type AvailabilityZoneInfo struct {
+	Items []AvailabilityZone `xml:"item"`
+}
+
+// AvailabilityZone represents availabilityZone for https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_AvailabilityZone.html
+type AvailabilityZone struct {
+	ZoneName string `xml:"zoneName"`
+	ZoneID   string `xml:"zoneId"`
+}
+
+// parseAvailabilityZonesResponse unmarshals the XML in data into AvailabilityZonesResponse.
+func parseAvailabilityZonesResponse(data []byte) (*AvailabilityZonesResponse, error) {
+	var v AvailabilityZonesResponse
+	if err := xml.Unmarshal(data, &v); err != nil {
+		return nil, fmt.Errorf("cannot unmarshal DescribeAvailabilityZonesResponse from %q: %w", data, err)
+	}
+	return &v, nil
+}
+
+func (inst *Instance) appendTargetLabels(ms []map[string]string, ownerID string, port int, azMap map[string]string) []map[string]string {
 	if len(inst.PrivateIPAddress) == 0 {
 		// Cannot scrape instance without private IP address
 		return ms
 	}
 	addr := discoveryutils.JoinHostPort(inst.PrivateIPAddress, port)
 	m := map[string]string{
-		"__address__":                   addr,
-		"__meta_ec2_architecture":       inst.Architecture,
-		"__meta_ec2_ami":                inst.ImageID,
-		"__meta_ec2_availability_zone":  inst.Placement.AvailabilityZone,
-		"__meta_ec2_instance_id":        inst.ID,
-		"__meta_ec2_instance_lifecycle": inst.Lifecycle,
-		"__meta_ec2_instance_state":     inst.State.Name,
-		"__meta_ec2_instance_type":      inst.Type,
-		"__meta_ec2_owner_id":           ownerID,
-		"__meta_ec2_platform":           inst.Platform,
-		"__meta_ec2_primary_subnet_id":  inst.SubnetID,
-		"__meta_ec2_private_dns_name":   inst.PrivateDNSName,
-		"__meta_ec2_private_ip":         inst.PrivateIPAddress,
-		"__meta_ec2_public_dns_name":    inst.PublicDNSName,
-		"__meta_ec2_public_ip":          inst.PublicIPAddress,
-		"__meta_ec2_vpc_id":             inst.VPCID,
+		"__address__":                     addr,
+		"__meta_ec2_architecture":         inst.Architecture,
+		"__meta_ec2_ami":                  inst.ImageID,
+		"__meta_ec2_availability_zone":    inst.Placement.AvailabilityZone,
+		"__meta_ec2_availability_zone_id": azMap[inst.Placement.AvailabilityZone],
+		"__meta_ec2_instance_id":          inst.ID,
+		"__meta_ec2_instance_lifecycle":   inst.Lifecycle,
+		"__meta_ec2_instance_state":       inst.State.Name,
+		"__meta_ec2_instance_type":        inst.Type,
+		"__meta_ec2_owner_id":             ownerID,
+		"__meta_ec2_platform":             inst.Platform,
+		"__meta_ec2_primary_subnet_id":    inst.SubnetID,
+		"__meta_ec2_private_dns_name":     inst.PrivateDNSName,
+		"__meta_ec2_private_ip":           inst.PrivateIPAddress,
+		"__meta_ec2_public_dns_name":      inst.PublicDNSName,
+		"__meta_ec2_public_ip":            inst.PublicIPAddress,
+		"__meta_ec2_vpc_id":               inst.VPCID,
 	}
 	if len(inst.VPCID) > 0 {
 		subnets := make([]string, 0, len(inst.NetworkInterfaceSet.Items))
diff --git a/lib/promscrape/discovery/ec2/instance_test.go b/lib/promscrape/discovery/ec2/instance_test.go
index a193292646..a8441a96cb 100644
--- a/lib/promscrape/discovery/ec2/instance_test.go
+++ b/lib/promscrape/discovery/ec2/instance_test.go
@@ -8,6 +8,56 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"
 )
 
+func TestDescribeAvailabilityZonesResponse(t *testing.T) {
+	data := `<DescribeAvailabilityZonesResponse xmlns="http://ec2.amazonaws.com/doc/2016-11-15/">
+    <requestId>e23c5a54-a29c-43ee-8b55-0c13c26e9e01</requestId>
+    <availabilityZoneInfo>
+        <item>
+            <optInStatus>opt-in-not-required</optInStatus>
+            <zoneName>us-west-2a</zoneName>
+            <zoneId>usw2-az1</zoneId>
+            <zoneState>available</zoneState>
+            <regionName>us-west-2</regionName>
+            <messageSet/>
+            <networkBorderGroup>us-west-2-lax-1</networkBorderGroup>
+        </item>
+        <item>
+            <groupName>us-west-2</groupName>
+            <optInStatus>opt-in-not-required</optInStatus>
+            <zoneName>us-west-2b</zoneName>
+            <zoneId>usw2-az2</zoneId>
+            <zoneState>available</zoneState>
+            <regionName>us-west-2</regionName>
+            <messageSet/>
+            <networkBorderGroup>us-west-2-lax-1</networkBorderGroup>
+        </item>
+    </availabilityZoneInfo>
+</DescribeAvailabilityZonesResponse>
+`
+
+	azr, err := parseAvailabilityZonesResponse([]byte(data))
+	if err != nil {
+		t.Fatalf("unexpected error when parsing data: %s", err)
+	}
+	azrExpected := &AvailabilityZonesResponse{
+		AvailabilityZoneInfo: AvailabilityZoneInfo{
+			Items: []AvailabilityZone{
+				{
+					ZoneName: "us-west-2a",
+					ZoneID:   "usw2-az1",
+				},
+				{
+					ZoneName: "us-west-2b",
+					ZoneID:   "usw2-az2",
+				},
+			},
+		},
+	}
+	if !reflect.DeepEqual(azr, azrExpected) {
+		t.Fatalf("unexpected DescribeAvailabilityZonesResponse parsed;\ngot\n%+v\nwant\n%+v", azr, azrExpected)
+	}
+}
+
 func TestParseInstancesResponse(t *testing.T) {
 	data := `
 
@@ -188,31 +238,34 @@ func TestParseInstancesResponse(t *testing.T) {
 	ownerID := rs.OwnerID
 	port := 423
 	inst := rs.InstanceSet.Items[0]
-	labelss := inst.appendTargetLabels(nil, ownerID, port)
+	labelss := inst.appendTargetLabels(nil, ownerID, port, map[string]string{
+		"eu-west-2c": "foobar-zone",
+	})
 	var sortedLabelss [][]prompbmarshal.Label
 	for _, labels := range labelss {
 		sortedLabelss = append(sortedLabelss, discoveryutils.GetSortedLabels(labels))
 	}
 	expectedLabels := [][]prompbmarshal.Label{
 		discoveryutils.GetSortedLabels(map[string]string{
-			"__address__":                   "172.31.11.152:423",
-			"__meta_ec2_architecture":       "x86_64",
-			"__meta_ec2_availability_zone":  "eu-west-2c",
-			"__meta_ec2_ami":                "ami-0eb89db7593b5d434",
-			"__meta_ec2_instance_id":        "i-0e730b692d9c15460",
-			"__meta_ec2_instance_lifecycle": "spot",
-			"__meta_ec2_instance_state":     "running",
-			"__meta_ec2_instance_type":      "t2.micro",
-			"__meta_ec2_owner_id":           "793614593844",
-			"__meta_ec2_platform":           "windows",
-			"__meta_ec2_primary_subnet_id":  "subnet-57044c3e",
-			"__meta_ec2_private_dns_name":   "ip-172-31-11-152.eu-west-2.compute.internal",
-			"__meta_ec2_private_ip":         "172.31.11.152",
-			"__meta_ec2_public_dns_name":    "ec2-3-8-232-141.eu-west-2.compute.amazonaws.com",
-			"__meta_ec2_public_ip":          "3.8.232.141",
-			"__meta_ec2_subnet_id":          ",subnet-57044c3e,",
-			"__meta_ec2_tag_foo":            "bar",
-			"__meta_ec2_vpc_id":             "vpc-f1eaad99",
+			"__address__":                     "172.31.11.152:423",
+			"__meta_ec2_architecture":         "x86_64",
+			"__meta_ec2_availability_zone":    "eu-west-2c",
+			"__meta_ec2_availability_zone_id": "foobar-zone",
+			"__meta_ec2_ami":                  "ami-0eb89db7593b5d434",
+			"__meta_ec2_instance_id":          "i-0e730b692d9c15460",
+			"__meta_ec2_instance_lifecycle":   "spot",
+			"__meta_ec2_instance_state":       "running",
+			"__meta_ec2_instance_type":        "t2.micro",
+			"__meta_ec2_owner_id":             "793614593844",
+			"__meta_ec2_platform":             "windows",
+			"__meta_ec2_primary_subnet_id":    "subnet-57044c3e",
+			"__meta_ec2_private_dns_name":     "ip-172-31-11-152.eu-west-2.compute.internal",
+			"__meta_ec2_private_ip":           "172.31.11.152",
+			"__meta_ec2_public_dns_name":      "ec2-3-8-232-141.eu-west-2.compute.amazonaws.com",
+			"__meta_ec2_public_ip":            "3.8.232.141",
+			"__meta_ec2_subnet_id":            ",subnet-57044c3e,",
+			"__meta_ec2_tag_foo":              "bar",
+			"__meta_ec2_vpc_id":               "vpc-f1eaad99",
 		}),
 	}
 	if !reflect.DeepEqual(sortedLabelss, expectedLabels) {