lib/promscrape/discovery/ec2: add __meta_ec2_availability_zone_id label as Prometheus 2.29 does

This commit is contained in:
Aliaksandr Valialkin 2021-08-03 13:26:28 +03:00
parent 60cfa5f100
commit 336a2aa2e0
4 changed files with 156 additions and 39 deletions

View file

@ -7,6 +7,7 @@ sort: 15
## tip ## tip
* FEATURE: add `present_over_time(m[d])` function, which returns 1 if `m` has at least a single sample over the previous duration `d`. This function has also been added to [Prometheus 2.29](https://github.com/prometheus/prometheus/releases/tag/v2.29.0-rc.0). * FEATURE: add `present_over_time(m[d])` function, which returns 1 if `m` has at least a single sample over the previous duration `d`. This function has also been added to [Prometheus 2.29](https://github.com/prometheus/prometheus/releases/tag/v2.29.0-rc.0).
* FEATURE: vmagent: add `__meta_ec2_availability_zone_id` label, which has been introduced in [Prometheus 2.29](https://github.com/prometheus/prometheus/releases/tag/v2.29.0-rc.0).
* FEATURE: add `-search.maxSamplesPerSeries` command-line flag for limiting the number of raw samples a single query can process per each time series. This option can protect from out of memory errors when a query processes tens of millions of raw samples per series. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1067). * FEATURE: add `-search.maxSamplesPerSeries` command-line flag for limiting the number of raw samples a single query can process per each time series. This option can protect from out of memory errors when a query processes tens of millions of raw samples per series. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1067).
* FEATURE: add `-search.maxSamplesPerQuery` command-line flag for limiting the number of raw samples a single query can process across all the time series. This option can protect from heavy queries, which select too many raw samples. Thanks to @jiangxinlingdu for [the initial pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1478). * FEATURE: add `-search.maxSamplesPerQuery` command-line flag for limiting the number of raw samples a single query can process across all the time series. This option can protect from heavy queries, which select too many raw samples. Thanks to @jiangxinlingdu for [the initial pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1478).
* FEATURE: improve performance for queries that process a big number of time series and/or samples on systems with a big number of CPU cores. * FEATURE: improve performance for queries that process a big number of time series and/or samples on systems with a big number of CPU cores.

View file

@ -33,6 +33,10 @@ type apiConfig struct {
// Real credentials used for accessing EC2 API. // Real credentials used for accessing EC2 API.
creds *apiCredentials creds *apiCredentials
credsLock sync.Mutex credsLock sync.Mutex
// A map from AZ name to AZ id.
azMap map[string]string
azMapLock sync.Mutex
} }
// apiCredentials represents aws api credentials // apiCredentials represents aws api credentials

View file

@ -5,6 +5,7 @@ import (
"fmt" "fmt"
"strings" "strings"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"
) )
@ -14,10 +15,11 @@ func getInstancesLabels(cfg *apiConfig) ([]map[string]string, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
azMap := getAZMap(cfg)
var ms []map[string]string var ms []map[string]string
for _, r := range rs { for _, r := range rs {
for _, inst := range r.InstanceSet.Items { for _, inst := range r.InstanceSet.Items {
ms = inst.appendTargetLabels(ms, r.OwnerID, cfg.port) ms = inst.appendTargetLabels(ms, r.OwnerID, cfg.port, azMap)
} }
} }
return ms, nil return ms, nil
@ -25,11 +27,10 @@ func getInstancesLabels(cfg *apiConfig) ([]map[string]string, error) {
func getReservations(cfg *apiConfig) ([]Reservation, error) { func getReservations(cfg *apiConfig) ([]Reservation, error) {
// See https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeInstances.html // See https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeInstances.html
action := "DescribeInstances"
var rs []Reservation var rs []Reservation
pageToken := "" pageToken := ""
for { for {
data, err := getEC2APIResponse(cfg, action, pageToken) data, err := getEC2APIResponse(cfg, "DescribeInstances", pageToken)
if err != nil { if err != nil {
return nil, fmt.Errorf("cannot obtain instances: %w", err) return nil, fmt.Errorf("cannot obtain instances: %w", err)
} }
@ -132,29 +133,87 @@ func parseInstancesResponse(data []byte) (*InstancesResponse, error) {
return &v, nil return &v, nil
} }
// getAZMap returns the map from availability zone name to availability zone id for cfg.
//
// The map is built from getAvailabilityZones on the first call and stored in cfg.azMap;
// later calls return the stored map. cfg.azMapLock is held for the duration of the call.
func (inst *Instance) appendTargetLabels(ms []map[string]string, ownerID string, port int) []map[string]string { func getAZMap(cfg *apiConfig) map[string]string {
cfg.azMapLock.Lock()
defer cfg.azMapLock.Unlock()
// A non-nil map means a previous call has already populated (or attempted to populate) it.
if cfg.azMap != nil {
return cfg.azMap
}
azs, err := getAvailabilityZones(cfg)
// The map is assigned before err is checked, so cfg.azMap is non-nil even on failure
// and the nil check above prevents further getAvailabilityZones calls for this cfg.
cfg.azMap = make(map[string]string, len(azs))
if err != nil {
// The stored map stays empty in this case, so lookups by zone name return an empty string.
logger.Warnf("couldn't load availability zones map, so __meta_ec2_availability_zone_id label isn't set: %s", err)
return cfg.azMap
}
// Index zone ids by zone name.
for _, az := range azs {
cfg.azMap[az.ZoneName] = az.ZoneID
}
return cfg.azMap
}
// getAvailabilityZones requests the DescribeAvailabilityZones action via getEC2APIResponse
// and returns the availability zone items parsed from the response.
//
// An error is returned if either the request or the parsing of its response fails.
//
// See https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeAvailabilityZones.html
func getAvailabilityZones(cfg *apiConfig) ([]AvailabilityZone, error) {
	body, fetchErr := getEC2APIResponse(cfg, "DescribeAvailabilityZones", "")
	if fetchErr != nil {
		return nil, fmt.Errorf("cannot obtain availability zones: %w", fetchErr)
	}

	resp, parseErr := parseAvailabilityZonesResponse(body)
	if parseErr != nil {
		return nil, fmt.Errorf("cannot parse availability zones list: %w", parseErr)
	}

	zones := resp.AvailabilityZoneInfo.Items
	return zones, nil
}
// AvailabilityZonesResponse represents the response for https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeAvailabilityZones.html
//
// Only the availabilityZoneInfo element of the response is mapped to a field.
type AvailabilityZonesResponse struct {
// AvailabilityZoneInfo holds the contents of the availabilityZoneInfo element.
AvailabilityZoneInfo AvailabilityZoneInfo `xml:"availabilityZoneInfo"`
}
// AvailabilityZoneInfo represents availabilityZoneInfo for https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeAvailabilityZones.html
type AvailabilityZoneInfo struct {
// Items contains one entry per item element nested in availabilityZoneInfo.
Items []AvailabilityZone `xml:"item"`
}
// AvailabilityZone represents availabilityZone for https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_AvailabilityZone.html
//
// Only the zone name and the zone id are mapped to fields.
type AvailabilityZone struct {
// ZoneName is the value of the zoneName element, e.g. "us-west-2a".
ZoneName string `xml:"zoneName"`
// ZoneID is the value of the zoneId element, e.g. "usw2-az1".
ZoneID string `xml:"zoneId"`
}
// parseAvailabilityZonesResponse decodes the XML in data into an AvailabilityZonesResponse.
//
// It returns a nil response and an error, which quotes data, if the XML cannot be unmarshaled.
func parseAvailabilityZonesResponse(data []byte) (*AvailabilityZonesResponse, error) {
	azr := &AvailabilityZonesResponse{}
	err := xml.Unmarshal(data, azr)
	if err != nil {
		return nil, fmt.Errorf("cannot unmarshal DescribeAvailabilityZonesResponse from %q: %w", data, err)
	}
	return azr, nil
}
func (inst *Instance) appendTargetLabels(ms []map[string]string, ownerID string, port int, azMap map[string]string) []map[string]string {
if len(inst.PrivateIPAddress) == 0 { if len(inst.PrivateIPAddress) == 0 {
// Cannot scrape instance without private IP address // Cannot scrape instance without private IP address
return ms return ms
} }
addr := discoveryutils.JoinHostPort(inst.PrivateIPAddress, port) addr := discoveryutils.JoinHostPort(inst.PrivateIPAddress, port)
m := map[string]string{ m := map[string]string{
"__address__": addr, "__address__": addr,
"__meta_ec2_architecture": inst.Architecture, "__meta_ec2_architecture": inst.Architecture,
"__meta_ec2_ami": inst.ImageID, "__meta_ec2_ami": inst.ImageID,
"__meta_ec2_availability_zone": inst.Placement.AvailabilityZone, "__meta_ec2_availability_zone": inst.Placement.AvailabilityZone,
"__meta_ec2_instance_id": inst.ID, "__meta_ec2_availability_zone_id": azMap[inst.Placement.AvailabilityZone],
"__meta_ec2_instance_lifecycle": inst.Lifecycle, "__meta_ec2_instance_id": inst.ID,
"__meta_ec2_instance_state": inst.State.Name, "__meta_ec2_instance_lifecycle": inst.Lifecycle,
"__meta_ec2_instance_type": inst.Type, "__meta_ec2_instance_state": inst.State.Name,
"__meta_ec2_owner_id": ownerID, "__meta_ec2_instance_type": inst.Type,
"__meta_ec2_platform": inst.Platform, "__meta_ec2_owner_id": ownerID,
"__meta_ec2_primary_subnet_id": inst.SubnetID, "__meta_ec2_platform": inst.Platform,
"__meta_ec2_private_dns_name": inst.PrivateDNSName, "__meta_ec2_primary_subnet_id": inst.SubnetID,
"__meta_ec2_private_ip": inst.PrivateIPAddress, "__meta_ec2_private_dns_name": inst.PrivateDNSName,
"__meta_ec2_public_dns_name": inst.PublicDNSName, "__meta_ec2_private_ip": inst.PrivateIPAddress,
"__meta_ec2_public_ip": inst.PublicIPAddress, "__meta_ec2_public_dns_name": inst.PublicDNSName,
"__meta_ec2_vpc_id": inst.VPCID, "__meta_ec2_public_ip": inst.PublicIPAddress,
"__meta_ec2_vpc_id": inst.VPCID,
} }
if len(inst.VPCID) > 0 { if len(inst.VPCID) > 0 {
subnets := make([]string, 0, len(inst.NetworkInterfaceSet.Items)) subnets := make([]string, 0, len(inst.NetworkInterfaceSet.Items))

View file

@ -8,6 +8,56 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"
) )
func TestDescribeAvailabilityZonesResponse(t *testing.T) {
data := `<DescribeAvailabilityZonesResponse xmlns="http://ec2.amazonaws.com/doc/2016-11-15/">
<requestId>e23c5a54-a29c-43ee-8b55-0c13c26e9e01</requestId>
<availabilityZoneInfo>
<item>
<optInStatus>opt-in-not-required</optInStatus>
<zoneName>us-west-2a</zoneName>
<zoneId>usw2-az1</zoneId>
<zoneState>available</zoneState>
<regionName>us-west-2</regionName>
<messageSet/>
<NetworkBorderGroup>us-west-2-lax-1</NetworkBorderGroup>
</item>
<item>
<groupName>us-west-2</groupName>
<optInStatus>opt-in-not-required</optInStatus>
<zoneName>us-west-2b</zoneName>
<zoneId>usw2-az2</zoneId>
<zoneState>available</zoneState>
<regionName>us-west-2</regionName>
<messageSet/>
<NetworkBorderGroup>us-west-2-lax-1</NetworkBorderGroup>
</item>
</availabilityZoneInfo>
</DescribeAvailabilityZonesResponse>
`
azr, err := parseAvailabilityZonesResponse([]byte(data))
if err != nil {
t.Fatalf("unexpected error when parsing data: %s", err)
}
azrExpected := &AvailabilityZonesResponse{
AvailabilityZoneInfo: AvailabilityZoneInfo{
Items: []AvailabilityZone{
{
ZoneName: "us-west-2a",
ZoneID: "usw2-az1",
},
{
ZoneName: "us-west-2b",
ZoneID: "usw2-az2",
},
},
},
}
if !reflect.DeepEqual(azr, azrExpected) {
t.Fatalf("unexpected DescribeAvailabilityZonesResponse parsed;\ngot\n%+v\nwant\n%+v", azr, azrExpected)
}
}
func TestParseInstancesResponse(t *testing.T) { func TestParseInstancesResponse(t *testing.T) {
data := `<?xml version="1.0" encoding="UTF-8"?> data := `<?xml version="1.0" encoding="UTF-8"?>
<DescribeInstancesResponse xmlns="http://ec2.amazonaws.com/doc/2013-10-15/"> <DescribeInstancesResponse xmlns="http://ec2.amazonaws.com/doc/2013-10-15/">
@ -188,31 +238,34 @@ func TestParseInstancesResponse(t *testing.T) {
ownerID := rs.OwnerID ownerID := rs.OwnerID
port := 423 port := 423
inst := rs.InstanceSet.Items[0] inst := rs.InstanceSet.Items[0]
labelss := inst.appendTargetLabels(nil, ownerID, port) labelss := inst.appendTargetLabels(nil, ownerID, port, map[string]string{
"eu-west-2c": "foobar-zone",
})
var sortedLabelss [][]prompbmarshal.Label var sortedLabelss [][]prompbmarshal.Label
for _, labels := range labelss { for _, labels := range labelss {
sortedLabelss = append(sortedLabelss, discoveryutils.GetSortedLabels(labels)) sortedLabelss = append(sortedLabelss, discoveryutils.GetSortedLabels(labels))
} }
expectedLabels := [][]prompbmarshal.Label{ expectedLabels := [][]prompbmarshal.Label{
discoveryutils.GetSortedLabels(map[string]string{ discoveryutils.GetSortedLabels(map[string]string{
"__address__": "172.31.11.152:423", "__address__": "172.31.11.152:423",
"__meta_ec2_architecture": "x86_64", "__meta_ec2_architecture": "x86_64",
"__meta_ec2_availability_zone": "eu-west-2c", "__meta_ec2_availability_zone": "eu-west-2c",
"__meta_ec2_ami": "ami-0eb89db7593b5d434", "__meta_ec2_availability_zone_id": "foobar-zone",
"__meta_ec2_instance_id": "i-0e730b692d9c15460", "__meta_ec2_ami": "ami-0eb89db7593b5d434",
"__meta_ec2_instance_lifecycle": "spot", "__meta_ec2_instance_id": "i-0e730b692d9c15460",
"__meta_ec2_instance_state": "running", "__meta_ec2_instance_lifecycle": "spot",
"__meta_ec2_instance_type": "t2.micro", "__meta_ec2_instance_state": "running",
"__meta_ec2_owner_id": "793614593844", "__meta_ec2_instance_type": "t2.micro",
"__meta_ec2_platform": "windows", "__meta_ec2_owner_id": "793614593844",
"__meta_ec2_primary_subnet_id": "subnet-57044c3e", "__meta_ec2_platform": "windows",
"__meta_ec2_private_dns_name": "ip-172-31-11-152.eu-west-2.compute.internal", "__meta_ec2_primary_subnet_id": "subnet-57044c3e",
"__meta_ec2_private_ip": "172.31.11.152", "__meta_ec2_private_dns_name": "ip-172-31-11-152.eu-west-2.compute.internal",
"__meta_ec2_public_dns_name": "ec2-3-8-232-141.eu-west-2.compute.amazonaws.com", "__meta_ec2_private_ip": "172.31.11.152",
"__meta_ec2_public_ip": "3.8.232.141", "__meta_ec2_public_dns_name": "ec2-3-8-232-141.eu-west-2.compute.amazonaws.com",
"__meta_ec2_subnet_id": ",subnet-57044c3e,", "__meta_ec2_public_ip": "3.8.232.141",
"__meta_ec2_tag_foo": "bar", "__meta_ec2_subnet_id": ",subnet-57044c3e,",
"__meta_ec2_vpc_id": "vpc-f1eaad99", "__meta_ec2_tag_foo": "bar",
"__meta_ec2_vpc_id": "vpc-f1eaad99",
}), }),
} }
if !reflect.DeepEqual(sortedLabelss, expectedLabels) { if !reflect.DeepEqual(sortedLabelss, expectedLabels) {