lib/promscrape/discovery/ec2: add __meta_ec2_availability_zone_id label as Prometheus 2.29 does

This commit is contained in:
Aliaksandr Valialkin 2021-08-03 13:26:28 +03:00
parent 60cfa5f100
commit 336a2aa2e0
4 changed files with 156 additions and 39 deletions

View file

@ -7,6 +7,7 @@ sort: 15
## tip
* FEATURE: add `present_over_time(m[d])` function, which returns 1 if `m` has at least a single sample over the previous duration `d`. This function has also been added to [Prometheus 2.29](https://github.com/prometheus/prometheus/releases/tag/v2.29.0-rc.0).
* FEATURE: vmagent: add `__meta_ec2_availability_zone_id` label, which has been introduced in [Prometheus 2.29](https://github.com/prometheus/prometheus/releases/tag/v2.29.0-rc.0).
* FEATURE: add `-search.maxSamplesPerSeries` command-line flag for limiting the number of raw samples a single query can process per each time series. This option can protect from out of memory errors when a query processes tens of millions of raw samples per series. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1067).
* FEATURE: add `-search.maxSamplesPerQuery` command-line flag for limiting the number of raw samples a single query can process across all the time series. This option can protect from heavy queries, which select too many raw samples. Thanks to @jiangxinlingdu for [the initial pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1478).
* FEATURE: improve performance for queries that process a big number of time series and/or samples on systems with a big number of CPU cores.

View file

@ -33,6 +33,10 @@ type apiConfig struct {
// Real credentials used for accessing EC2 API.
creds *apiCredentials
credsLock sync.Mutex
// A map from AZ name to AZ id.
azMap map[string]string
azMapLock sync.Mutex
}
// apiCredentials represents aws api credentials

View file

@ -5,6 +5,7 @@ import (
"fmt"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"
)
@ -14,10 +15,11 @@ func getInstancesLabels(cfg *apiConfig) ([]map[string]string, error) {
if err != nil {
return nil, err
}
azMap := getAZMap(cfg)
var ms []map[string]string
for _, r := range rs {
for _, inst := range r.InstanceSet.Items {
ms = inst.appendTargetLabels(ms, r.OwnerID, cfg.port)
ms = inst.appendTargetLabels(ms, r.OwnerID, cfg.port, azMap)
}
}
return ms, nil
@ -25,11 +27,10 @@ func getInstancesLabels(cfg *apiConfig) ([]map[string]string, error) {
func getReservations(cfg *apiConfig) ([]Reservation, error) {
// See https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeInstances.html
action := "DescribeInstances"
var rs []Reservation
pageToken := ""
for {
data, err := getEC2APIResponse(cfg, action, pageToken)
data, err := getEC2APIResponse(cfg, "DescribeInstances", pageToken)
if err != nil {
return nil, fmt.Errorf("cannot obtain instances: %w", err)
}
@ -132,29 +133,87 @@ func parseInstancesResponse(data []byte) (*InstancesResponse, error) {
return &v, nil
}
func (inst *Instance) appendTargetLabels(ms []map[string]string, ownerID string, port int) []map[string]string {
// getAZMap returns the mapping from availability zone name to availability zone id for cfg.
//
// The mapping is loaded via getAvailabilityZones on the first call and is stored
// in cfg.azMap, so subsequent calls return the stored map without loading it again.
// If the loading fails, a warning is logged and an empty map is stored and returned.
func getAZMap(cfg *apiConfig) map[string]string {
	cfg.azMapLock.Lock()
	defer cfg.azMapLock.Unlock()

	if cached := cfg.azMap; cached != nil {
		return cached
	}

	zones, err := getAvailabilityZones(cfg)
	// Store a non-nil map even on failure, so the nil check above succeeds on the next call.
	zoneIDByName := make(map[string]string, len(zones))
	cfg.azMap = zoneIDByName
	if err != nil {
		logger.Warnf("couldn't load availability zones map, so __meta_ec2_availability_zone_id label isn't set: %s", err)
		return zoneIDByName
	}
	for i := range zones {
		zoneIDByName[zones[i].ZoneName] = zones[i].ZoneID
	}
	return zoneIDByName
}
// getAvailabilityZones requests the DescribeAvailabilityZones action via getEC2APIResponse
// and returns the availability zone items parsed from the response.
//
// See https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeAvailabilityZones.html
func getAvailabilityZones(cfg *apiConfig) ([]AvailabilityZone, error) {
	const action = "DescribeAvailabilityZones"

	body, err := getEC2APIResponse(cfg, action, "")
	if err != nil {
		return nil, fmt.Errorf("cannot obtain availability zones: %w", err)
	}

	resp, err := parseAvailabilityZonesResponse(body)
	if err != nil {
		return nil, fmt.Errorf("cannot parse availability zones list: %w", err)
	}

	zones := resp.AvailabilityZoneInfo.Items
	return zones, nil
}
// AvailabilityZonesResponse is the top-level document returned by the DescribeAvailabilityZones action.
//
// Only the `availabilityZoneInfo` element is decoded.
//
// See https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeAvailabilityZones.html
type AvailabilityZonesResponse struct {
	AvailabilityZoneInfo AvailabilityZoneInfo `xml:"availabilityZoneInfo"`
}
// AvailabilityZoneInfo holds the `item` entries of the `availabilityZoneInfo` element
// from the DescribeAvailabilityZones response.
//
// See https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeAvailabilityZones.html
type AvailabilityZoneInfo struct {
	Items []AvailabilityZone `xml:"item"`
}
// AvailabilityZone is a single availability zone entry.
//
// ZoneName is decoded from the `zoneName` element and ZoneID from the `zoneId` element.
//
// See https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_AvailabilityZone.html
type AvailabilityZone struct {
	ZoneName string `xml:"zoneName"`
	ZoneID   string `xml:"zoneId"`
}
// parseAvailabilityZonesResponse decodes the XML in data into AvailabilityZonesResponse.
//
// A nil response and a wrapped error are returned if data cannot be unmarshaled.
func parseAvailabilityZonesResponse(data []byte) (*AvailabilityZonesResponse, error) {
	azr := &AvailabilityZonesResponse{}
	err := xml.Unmarshal(data, azr)
	if err != nil {
		return nil, fmt.Errorf("cannot unmarshal DescribeAvailabilityZonesResponse from %q: %w", data, err)
	}
	return azr, nil
}
func (inst *Instance) appendTargetLabels(ms []map[string]string, ownerID string, port int, azMap map[string]string) []map[string]string {
if len(inst.PrivateIPAddress) == 0 {
// Cannot scrape instance without private IP address
return ms
}
addr := discoveryutils.JoinHostPort(inst.PrivateIPAddress, port)
m := map[string]string{
"__address__": addr,
"__meta_ec2_architecture": inst.Architecture,
"__meta_ec2_ami": inst.ImageID,
"__meta_ec2_availability_zone": inst.Placement.AvailabilityZone,
"__meta_ec2_instance_id": inst.ID,
"__meta_ec2_instance_lifecycle": inst.Lifecycle,
"__meta_ec2_instance_state": inst.State.Name,
"__meta_ec2_instance_type": inst.Type,
"__meta_ec2_owner_id": ownerID,
"__meta_ec2_platform": inst.Platform,
"__meta_ec2_primary_subnet_id": inst.SubnetID,
"__meta_ec2_private_dns_name": inst.PrivateDNSName,
"__meta_ec2_private_ip": inst.PrivateIPAddress,
"__meta_ec2_public_dns_name": inst.PublicDNSName,
"__meta_ec2_public_ip": inst.PublicIPAddress,
"__meta_ec2_vpc_id": inst.VPCID,
"__address__": addr,
"__meta_ec2_architecture": inst.Architecture,
"__meta_ec2_ami": inst.ImageID,
"__meta_ec2_availability_zone": inst.Placement.AvailabilityZone,
"__meta_ec2_availability_zone_id": azMap[inst.Placement.AvailabilityZone],
"__meta_ec2_instance_id": inst.ID,
"__meta_ec2_instance_lifecycle": inst.Lifecycle,
"__meta_ec2_instance_state": inst.State.Name,
"__meta_ec2_instance_type": inst.Type,
"__meta_ec2_owner_id": ownerID,
"__meta_ec2_platform": inst.Platform,
"__meta_ec2_primary_subnet_id": inst.SubnetID,
"__meta_ec2_private_dns_name": inst.PrivateDNSName,
"__meta_ec2_private_ip": inst.PrivateIPAddress,
"__meta_ec2_public_dns_name": inst.PublicDNSName,
"__meta_ec2_public_ip": inst.PublicIPAddress,
"__meta_ec2_vpc_id": inst.VPCID,
}
if len(inst.VPCID) > 0 {
subnets := make([]string, 0, len(inst.NetworkInterfaceSet.Items))

View file

@ -8,6 +8,56 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"
)
// TestDescribeAvailabilityZonesResponse verifies that parseAvailabilityZonesResponse
// extracts zone names and zone ids from a DescribeAvailabilityZones XML document,
// while the remaining elements of every item are ignored.
func TestDescribeAvailabilityZonesResponse(t *testing.T) {
	responseXML := `<DescribeAvailabilityZonesResponse xmlns="http://ec2.amazonaws.com/doc/2016-11-15/">
<requestId>e23c5a54-a29c-43ee-8b55-0c13c26e9e01</requestId>
<availabilityZoneInfo>
<item>
<optInStatus>opt-in-not-required</optInStatus>
<zoneName>us-west-2a</zoneName>
<zoneId>usw2-az1</zoneId>
<zoneState>available</zoneState>
<regionName>us-west-2</regionName>
<messageSet/>
<NetworkBorderGroup>us-west-2-lax-1</NetworkBorderGroup>
</item>
<item>
<groupName>us-west-2</groupName>
<optInStatus>opt-in-not-required</optInStatus>
<zoneName>us-west-2b</zoneName>
<zoneId>usw2-az2</zoneId>
<zoneState>available</zoneState>
<regionName>us-west-2</regionName>
<messageSet/>
<NetworkBorderGroup>us-west-2-lax-1</NetworkBorderGroup>
</item>
</availabilityZoneInfo>
</DescribeAvailabilityZonesResponse>
`
	got, err := parseAvailabilityZonesResponse([]byte(responseXML))
	if err != nil {
		t.Fatalf("unexpected error when parsing data: %s", err)
	}

	// Only the zone name and the zone id of every item are expected in the parsed result.
	want := &AvailabilityZonesResponse{}
	want.AvailabilityZoneInfo.Items = []AvailabilityZone{
		{ZoneName: "us-west-2a", ZoneID: "usw2-az1"},
		{ZoneName: "us-west-2b", ZoneID: "usw2-az2"},
	}
	if !reflect.DeepEqual(got, want) {
		t.Fatalf("unexpected DescribeAvailabilityZonesResponse parsed;\ngot\n%+v\nwant\n%+v", got, want)
	}
}
func TestParseInstancesResponse(t *testing.T) {
data := `<?xml version="1.0" encoding="UTF-8"?>
<DescribeInstancesResponse xmlns="http://ec2.amazonaws.com/doc/2013-10-15/">
@ -188,31 +238,34 @@ func TestParseInstancesResponse(t *testing.T) {
ownerID := rs.OwnerID
port := 423
inst := rs.InstanceSet.Items[0]
labelss := inst.appendTargetLabels(nil, ownerID, port)
labelss := inst.appendTargetLabels(nil, ownerID, port, map[string]string{
"eu-west-2c": "foobar-zone",
})
var sortedLabelss [][]prompbmarshal.Label
for _, labels := range labelss {
sortedLabelss = append(sortedLabelss, discoveryutils.GetSortedLabels(labels))
}
expectedLabels := [][]prompbmarshal.Label{
discoveryutils.GetSortedLabels(map[string]string{
"__address__": "172.31.11.152:423",
"__meta_ec2_architecture": "x86_64",
"__meta_ec2_availability_zone": "eu-west-2c",
"__meta_ec2_ami": "ami-0eb89db7593b5d434",
"__meta_ec2_instance_id": "i-0e730b692d9c15460",
"__meta_ec2_instance_lifecycle": "spot",
"__meta_ec2_instance_state": "running",
"__meta_ec2_instance_type": "t2.micro",
"__meta_ec2_owner_id": "793614593844",
"__meta_ec2_platform": "windows",
"__meta_ec2_primary_subnet_id": "subnet-57044c3e",
"__meta_ec2_private_dns_name": "ip-172-31-11-152.eu-west-2.compute.internal",
"__meta_ec2_private_ip": "172.31.11.152",
"__meta_ec2_public_dns_name": "ec2-3-8-232-141.eu-west-2.compute.amazonaws.com",
"__meta_ec2_public_ip": "3.8.232.141",
"__meta_ec2_subnet_id": ",subnet-57044c3e,",
"__meta_ec2_tag_foo": "bar",
"__meta_ec2_vpc_id": "vpc-f1eaad99",
"__address__": "172.31.11.152:423",
"__meta_ec2_architecture": "x86_64",
"__meta_ec2_availability_zone": "eu-west-2c",
"__meta_ec2_availability_zone_id": "foobar-zone",
"__meta_ec2_ami": "ami-0eb89db7593b5d434",
"__meta_ec2_instance_id": "i-0e730b692d9c15460",
"__meta_ec2_instance_lifecycle": "spot",
"__meta_ec2_instance_state": "running",
"__meta_ec2_instance_type": "t2.micro",
"__meta_ec2_owner_id": "793614593844",
"__meta_ec2_platform": "windows",
"__meta_ec2_primary_subnet_id": "subnet-57044c3e",
"__meta_ec2_private_dns_name": "ip-172-31-11-152.eu-west-2.compute.internal",
"__meta_ec2_private_ip": "172.31.11.152",
"__meta_ec2_public_dns_name": "ec2-3-8-232-141.eu-west-2.compute.amazonaws.com",
"__meta_ec2_public_ip": "3.8.232.141",
"__meta_ec2_subnet_id": ",subnet-57044c3e,",
"__meta_ec2_tag_foo": "bar",
"__meta_ec2_vpc_id": "vpc-f1eaad99",
}),
}
if !reflect.DeepEqual(sortedLabelss, expectedLabels) {