app/vmauth: fix discovering backend IPs when url_prefix contains hostname with srv+ prefix (#6401)

This change fixes the following panic:
```
2024-06-04T11:16:52.899Z        warn    app/vmauth/auth_config.go:353   cannot discover backend SRV records for http://srv+localhost:8080: lookup localhost on 10.100.10.4:53: server misbehaving; use it literally
panic: runtime error: integer divide by zero

goroutine 9 [running]:
github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver.handlerWrapper.func1()
        /Users/lhhdz/wd/projects/go/VictoriaMetrics/lib/httpserver/httpserver.go:291 +0x58
panic({0x103115100?, 0x10338d700?})
        /Users/lhhdz/go/pkg/mod/golang.org/toolchain@v0.0.1-go1.22.3.darwin-arm64/src/runtime/panic.go:770 +0x124
main.getLeastLoadedBackendURL({0x0?, 0x22?, 0x1400014757b?}, 0x1400013c120?)
        /Users/lhhdz/wd/projects/go/VictoriaMetrics/app/vmauth/auth_config.go:473 +0x210
main.(*URLPrefix).getBackendURL(0x140000aa080)
        /Users/lhhdz/wd/projects/go/VictoriaMetrics/app/vmauth/auth_config.go:312 +0xb8
```

---------

Co-authored-by: Haley Wang <haley@victoriametrics.com>
This commit is contained in:
LHHDZ 2024-06-12 17:47:44 +08:00 committed by hagen1778
parent 0269c89e03
commit 3a45bbb4e0
No known key found for this signature in database
GPG key ID: 3BF75F3741CA9640
4 changed files with 99 additions and 8 deletions

View file

@ -353,7 +353,7 @@ func (up *URLPrefix) discoverBackendAddrsIfNeeded() {
logger.Warnf("cannot discover backend SRV records for %s: %s; use it literally", bu, err)
resolvedAddrs = []string{host}
} else {
resolvedAddrs := make([]string, len(addrs))
resolvedAddrs = make([]string, len(addrs))
for i, addr := range addrs {
resolvedAddrs[i] = fmt.Sprintf("%s:%d", addr.Target, addr.Port)
}
@ -380,6 +380,7 @@ func (up *URLPrefix) discoverBackendAddrsIfNeeded() {
var busNew []*backendURL
for _, bu := range up.busOriginal {
host := bu.Hostname()
host = strings.TrimPrefix(host, "srv+")
port := bu.Port()
for _, addr := range hostToAddrs[host] {
buCopy := *bu
@ -1011,6 +1012,8 @@ func (up *URLPrefix) sanitizeAndInitialize() error {
}
}
up.bus.Store(&bus)
up.nextDiscoveryDeadline.Store(0)
up.n.Store(0)
return nil
}

View file

@ -1,11 +1,15 @@
package main
import (
"context"
"fmt"
"net"
"net/url"
"reflect"
"strings"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
)
func TestDropPrefixParts(t *testing.T) {
@ -92,7 +96,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
u = normalizeURL(u)
up, hc := ui.getURLPrefixAndHeaders(u, nil)
if up == nil {
t.Fatalf("cannot determie backend: %s", err)
t.Fatalf("cannot match available backend: %s", err)
}
bu := up.getBackendURL()
target := mergeURLs(bu.url, u, up.dropSrcPathPrefixParts)
@ -256,6 +260,53 @@ func TestCreateTargetURLSuccess(t *testing.T) {
f(ui, `/api/v1/query?query=up{env="prod"}`, `http://vmselect/1/prometheus/api/v1/query?query=up%7Benv%3D%22prod%22%7D`, "", "", nil, "least_loaded", 0)
f(ui, `/api/v1/query?query=up{foo="bar",env="dev",pod!=""}`, `http://vmselect/0/prometheus/api/v1/query?query=up%7Bfoo%3D%22bar%22%2Cenv%3D%22dev%22%2Cpod%21%3D%22%22%7D`, "", "", nil, "least_loaded", 0)
f(ui, `/api/v1/query?query=up{foo="bar"}`, `http://default-server/api/v1/query?query=up%7Bfoo%3D%22bar%22%7D`, "", "", nil, "least_loaded", 0)
customResolver := &fakeResolver{
Resolver: &net.Resolver{},
lookupSRVResults: map[string][]*net.SRV{
"vmselect": {
{
Target: "10.6.142.50",
Port: 8481,
},
{
Target: "10.6.142.51",
Port: 8481,
},
},
},
lookupIPAddrResults: map[string][]net.IPAddr{
"vminsert": {
{
IP: net.ParseIP("10.6.142.52"),
},
},
},
}
netutil.Resolver = customResolver
// Discover backendURL
allowed := true
ui = &UserInfo{
URLMaps: []URLMap{
{
SrcPaths: getRegexs([]string{"/select/.+"}),
URLPrefix: mustParseURL("http://srv+vmselect"),
},
{
SrcPaths: getRegexs([]string{"/insert/.+"}),
URLPrefix: mustParseURL("http://vminsert:8480"),
},
},
DiscoverBackendIPs: &allowed,
URLPrefix: mustParseURL("http://non-exist-dns-addr"),
}
f(ui, `/select/0/prometheus/api/v1/query?query=up`, "http://10.6.142.51:8481/select/0/prometheus/api/v1/query?query=up", "", "", nil, "least_loaded", 0)
// url_prefix counter will be reset, still go to 10.6.142.51
f(ui, `/select/0/prometheus/api/v1/query?query=up`, "http://10.6.142.51:8481/select/0/prometheus/api/v1/query?query=up", "", "", nil, "least_loaded", 0)
f(ui, `/insert/0/prometheus/api/v1/write`, "http://10.6.142.52:8480/insert/0/prometheus/api/v1/write", "", "", nil, "least_loaded", 0)
// unsuccessful dns resolve
f(ui, `/test`, "http://non-exist-dns-addr/test", "", "", nil, "least_loaded", 0)
}
func TestCreateTargetURLFailure(t *testing.T) {
@ -295,3 +346,29 @@ func headersToString(hs []*Header) string {
}
return strings.Join(a, "\n")
}
type fakeResolver struct {
Resolver *net.Resolver
lookupSRVResults map[string][]*net.SRV
lookupIPAddrResults map[string][]net.IPAddr
}
func (r *fakeResolver) LookupSRV(_ context.Context, _, _, name string) (string, []*net.SRV, error) {
if results, ok := r.lookupSRVResults[name]; ok {
return name, results, nil
}
return name, nil, fmt.Errorf("no srv results found for host: %s", name)
}
func (r *fakeResolver) LookupIPAddr(_ context.Context, host string) ([]net.IPAddr, error) {
if results, ok := r.lookupIPAddrResults[host]; ok {
return results, nil
}
return nil, fmt.Errorf("no results found for host: %s", host)
}
func (r *fakeResolver) LookupMX(_ context.Context, _ string) ([]*net.MX, error) {
return nil, nil
}

View file

@ -90,6 +90,7 @@ Released at 2024-06-07
* BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): correctly apply `-inmemoryDataFlushInterval` when it's set to minimum supported value 1s.
* BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vminsert` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): reduce the default value for `-maxLabelValueLen` command-line flag from `16KiB` to `1KiB`. This should prevent from issues like [this one](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6176) when time series with too long labels are ingested into VictoriaMetrics.
* BUGFIX: [vmauth](https://docs.victoriametrics.com/vmauth/): properly release memory used for metrics during config reload. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6247).
* BUGFIX: [vmauth](https://docs.victoriametrics.com/vmauth/): fix discovering backend IPs when `url_prefix` contains hostname with srv+ prefix. Thanks to @shichanglin5 for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6401).
* BUGFIX: [dashboards](https://grafana.com/orgs/victoriametrics): fix `AnnotationQueryRunner` error in Grafana when executing annotations query against Prometheus backend. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6309) for details.
* BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): filter deleted label names and values from [`/api/v1/labels`](https://docs.victoriametrics.com/url-examples/#apiv1labels) and [`/api/v1/label/.../values`](https://docs.victoriametrics.com/url-examples/#apiv1labelvalues) responses when `match[]` filter matches small number of time series. The issue was introduced [v1.81.0](https://docs.victoriametrics.com/changelog_2022/#v1810).
* BUGFIX: [vmalert-tool](https://docs.victoriametrics.com/vmalert-tool/): fix float values template in `input_series`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6391).

View file

@ -9,6 +9,22 @@ import (
"time"
)
type resolver interface {
LookupSRV(ctx context.Context, service, proto, name string) (cname string, addrs []*net.SRV, err error)
LookupIPAddr(ctx context.Context, host string) ([]net.IPAddr, error)
LookupMX(ctx context.Context, name string) ([]*net.MX, error)
}
// Resolver is default DNS resolver.
var Resolver resolver
func init() {
Resolver = &net.Resolver{
PreferGo: true,
StrictErrors: true,
}
}
// IsTrivialNetworkError returns true if the err can be ignored during logging.
func IsTrivialNetworkError(err error) bool {
// Suppress trivial network errors, which could occur at remote side.
@ -43,12 +59,6 @@ func DialMaybeSRV(ctx context.Context, network, addr string) (net.Conn, error) {
return Dialer.DialContext(ctx, network, addr)
}
// Resolver is default DNS resolver.
var Resolver = &net.Resolver{
PreferGo: true,
StrictErrors: true,
}
// Dialer is default network dialer.
var Dialer = &net.Dialer{
Timeout: 30 * time.Second,