From 3a45bbb4e0f21b8d343d41bcd3cb8dd87956ac05 Mon Sep 17 00:00:00 2001 From: LHHDZ Date: Wed, 12 Jun 2024 17:47:44 +0800 Subject: [PATCH] app/vmauth: fix discovering backend IPs when `url_prefix` contains hostname with `srv+` prefix (#6401) This change fixes the following panic: ``` 2024-06-04T11:16:52.899Z warn app/vmauth/auth_config.go:353 cannot discover backend SRV records for http://srv+localhost:8080: lookup localhost on 10.100.10.4:53: server misbehaving; use it literally panic: runtime error: integer divide by zero goroutine 9 [running]: github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver.handlerWrapper.func1() /Users/lhhdz/wd/projects/go/VictoriaMetrics/lib/httpserver/httpserver.go:291 +0x58 panic({0x103115100?, 0x10338d700?}) /Users/lhhdz/go/pkg/mod/golang.org/toolchain@v0.0.1-go1.22.3.darwin-arm64/src/runtime/panic.go:770 +0x124 main.getLeastLoadedBackendURL({0x0?, 0x22?, 0x1400014757b?}, 0x1400013c120?) /Users/lhhdz/wd/projects/go/VictoriaMetrics/app/vmauth/auth_config.go:473 +0x210 main.(*URLPrefix).getBackendURL(0x140000aa080) /Users/lhhdz/wd/projects/go/VictoriaMetrics/app/vmauth/auth_config.go:312 +0xb8 ``` --------- Co-authored-by: Haley Wang --- app/vmauth/auth_config.go | 5 ++- app/vmauth/target_url_test.go | 79 ++++++++++++++++++++++++++++++++++- docs/CHANGELOG.md | 1 + lib/netutil/netutil.go | 22 +++++++--- 4 files changed, 99 insertions(+), 8 deletions(-) diff --git a/app/vmauth/auth_config.go b/app/vmauth/auth_config.go index b4afeee31..c299a6f66 100644 --- a/app/vmauth/auth_config.go +++ b/app/vmauth/auth_config.go @@ -353,7 +353,7 @@ func (up *URLPrefix) discoverBackendAddrsIfNeeded() { logger.Warnf("cannot discover backend SRV records for %s: %s; use it literally", bu, err) resolvedAddrs = []string{host} } else { - resolvedAddrs := make([]string, len(addrs)) + resolvedAddrs = make([]string, len(addrs)) for i, addr := range addrs { resolvedAddrs[i] = fmt.Sprintf("%s:%d", addr.Target, addr.Port) } @@ -380,6 +380,7 @@ func (up *URLPrefix) discoverBackendAddrsIfNeeded() { var busNew []*backendURL for _, bu := range up.busOriginal { host := bu.Hostname() + host = strings.TrimPrefix(host, "srv+") port := bu.Port() for _, addr := range hostToAddrs[host] { buCopy := *bu @@ -1011,6 +1012,8 @@ func (up *URLPrefix) sanitizeAndInitialize() error { } } up.bus.Store(&bus) + up.nextDiscoveryDeadline.Store(0) + up.n.Store(0) return nil } diff --git a/app/vmauth/target_url_test.go b/app/vmauth/target_url_test.go index 56871428d..89fb52a7e 100644 --- a/app/vmauth/target_url_test.go +++ b/app/vmauth/target_url_test.go @@ -1,11 +1,15 @@ package main import ( + "context" "fmt" + "net" "net/url" "reflect" "strings" "testing" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil" ) func TestDropPrefixParts(t *testing.T) { @@ -92,7 +96,7 @@ func TestCreateTargetURLSuccess(t *testing.T) { u = normalizeURL(u) up, hc := ui.getURLPrefixAndHeaders(u, nil) if up == nil { - t.Fatalf("cannot determie backend: %s", err) + t.Fatalf("cannot match available backend: %s", err) } bu := up.getBackendURL() target := mergeURLs(bu.url, u, up.dropSrcPathPrefixParts) @@ -256,6 +260,53 @@ func TestCreateTargetURLSuccess(t *testing.T) { f(ui, `/api/v1/query?query=up{env="prod"}`, `http://vmselect/1/prometheus/api/v1/query?query=up%7Benv%3D%22prod%22%7D`, "", "", nil, "least_loaded", 0) f(ui, `/api/v1/query?query=up{foo="bar",env="dev",pod!=""}`, `http://vmselect/0/prometheus/api/v1/query?query=up%7Bfoo%3D%22bar%22%2Cenv%3D%22dev%22%2Cpod%21%3D%22%22%7D`, "", "", nil, "least_loaded", 0) f(ui, `/api/v1/query?query=up{foo="bar"}`, `http://default-server/api/v1/query?query=up%7Bfoo%3D%22bar%22%7D`, "", "", nil, "least_loaded", 0) + + customResolver := &fakeResolver{ + Resolver: &net.Resolver{}, + lookupSRVResults: map[string][]*net.SRV{ + "vmselect": { + { + Target: "10.6.142.50", + Port: 8481, + }, + { + Target: "10.6.142.51", + Port: 8481, + }, + }, + }, + lookupIPAddrResults: map[string][]net.IPAddr{ + "vminsert": { + { + IP: net.ParseIP("10.6.142.52"), + }, + }, + }, + } + netutil.Resolver = customResolver + + // Discover backendURL + allowed := true + ui = &UserInfo{ + URLMaps: []URLMap{ + { + SrcPaths: getRegexs([]string{"/select/.+"}), + URLPrefix: mustParseURL("http://srv+vmselect"), + }, + { + SrcPaths: getRegexs([]string{"/insert/.+"}), + URLPrefix: mustParseURL("http://vminsert:8480"), + }, + }, + DiscoverBackendIPs: &allowed, + URLPrefix: mustParseURL("http://non-exist-dns-addr"), + } + f(ui, `/select/0/prometheus/api/v1/query?query=up`, "http://10.6.142.51:8481/select/0/prometheus/api/v1/query?query=up", "", "", nil, "least_loaded", 0) + // url_prefix counter will be reset, still go to 10.6.142.51 + f(ui, `/select/0/prometheus/api/v1/query?query=up`, "http://10.6.142.51:8481/select/0/prometheus/api/v1/query?query=up", "", "", nil, "least_loaded", 0) + f(ui, `/insert/0/prometheus/api/v1/write`, "http://10.6.142.52:8480/insert/0/prometheus/api/v1/write", "", "", nil, "least_loaded", 0) + // unsuccessful dns resolve + f(ui, `/test`, "http://non-exist-dns-addr/test", "", "", nil, "least_loaded", 0) } func TestCreateTargetURLFailure(t *testing.T) { @@ -295,3 +346,29 @@ func headersToString(hs []*Header) string { } return strings.Join(a, "\n") } + +type fakeResolver struct { + Resolver *net.Resolver + lookupSRVResults map[string][]*net.SRV + lookupIPAddrResults map[string][]net.IPAddr +} + +func (r *fakeResolver) LookupSRV(_ context.Context, _, _, name string) (string, []*net.SRV, error) { + if results, ok := r.lookupSRVResults[name]; ok { + return name, results, nil + } + + return name, nil, fmt.Errorf("no srv results found for host: %s", name) +} + +func (r *fakeResolver) LookupIPAddr(_ context.Context, host string) ([]net.IPAddr, error) { + if results, ok := r.lookupIPAddrResults[host]; ok { + return results, nil + } + + return nil, fmt.Errorf("no results found for host: %s", host) +} + +func (r *fakeResolver) LookupMX(_ context.Context, _ string) ([]*net.MX, error) { + return nil, nil +} diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 28d4f2095..bb8bdf0e2 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -90,6 +90,7 @@ Released at 2024-06-07 * BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): correctly apply `-inmemoryDataFlushInterval` when it's set to minimum supported value 1s. * BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vminsert` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): reduce the default value for `-maxLabelValueLen` command-line flag from `16KiB` to `1KiB`. This should prevent from issues like [this one](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6176) when time series with too long labels are ingested into VictoriaMetrics. * BUGFIX: [vmauth](https://docs.victoriametrics.com/vmauth/): properly release memory used for metrics during config reload. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6247). +* BUGFIX: [vmauth](https://docs.victoriametrics.com/vmauth/): fix discovering backend IPs when `url_prefix` contains hostname with srv+ prefix. Thanks to @shichanglin5 for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6401). * BUGFIX: [dashboards](https://grafana.com/orgs/victoriametrics): fix `AnnotationQueryRunner` error in Grafana when executing annotations query against Prometheus backend. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6309) for details. * BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): filter deleted label names and values from [`/api/v1/labels`](https://docs.victoriametrics.com/url-examples/#apiv1labels) and [`/api/v1/label/.../values`](https://docs.victoriametrics.com/url-examples/#apiv1labelvalues) responses when `match[]` filter matches small number of time series. The issue was introduced [v1.81.0](https://docs.victoriametrics.com/changelog_2022/#v1810). * BUGFIX: [vmalert-tool](https://docs.victoriametrics.com/vmalert-tool/): fix float values template in `input_series`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6391). diff --git a/lib/netutil/netutil.go b/lib/netutil/netutil.go index 39ffc609c..150b27723 100644 --- a/lib/netutil/netutil.go +++ b/lib/netutil/netutil.go @@ -9,6 +9,22 @@ import ( "time" ) +type resolver interface { + LookupSRV(ctx context.Context, service, proto, name string) (cname string, addrs []*net.SRV, err error) + LookupIPAddr(ctx context.Context, host string) ([]net.IPAddr, error) + LookupMX(ctx context.Context, name string) ([]*net.MX, error) +} + +// Resolver is default DNS resolver. +var Resolver resolver + +func init() { + Resolver = &net.Resolver{ + PreferGo: true, + StrictErrors: true, + } +} + // IsTrivialNetworkError returns true if the err can be ignored during logging. func IsTrivialNetworkError(err error) bool { // Suppress trivial network errors, which could occur at remote side. @@ -43,12 +59,6 @@ func DialMaybeSRV(ctx context.Context, network, addr string) (net.Conn, error) { return Dialer.DialContext(ctx, network, addr) } -// Resolver is default DNS resolver. -var Resolver = &net.Resolver{ - PreferGo: true, - StrictErrors: true, -} - // Dialer is default network dialer. var Dialer = &net.Dialer{ Timeout: 30 * time.Second,