mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
vmalert-tool: implement unittest (#4789)
1. split package rule under /app/vmalert, expose needed objects 2. add vmalert-tool with unittest subcmd https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2945
This commit is contained in:
parent
98a5007d32
commit
dc28196237
74 changed files with 3997 additions and 1683 deletions
62
Makefile
62
Makefile
|
@ -28,7 +28,8 @@ all: \
|
||||||
vmauth-prod \
|
vmauth-prod \
|
||||||
vmbackup-prod \
|
vmbackup-prod \
|
||||||
vmrestore-prod \
|
vmrestore-prod \
|
||||||
vmctl-prod
|
vmctl-prod \
|
||||||
|
vmalert-tool-prod
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf bin/*
|
rm -rf bin/*
|
||||||
|
@ -40,7 +41,8 @@ publish: package-base \
|
||||||
publish-vmauth \
|
publish-vmauth \
|
||||||
publish-vmbackup \
|
publish-vmbackup \
|
||||||
publish-vmrestore \
|
publish-vmrestore \
|
||||||
publish-vmctl
|
publish-vmctl \
|
||||||
|
publish-vmalert-tool
|
||||||
|
|
||||||
package: \
|
package: \
|
||||||
package-victoria-metrics \
|
package-victoria-metrics \
|
||||||
|
@ -50,7 +52,8 @@ package: \
|
||||||
package-vmauth \
|
package-vmauth \
|
||||||
package-vmbackup \
|
package-vmbackup \
|
||||||
package-vmrestore \
|
package-vmrestore \
|
||||||
package-vmctl
|
package-vmctl \
|
||||||
|
package-vmalert-tool
|
||||||
|
|
||||||
vmutils: \
|
vmutils: \
|
||||||
vmagent \
|
vmagent \
|
||||||
|
@ -58,7 +61,8 @@ vmutils: \
|
||||||
vmauth \
|
vmauth \
|
||||||
vmbackup \
|
vmbackup \
|
||||||
vmrestore \
|
vmrestore \
|
||||||
vmctl
|
vmctl \
|
||||||
|
vmalert-tool
|
||||||
|
|
||||||
vmutils-pure: \
|
vmutils-pure: \
|
||||||
vmagent-pure \
|
vmagent-pure \
|
||||||
|
@ -66,7 +70,8 @@ vmutils-pure: \
|
||||||
vmauth-pure \
|
vmauth-pure \
|
||||||
vmbackup-pure \
|
vmbackup-pure \
|
||||||
vmrestore-pure \
|
vmrestore-pure \
|
||||||
vmctl-pure
|
vmctl-pure \
|
||||||
|
vmalert-tool-pure
|
||||||
|
|
||||||
vmutils-linux-amd64: \
|
vmutils-linux-amd64: \
|
||||||
vmagent-linux-amd64 \
|
vmagent-linux-amd64 \
|
||||||
|
@ -74,7 +79,8 @@ vmutils-linux-amd64: \
|
||||||
vmauth-linux-amd64 \
|
vmauth-linux-amd64 \
|
||||||
vmbackup-linux-amd64 \
|
vmbackup-linux-amd64 \
|
||||||
vmrestore-linux-amd64 \
|
vmrestore-linux-amd64 \
|
||||||
vmctl-linux-amd64
|
vmctl-linux-amd64 \
|
||||||
|
vmalert-tool-linux-amd64
|
||||||
|
|
||||||
vmutils-linux-arm64: \
|
vmutils-linux-arm64: \
|
||||||
vmagent-linux-arm64 \
|
vmagent-linux-arm64 \
|
||||||
|
@ -82,7 +88,8 @@ vmutils-linux-arm64: \
|
||||||
vmauth-linux-arm64 \
|
vmauth-linux-arm64 \
|
||||||
vmbackup-linux-arm64 \
|
vmbackup-linux-arm64 \
|
||||||
vmrestore-linux-arm64 \
|
vmrestore-linux-arm64 \
|
||||||
vmctl-linux-arm64
|
vmctl-linux-arm64 \
|
||||||
|
vmalert-tool-linux-arm64
|
||||||
|
|
||||||
vmutils-linux-arm: \
|
vmutils-linux-arm: \
|
||||||
vmagent-linux-arm \
|
vmagent-linux-arm \
|
||||||
|
@ -90,7 +97,8 @@ vmutils-linux-arm: \
|
||||||
vmauth-linux-arm \
|
vmauth-linux-arm \
|
||||||
vmbackup-linux-arm \
|
vmbackup-linux-arm \
|
||||||
vmrestore-linux-arm \
|
vmrestore-linux-arm \
|
||||||
vmctl-linux-arm
|
vmctl-linux-arm \
|
||||||
|
vmalert-tool-linux-arm
|
||||||
|
|
||||||
vmutils-linux-386: \
|
vmutils-linux-386: \
|
||||||
vmagent-linux-386 \
|
vmagent-linux-386 \
|
||||||
|
@ -98,7 +106,8 @@ vmutils-linux-386: \
|
||||||
vmauth-linux-386 \
|
vmauth-linux-386 \
|
||||||
vmbackup-linux-386 \
|
vmbackup-linux-386 \
|
||||||
vmrestore-linux-386 \
|
vmrestore-linux-386 \
|
||||||
vmctl-linux-386
|
vmctl-linux-386 \
|
||||||
|
vmalert-tool-linux-386
|
||||||
|
|
||||||
vmutils-linux-ppc64le: \
|
vmutils-linux-ppc64le: \
|
||||||
vmagent-linux-ppc64le \
|
vmagent-linux-ppc64le \
|
||||||
|
@ -106,7 +115,8 @@ vmutils-linux-ppc64le: \
|
||||||
vmauth-linux-ppc64le \
|
vmauth-linux-ppc64le \
|
||||||
vmbackup-linux-ppc64le \
|
vmbackup-linux-ppc64le \
|
||||||
vmrestore-linux-ppc64le \
|
vmrestore-linux-ppc64le \
|
||||||
vmctl-linux-ppc64le
|
vmctl-linux-ppc64le \
|
||||||
|
vmalert-tool-linux-ppc64le
|
||||||
|
|
||||||
vmutils-darwin-amd64: \
|
vmutils-darwin-amd64: \
|
||||||
vmagent-darwin-amd64 \
|
vmagent-darwin-amd64 \
|
||||||
|
@ -114,7 +124,8 @@ vmutils-darwin-amd64: \
|
||||||
vmauth-darwin-amd64 \
|
vmauth-darwin-amd64 \
|
||||||
vmbackup-darwin-amd64 \
|
vmbackup-darwin-amd64 \
|
||||||
vmrestore-darwin-amd64 \
|
vmrestore-darwin-amd64 \
|
||||||
vmctl-darwin-amd64
|
vmctl-darwin-amd64 \
|
||||||
|
vmalert-tool-darwin-amd64
|
||||||
|
|
||||||
vmutils-darwin-arm64: \
|
vmutils-darwin-arm64: \
|
||||||
vmagent-darwin-arm64 \
|
vmagent-darwin-arm64 \
|
||||||
|
@ -122,7 +133,8 @@ vmutils-darwin-arm64: \
|
||||||
vmauth-darwin-arm64 \
|
vmauth-darwin-arm64 \
|
||||||
vmbackup-darwin-arm64 \
|
vmbackup-darwin-arm64 \
|
||||||
vmrestore-darwin-arm64 \
|
vmrestore-darwin-arm64 \
|
||||||
vmctl-darwin-arm64
|
vmctl-darwin-arm64 \
|
||||||
|
vmalert-tool-darwin-arm64
|
||||||
|
|
||||||
vmutils-freebsd-amd64: \
|
vmutils-freebsd-amd64: \
|
||||||
vmagent-freebsd-amd64 \
|
vmagent-freebsd-amd64 \
|
||||||
|
@ -130,7 +142,8 @@ vmutils-freebsd-amd64: \
|
||||||
vmauth-freebsd-amd64 \
|
vmauth-freebsd-amd64 \
|
||||||
vmbackup-freebsd-amd64 \
|
vmbackup-freebsd-amd64 \
|
||||||
vmrestore-freebsd-amd64 \
|
vmrestore-freebsd-amd64 \
|
||||||
vmctl-freebsd-amd64
|
vmctl-freebsd-amd64 \
|
||||||
|
vmalert-tool-freebsd-amd64
|
||||||
|
|
||||||
vmutils-openbsd-amd64: \
|
vmutils-openbsd-amd64: \
|
||||||
vmagent-openbsd-amd64 \
|
vmagent-openbsd-amd64 \
|
||||||
|
@ -138,7 +151,8 @@ vmutils-openbsd-amd64: \
|
||||||
vmauth-openbsd-amd64 \
|
vmauth-openbsd-amd64 \
|
||||||
vmbackup-openbsd-amd64 \
|
vmbackup-openbsd-amd64 \
|
||||||
vmrestore-openbsd-amd64 \
|
vmrestore-openbsd-amd64 \
|
||||||
vmctl-openbsd-amd64
|
vmctl-openbsd-amd64 \
|
||||||
|
vmalert-tool-openbsd-amd64
|
||||||
|
|
||||||
vmutils-windows-amd64: \
|
vmutils-windows-amd64: \
|
||||||
vmagent-windows-amd64 \
|
vmagent-windows-amd64 \
|
||||||
|
@ -146,7 +160,8 @@ vmutils-windows-amd64: \
|
||||||
vmauth-windows-amd64 \
|
vmauth-windows-amd64 \
|
||||||
vmbackup-windows-amd64 \
|
vmbackup-windows-amd64 \
|
||||||
vmrestore-windows-amd64 \
|
vmrestore-windows-amd64 \
|
||||||
vmctl-windows-amd64
|
vmctl-windows-amd64 \
|
||||||
|
vmalert-tool-windows-amd64
|
||||||
|
|
||||||
victoria-metrics-crossbuild: \
|
victoria-metrics-crossbuild: \
|
||||||
victoria-metrics-linux-386 \
|
victoria-metrics-linux-386 \
|
||||||
|
@ -342,7 +357,8 @@ release-vmutils-goos-goarch: \
|
||||||
vmauth-$(GOOS)-$(GOARCH)-prod \
|
vmauth-$(GOOS)-$(GOARCH)-prod \
|
||||||
vmbackup-$(GOOS)-$(GOARCH)-prod \
|
vmbackup-$(GOOS)-$(GOARCH)-prod \
|
||||||
vmrestore-$(GOOS)-$(GOARCH)-prod \
|
vmrestore-$(GOOS)-$(GOARCH)-prod \
|
||||||
vmctl-$(GOOS)-$(GOARCH)-prod
|
vmctl-$(GOOS)-$(GOARCH)-prod \
|
||||||
|
vmalert-tool-$(GOOS)-$(GOARCH)-prod
|
||||||
cd bin && \
|
cd bin && \
|
||||||
tar --transform="flags=r;s|-$(GOOS)-$(GOARCH)||" -czf vmutils-$(GOOS)-$(GOARCH)-$(PKG_TAG).tar.gz \
|
tar --transform="flags=r;s|-$(GOOS)-$(GOARCH)||" -czf vmutils-$(GOOS)-$(GOARCH)-$(PKG_TAG).tar.gz \
|
||||||
vmagent-$(GOOS)-$(GOARCH)-prod \
|
vmagent-$(GOOS)-$(GOARCH)-prod \
|
||||||
|
@ -351,6 +367,7 @@ release-vmutils-goos-goarch: \
|
||||||
vmbackup-$(GOOS)-$(GOARCH)-prod \
|
vmbackup-$(GOOS)-$(GOARCH)-prod \
|
||||||
vmrestore-$(GOOS)-$(GOARCH)-prod \
|
vmrestore-$(GOOS)-$(GOARCH)-prod \
|
||||||
vmctl-$(GOOS)-$(GOARCH)-prod \
|
vmctl-$(GOOS)-$(GOARCH)-prod \
|
||||||
|
vmalert-tool-$(GOOS)-$(GOARCH)-prod
|
||||||
&& sha256sum vmutils-$(GOOS)-$(GOARCH)-$(PKG_TAG).tar.gz \
|
&& sha256sum vmutils-$(GOOS)-$(GOARCH)-$(PKG_TAG).tar.gz \
|
||||||
vmagent-$(GOOS)-$(GOARCH)-prod \
|
vmagent-$(GOOS)-$(GOARCH)-prod \
|
||||||
vmalert-$(GOOS)-$(GOARCH)-prod \
|
vmalert-$(GOOS)-$(GOARCH)-prod \
|
||||||
|
@ -358,6 +375,7 @@ release-vmutils-goos-goarch: \
|
||||||
vmbackup-$(GOOS)-$(GOARCH)-prod \
|
vmbackup-$(GOOS)-$(GOARCH)-prod \
|
||||||
vmrestore-$(GOOS)-$(GOARCH)-prod \
|
vmrestore-$(GOOS)-$(GOARCH)-prod \
|
||||||
vmctl-$(GOOS)-$(GOARCH)-prod \
|
vmctl-$(GOOS)-$(GOARCH)-prod \
|
||||||
|
vmalert-tool-$(GOOS)-$(GOARCH)-prod \
|
||||||
| sed s/-$(GOOS)-$(GOARCH)-prod/-prod/ > vmutils-$(GOOS)-$(GOARCH)-$(PKG_TAG)_checksums.txt
|
| sed s/-$(GOOS)-$(GOARCH)-prod/-prod/ > vmutils-$(GOOS)-$(GOARCH)-$(PKG_TAG)_checksums.txt
|
||||||
cd bin && rm -rf \
|
cd bin && rm -rf \
|
||||||
vmagent-$(GOOS)-$(GOARCH)-prod \
|
vmagent-$(GOOS)-$(GOARCH)-prod \
|
||||||
|
@ -365,7 +383,8 @@ release-vmutils-goos-goarch: \
|
||||||
vmauth-$(GOOS)-$(GOARCH)-prod \
|
vmauth-$(GOOS)-$(GOARCH)-prod \
|
||||||
vmbackup-$(GOOS)-$(GOARCH)-prod \
|
vmbackup-$(GOOS)-$(GOARCH)-prod \
|
||||||
vmrestore-$(GOOS)-$(GOARCH)-prod \
|
vmrestore-$(GOOS)-$(GOARCH)-prod \
|
||||||
vmctl-$(GOOS)-$(GOARCH)-prod
|
vmctl-$(GOOS)-$(GOARCH)-prod \
|
||||||
|
vmalert-tool-$(GOOS)-$(GOARCH)-prod
|
||||||
|
|
||||||
release-vmutils-windows-goarch: \
|
release-vmutils-windows-goarch: \
|
||||||
vmagent-windows-$(GOARCH)-prod \
|
vmagent-windows-$(GOARCH)-prod \
|
||||||
|
@ -373,7 +392,8 @@ release-vmutils-windows-goarch: \
|
||||||
vmauth-windows-$(GOARCH)-prod \
|
vmauth-windows-$(GOARCH)-prod \
|
||||||
vmbackup-windows-$(GOARCH)-prod \
|
vmbackup-windows-$(GOARCH)-prod \
|
||||||
vmrestore-windows-$(GOARCH)-prod \
|
vmrestore-windows-$(GOARCH)-prod \
|
||||||
vmctl-windows-$(GOARCH)-prod
|
vmctl-windows-$(GOARCH)-prod \
|
||||||
|
vmalert-tool-windows-$(GOARCH)-prod
|
||||||
cd bin && \
|
cd bin && \
|
||||||
zip vmutils-windows-$(GOARCH)-$(PKG_TAG).zip \
|
zip vmutils-windows-$(GOARCH)-$(PKG_TAG).zip \
|
||||||
vmagent-windows-$(GOARCH)-prod.exe \
|
vmagent-windows-$(GOARCH)-prod.exe \
|
||||||
|
@ -382,6 +402,7 @@ release-vmutils-windows-goarch: \
|
||||||
vmbackup-windows-$(GOARCH)-prod.exe \
|
vmbackup-windows-$(GOARCH)-prod.exe \
|
||||||
vmrestore-windows-$(GOARCH)-prod.exe \
|
vmrestore-windows-$(GOARCH)-prod.exe \
|
||||||
vmctl-windows-$(GOARCH)-prod.exe \
|
vmctl-windows-$(GOARCH)-prod.exe \
|
||||||
|
vmalert-tool-windows-$(GOARCH)-prod.exe \
|
||||||
&& sha256sum vmutils-windows-$(GOARCH)-$(PKG_TAG).zip \
|
&& sha256sum vmutils-windows-$(GOARCH)-$(PKG_TAG).zip \
|
||||||
vmagent-windows-$(GOARCH)-prod.exe \
|
vmagent-windows-$(GOARCH)-prod.exe \
|
||||||
vmalert-windows-$(GOARCH)-prod.exe \
|
vmalert-windows-$(GOARCH)-prod.exe \
|
||||||
|
@ -389,6 +410,7 @@ release-vmutils-windows-goarch: \
|
||||||
vmbackup-windows-$(GOARCH)-prod.exe \
|
vmbackup-windows-$(GOARCH)-prod.exe \
|
||||||
vmrestore-windows-$(GOARCH)-prod.exe \
|
vmrestore-windows-$(GOARCH)-prod.exe \
|
||||||
vmctl-windows-$(GOARCH)-prod.exe \
|
vmctl-windows-$(GOARCH)-prod.exe \
|
||||||
|
vmalert-tool-windows-$(GOARCH)-prod.exe \
|
||||||
> vmutils-windows-$(GOARCH)-$(PKG_TAG)_checksums.txt
|
> vmutils-windows-$(GOARCH)-$(PKG_TAG)_checksums.txt
|
||||||
cd bin && rm -rf \
|
cd bin && rm -rf \
|
||||||
vmagent-windows-$(GOARCH)-prod.exe \
|
vmagent-windows-$(GOARCH)-prod.exe \
|
||||||
|
@ -396,7 +418,8 @@ release-vmutils-windows-goarch: \
|
||||||
vmauth-windows-$(GOARCH)-prod.exe \
|
vmauth-windows-$(GOARCH)-prod.exe \
|
||||||
vmbackup-windows-$(GOARCH)-prod.exe \
|
vmbackup-windows-$(GOARCH)-prod.exe \
|
||||||
vmrestore-windows-$(GOARCH)-prod.exe \
|
vmrestore-windows-$(GOARCH)-prod.exe \
|
||||||
vmctl-windows-$(GOARCH)-prod.exe
|
vmctl-windows-$(GOARCH)-prod.exe \
|
||||||
|
vmalert-tool-windows-$(GOARCH)-prod.exe
|
||||||
|
|
||||||
pprof-cpu:
|
pprof-cpu:
|
||||||
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics@ $(PPROF_FILE)
|
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics@ $(PPROF_FILE)
|
||||||
|
@ -514,3 +537,4 @@ docs-sync:
|
||||||
SRC=app/vmctl/README.md DST=docs/vmctl.md OLD_URL='/vmctl.html' ORDER=8 TITLE=vmctl $(MAKE) copy-docs
|
SRC=app/vmctl/README.md DST=docs/vmctl.md OLD_URL='/vmctl.html' ORDER=8 TITLE=vmctl $(MAKE) copy-docs
|
||||||
SRC=app/vmgateway/README.md DST=docs/vmgateway.md OLD_URL='/vmgateway.html' ORDER=9 TITLE=vmgateway $(MAKE) copy-docs
|
SRC=app/vmgateway/README.md DST=docs/vmgateway.md OLD_URL='/vmgateway.html' ORDER=9 TITLE=vmgateway $(MAKE) copy-docs
|
||||||
SRC=app/vmbackupmanager/README.md DST=docs/vmbackupmanager.md OLD_URL='/vmbackupmanager.html' ORDER=10 TITLE=vmbackupmanager $(MAKE) copy-docs
|
SRC=app/vmbackupmanager/README.md DST=docs/vmbackupmanager.md OLD_URL='/vmbackupmanager.html' ORDER=10 TITLE=vmbackupmanager $(MAKE) copy-docs
|
||||||
|
SRC=app/vmalert-tool/README.md DST=docs/vmalert-tool.md OLD_URL='' ORDER=12 TITLE=vmalert-tool $(MAKE) copy-docs
|
||||||
|
|
103
app/vmalert-tool/Makefile
Normal file
103
app/vmalert-tool/Makefile
Normal file
|
@ -0,0 +1,103 @@
|
||||||
|
# All these commands must run from repository root.
|
||||||
|
|
||||||
|
vmalert-tool:
|
||||||
|
APP_NAME=vmalert-tool $(MAKE) app-local
|
||||||
|
|
||||||
|
vmalert-tool-race:
|
||||||
|
APP_NAME=vmalert-tool RACE=-race $(MAKE) app-local
|
||||||
|
|
||||||
|
vmalert-tool-prod:
|
||||||
|
APP_NAME=vmalert-tool $(MAKE) app-via-docker
|
||||||
|
|
||||||
|
vmalert-tool-pure-prod:
|
||||||
|
APP_NAME=vmalert-tool $(MAKE) app-via-docker-pure
|
||||||
|
|
||||||
|
vmalert-tool-linux-amd64-prod:
|
||||||
|
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-amd64
|
||||||
|
|
||||||
|
vmalert-tool-linux-arm-prod:
|
||||||
|
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-arm
|
||||||
|
|
||||||
|
vmalert-tool-linux-arm64-prod:
|
||||||
|
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-arm64
|
||||||
|
|
||||||
|
vmalert-tool-linux-ppc64le-prod:
|
||||||
|
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-ppc64le
|
||||||
|
|
||||||
|
vmalert-tool-linux-386-prod:
|
||||||
|
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-386
|
||||||
|
|
||||||
|
vmalert-tool-darwin-amd64-prod:
|
||||||
|
APP_NAME=vmalert-tool $(MAKE) app-via-docker-darwin-amd64
|
||||||
|
|
||||||
|
vmalert-tool-darwin-arm64-prod:
|
||||||
|
APP_NAME=vmalert-tool $(MAKE) app-via-docker-darwin-arm64
|
||||||
|
|
||||||
|
vmalert-tool-freebsd-amd64-prod:
|
||||||
|
APP_NAME=vmalert-tool $(MAKE) app-via-docker-freebsd-amd64
|
||||||
|
|
||||||
|
vmalert-tool-openbsd-amd64-prod:
|
||||||
|
APP_NAME=vmalert-tool $(MAKE) app-via-docker-openbsd-amd64
|
||||||
|
|
||||||
|
vmalert-tool-windows-amd64-prod:
|
||||||
|
APP_NAME=vmalert-tool $(MAKE) app-via-docker-windows-amd64
|
||||||
|
|
||||||
|
package-vmalert-tool:
|
||||||
|
APP_NAME=vmalert-tool $(MAKE) package-via-docker
|
||||||
|
|
||||||
|
package-vmalert-tool-pure:
|
||||||
|
APP_NAME=vmalert-tool $(MAKE) package-via-docker-pure
|
||||||
|
|
||||||
|
package-vmalert-tool-amd64:
|
||||||
|
APP_NAME=vmalert-tool $(MAKE) package-via-docker-amd64
|
||||||
|
|
||||||
|
package-vmalert-tool-arm:
|
||||||
|
APP_NAME=vmalert-tool $(MAKE) package-via-docker-arm
|
||||||
|
|
||||||
|
package-vmalert-tool-arm64:
|
||||||
|
APP_NAME=vmalert-tool $(MAKE) package-via-docker-arm64
|
||||||
|
|
||||||
|
package-vmalert-tool-ppc64le:
|
||||||
|
APP_NAME=vmalert-tool $(MAKE) package-via-docker-ppc64le
|
||||||
|
|
||||||
|
package-vmalert-tool-386:
|
||||||
|
APP_NAME=vmalert-tool $(MAKE) package-via-docker-386
|
||||||
|
|
||||||
|
publish-vmalert-tool:
|
||||||
|
APP_NAME=vmalert-tool $(MAKE) publish-via-docker
|
||||||
|
|
||||||
|
vmalert-tool-linux-amd64:
|
||||||
|
APP_NAME=vmalert-tool CGO_ENABLED=1 GOOS=linux GOARCH=amd64 $(MAKE) app-local-goos-goarch
|
||||||
|
|
||||||
|
vmalert-tool-linux-arm:
|
||||||
|
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=linux GOARCH=arm $(MAKE) app-local-goos-goarch
|
||||||
|
|
||||||
|
vmalert-tool-linux-arm64:
|
||||||
|
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=linux GOARCH=arm64 $(MAKE) app-local-goos-goarch
|
||||||
|
|
||||||
|
vmalert-tool-linux-ppc64le:
|
||||||
|
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le $(MAKE) app-local-goos-goarch
|
||||||
|
|
||||||
|
vmalert-tool-linux-s390x:
|
||||||
|
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=linux GOARCH=s390x $(MAKE) app-local-goos-goarch
|
||||||
|
|
||||||
|
vmalert-tool-linux-386:
|
||||||
|
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=linux GOARCH=386 $(MAKE) app-local-goos-goarch
|
||||||
|
|
||||||
|
vmalert-tool-darwin-amd64:
|
||||||
|
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 $(MAKE) app-local-goos-goarch
|
||||||
|
|
||||||
|
vmalert-tool-darwin-arm64:
|
||||||
|
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=darwin GOARCH=arm64 $(MAKE) app-local-goos-goarch
|
||||||
|
|
||||||
|
vmalert-tool-freebsd-amd64:
|
||||||
|
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=freebsd GOARCH=amd64 $(MAKE) app-local-goos-goarch
|
||||||
|
|
||||||
|
vmalert-tool-openbsd-amd64:
|
||||||
|
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=openbsd GOARCH=amd64 $(MAKE) app-local-goos-goarch
|
||||||
|
|
||||||
|
vmalert-tool-windows-amd64:
|
||||||
|
GOARCH=amd64 APP_NAME=vmalert-tool $(MAKE) app-local-windows-goarch
|
||||||
|
|
||||||
|
vmalert-tool-pure:
|
||||||
|
APP_NAME=vmalert-tool $(MAKE) app-local-pure
|
244
app/vmalert-tool/README.md
Normal file
244
app/vmalert-tool/README.md
Normal file
|
@ -0,0 +1,244 @@
|
||||||
|
|
||||||
|
# vmalert-tool
|
||||||
|
|
||||||
|
VMAlert command-line tool
|
||||||
|
|
||||||
|
## Unit testing for rules
|
||||||
|
|
||||||
|
You can use `vmalert-tool` to run unit tests for alerting and recording rules.
|
||||||
|
It will perform the following actions:
|
||||||
|
* sets up an isolated VictoriaMetrics instance;
|
||||||
|
* simulates the periodic ingestion of time series;
|
||||||
|
* queries the ingested data for recording and alerting rules evaluation like [vmalert](https://docs.victoriametrics.com/vmalert.html);
|
||||||
|
* checks whether the firing alerts or resulting recording rules match the expected results.
|
||||||
|
|
||||||
|
See how to run vmalert-tool for unit testing below:
|
||||||
|
```
|
||||||
|
# Run vmalert-tool with one or multiple test files via --files cmd-line flag
|
||||||
|
./vmalert-tool unittest --files test1.yaml --files test2.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
vmalert-tool unittest is compatible with [Prometheus config format for tests](https://prometheus.io/docs/prometheus/latest/configuration/unit_testing_rules/#test-file-format)
|
||||||
|
except `promql_expr_test` field. Use `metricsql_expr_test` field name instead. The name is different because vmalert-tool
|
||||||
|
validates and executes [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html) expressions,
|
||||||
|
which aren't always backward compatible with [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/).
|
||||||
|
|
||||||
|
### Test file format
|
||||||
|
|
||||||
|
The configuration format for files specified in `--files` cmd-line flag is the following:
|
||||||
|
```
|
||||||
|
# Path to the files or http url containing [rule groups](https://docs.victoriametrics.com/vmalert.html#groups) configuration.
|
||||||
|
# Enterprise version of vmalert-tool supports S3 and GCS paths to rules.
|
||||||
|
rule_files:
|
||||||
|
[ - <string> ]
|
||||||
|
|
||||||
|
# The evaluation interval for rules specified in `rule_files`
|
||||||
|
[ evaluation_interval: <duration> | default = 1m ]
|
||||||
|
|
||||||
|
# Groups listed below will be evaluated by order.
|
||||||
|
# Not all the groups need to be mentioned; groups that are not listed will be evaluated in the order they are defined in rule_files.
|
||||||
|
group_eval_order:
|
||||||
|
[ - <string> ]
|
||||||
|
|
||||||
|
# The list of unit test groups to be checked during evaluation.
|
||||||
|
tests:
|
||||||
|
[ - <test_group> ]
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `<test_group>`
|
||||||
|
|
||||||
|
```
|
||||||
|
# Interval between samples for input series
|
||||||
|
interval: <duration>
|
||||||
|
# Time series to persist into the database according to configured <interval> before running tests.
|
||||||
|
input_series:
|
||||||
|
[ - <series> ]
|
||||||
|
|
||||||
|
# Name of the test group, optional
|
||||||
|
[ name: <string> ]
|
||||||
|
|
||||||
|
# Unit tests for alerting rules
|
||||||
|
alert_rule_test:
|
||||||
|
[ - <alert_test_case> ]
|
||||||
|
|
||||||
|
# Unit tests for MetricsQL expressions.
|
||||||
|
metricsql_expr_test:
|
||||||
|
[ - <metricsql_expr_test> ]
|
||||||
|
|
||||||
|
# External labels accessible for templating.
|
||||||
|
external_labels:
|
||||||
|
[ <labelname>: <string> ... ]
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `<series>`
|
||||||
|
|
||||||
|
```
|
||||||
|
# series in the following format '<metric name>{<label name>=<label value>, ...}'
|
||||||
|
# Examples:
|
||||||
|
# series_name{label1="value1", label2="value2"}
|
||||||
|
# go_goroutines{job="prometheus", instance="localhost:9090"}
|
||||||
|
series: <string>
|
||||||
|
|
||||||
|
# values support several special equations:
|
||||||
|
# 'a+bxc' becomes 'a a+b a+(2*b) a+(3*b) … a+(c*b)'
|
||||||
|
# Read this as series starts at a, then c further samples incrementing by b.
|
||||||
|
# 'a-bxc' becomes 'a a-b a-(2*b) a-(3*b) … a-(c*b)'
|
||||||
|
# Read this as series starts at a, then c further samples decrementing by b (or incrementing by negative b).
|
||||||
|
# '_' represents a missing sample from scrape
|
||||||
|
# 'stale' indicates a stale sample
|
||||||
|
# Examples:
|
||||||
|
# 1. '-2+4x3' becomes '-2 2 6 10' - series starts at -2, then 3 further samples incrementing by 4.
|
||||||
|
# 2. ' 1-2x4' becomes '1 -1 -3 -5 -7' - series starts at 1, then 4 further samples decrementing by 2.
|
||||||
|
# 3. ' 1x4' becomes '1 1 1 1 1' - shorthand for '1+0x4', series starts at 1, then 4 further samples incrementing by 0.
|
||||||
|
# 4. ' 1 _x3 stale' becomes '1 _ _ _ stale' - the missing sample cannot increment, so 3 missing samples are produced by the '_x3' expression.
|
||||||
|
values: <string>
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `<alert_test_case>`
|
||||||
|
|
||||||
|
vmalert by default adds `alertgroup` and `alertname` to the generated alerts and time series.
|
||||||
|
So you will need to specify both `groupname` and `alertname` under a single `<alert_test_case>`,
|
||||||
|
but no need to add them under `exp_alerts`.
|
||||||
|
You can also pass `--disableAlertgroupLabel` to skip `alertgroup` check.
|
||||||
|
|
||||||
|
```
|
||||||
|
# The time elapsed from time=0s when this alerting rule should be checked.
|
||||||
|
# Means this rule should be firing at this point, or shouldn't be firing if 'exp_alerts' is empty.
|
||||||
|
eval_time: <duration>
|
||||||
|
|
||||||
|
# Name of the group to be tested.
|
||||||
|
groupname: <string>
|
||||||
|
|
||||||
|
# Name of the alert to be tested.
|
||||||
|
alertname: <string>
|
||||||
|
|
||||||
|
# List of the expected alerts that are firing under the given alertname at
|
||||||
|
# the given evaluation time. If you want to test if an alerting rule should
|
||||||
|
# not be firing, then you can mention only the fields above and leave 'exp_alerts' empty.
|
||||||
|
exp_alerts:
|
||||||
|
[ - <alert> ]
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `<alert>`
|
||||||
|
|
||||||
|
```
|
||||||
|
# These are the expanded labels and annotations of the expected alert.
|
||||||
|
# Note: labels also include the labels of the sample associated with the alert
|
||||||
|
exp_labels:
|
||||||
|
[ <labelname>: <string> ]
|
||||||
|
exp_annotations:
|
||||||
|
[ <labelname>: <string> ]
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `<metricsql_expr_test>`
|
||||||
|
|
||||||
|
```
|
||||||
|
# Expression to evaluate
|
||||||
|
expr: <string>
|
||||||
|
|
||||||
|
# The time elapsed from time=0s when this expression should be evaluated.
|
||||||
|
eval_time: <duration>
|
||||||
|
|
||||||
|
# Expected samples at the given evaluation time.
|
||||||
|
exp_samples:
|
||||||
|
[ - <sample> ]
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `<sample>`
|
||||||
|
|
||||||
|
```
|
||||||
|
# Labels of the sample in usual series notation '<metric name>{<label name>=<label value>, ...}'
|
||||||
|
# Examples:
|
||||||
|
# series_name{label1="value1", label2="value2"}
|
||||||
|
# go_goroutines{job="prometheus", instance="localhost:9090"}
|
||||||
|
labels: <string>
|
||||||
|
|
||||||
|
# The expected value of the MetricsQL expression.
|
||||||
|
value: <number>
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example
|
||||||
|
|
||||||
|
This is an example input file for unit testing which will pass.
|
||||||
|
`test.yaml` is the test file which follows the syntax above and `rules.yaml` contains the alerting and recording rules.
|
||||||
|
|
||||||
|
With `rules.yaml` in the same directory, run `./vmalert-tool unittest --files=./unittest/testdata/test.yaml`.
|
||||||
|
|
||||||
|
#### `test.yaml`
|
||||||
|
|
||||||
|
```
|
||||||
|
rule_files:
|
||||||
|
- rules.yaml
|
||||||
|
|
||||||
|
evaluation_interval: 1m
|
||||||
|
|
||||||
|
tests:
|
||||||
|
- interval: 1m
|
||||||
|
input_series:
|
||||||
|
- series: 'up{job="prometheus", instance="localhost:9090"}'
|
||||||
|
values: "0+0x1440"
|
||||||
|
|
||||||
|
metricsql_expr_test:
|
||||||
|
- expr: suquery_interval_test
|
||||||
|
eval_time: 4m
|
||||||
|
exp_samples:
|
||||||
|
- labels: '{__name__="suquery_interval_test", datacenter="dc-123", instance="localhost:9090", job="prometheus"}'
|
||||||
|
value: 1
|
||||||
|
|
||||||
|
alert_rule_test:
|
||||||
|
- eval_time: 2h
|
||||||
|
groupname: group1
|
||||||
|
alertname: InstanceDown
|
||||||
|
exp_alerts:
|
||||||
|
- exp_labels:
|
||||||
|
job: prometheus
|
||||||
|
severity: page
|
||||||
|
instance: localhost:9090
|
||||||
|
datacenter: dc-123
|
||||||
|
exp_annotations:
|
||||||
|
summary: "Instance localhost:9090 down"
|
||||||
|
description: "localhost:9090 of job prometheus has been down for more than 5 minutes."
|
||||||
|
|
||||||
|
- eval_time: 0
|
||||||
|
groupname: group1
|
||||||
|
alertname: AlwaysFiring
|
||||||
|
exp_alerts:
|
||||||
|
- exp_labels:
|
||||||
|
datacenter: dc-123
|
||||||
|
|
||||||
|
- eval_time: 0
|
||||||
|
groupname: group1
|
||||||
|
alertname: InstanceDown
|
||||||
|
exp_alerts: []
|
||||||
|
|
||||||
|
external_labels:
|
||||||
|
datacenter: dc-123
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `rules.yaml`
|
||||||
|
|
||||||
|
```
|
||||||
|
# This is the rules file.
|
||||||
|
|
||||||
|
groups:
|
||||||
|
- name: group1
|
||||||
|
rules:
|
||||||
|
- alert: InstanceDown
|
||||||
|
expr: up == 0
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: page
|
||||||
|
annotations:
|
||||||
|
summary: "Instance {{ $labels.instance }} down"
|
||||||
|
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
|
||||||
|
- alert: AlwaysFiring
|
||||||
|
expr: 1
|
||||||
|
|
||||||
|
- name: group2
|
||||||
|
rules:
|
||||||
|
- record: job:test:count_over_time1m
|
||||||
|
expr: sum without(instance) (count_over_time(test[1m]))
|
||||||
|
- record: suquery_interval_test
|
||||||
|
expr: count_over_time(up[5m:])
|
||||||
|
```
|
54
app/vmalert-tool/main.go
Normal file
54
app/vmalert-tool/main.go
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/urfave/cli/v2"
|
||||||
|
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert-tool/unittest"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
start := time.Now()
|
||||||
|
app := &cli.App{
|
||||||
|
Name: "vmalert-tool",
|
||||||
|
Usage: "VMAlert command-line tool",
|
||||||
|
UsageText: "More info in https://docs.victoriametrics.com/vmalert-tool.html",
|
||||||
|
Version: buildinfo.Version,
|
||||||
|
Commands: []*cli.Command{
|
||||||
|
{
|
||||||
|
Name: "unittest",
|
||||||
|
Usage: "Run unittest for alerting and recording rules.",
|
||||||
|
UsageText: "More info in https://docs.victoriametrics.com/vmalert-tool.html#Unit-testing-for-rules",
|
||||||
|
Flags: []cli.Flag{
|
||||||
|
&cli.StringSliceFlag{
|
||||||
|
Name: "files",
|
||||||
|
Usage: "files to run unittest with. Supports an array of values separated by comma or specified via multiple flags.",
|
||||||
|
Required: true,
|
||||||
|
},
|
||||||
|
&cli.BoolFlag{
|
||||||
|
Name: "disableAlertgroupLabel",
|
||||||
|
Usage: "disable adding group's Name as label to generated alerts and time series.",
|
||||||
|
Required: false,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Action: func(c *cli.Context) error {
|
||||||
|
if failed := unittest.UnitTest(c.StringSlice("files"), c.Bool("disableAlertgroupLabel")); failed {
|
||||||
|
return fmt.Errorf("unittest failed")
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
err := app.Run(os.Args)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalln(err)
|
||||||
|
}
|
||||||
|
log.Printf("Total time: %v", time.Since(start))
|
||||||
|
}
|
19
app/vmalert-tool/unittest/alerting.go
Normal file
19
app/vmalert-tool/unittest/alerting.go
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
package unittest
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||||
|
)
|
||||||
|
|
||||||
|
// alertTestCase holds a single alert_rule_test case defined in the test file.
// It names the alert to check, the moment at which to check it and the alerts
// expected to be firing at that moment.
|
||||||
|
type alertTestCase struct {
|
||||||
|
// EvalTime is the time elapsed from time=0s at which the alerting rule is checked.
EvalTime *promutils.Duration `yaml:"eval_time"`
|
||||||
|
// GroupName is the name of the rule group to be tested.
GroupName string `yaml:"groupname"`
|
||||||
|
// Alertname is the name of the alert to be tested.
Alertname string `yaml:"alertname"`
|
||||||
|
// ExpAlerts lists the alerts expected to be firing at EvalTime;
// an empty list means the alert is expected not to be firing.
ExpAlerts []expAlert `yaml:"exp_alerts"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// expAlert holds one entry of exp_alerts defined in the test file:
// the expanded labels and annotations of an expected alert.
|
||||||
|
type expAlert struct {
|
||||||
|
// ExpLabels are the expected labels of the alert, including the labels
// of the sample associated with it.
ExpLabels map[string]string `yaml:"exp_labels"`
|
||||||
|
// ExpAnnotations are the expected, already expanded annotations of the alert.
ExpAnnotations map[string]string `yaml:"exp_annotations"`
|
||||||
|
}
|
182
app/vmalert-tool/unittest/input.go
Normal file
182
app/vmalert-tool/unittest/input.go
Normal file
|
@ -0,0 +1,182 @@
|
||||||
|
package unittest
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"regexp"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
testutil "github.com/VictoriaMetrics/VictoriaMetrics/app/victoria-metrics/test"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||||
|
"github.com/VictoriaMetrics/metricsql"
|
||||||
|
)
|
||||||
|
|
||||||
|
// series holds one entry of input_series defined in the test file.
|
||||||
|
type series struct {
|
||||||
|
// Series is the series selector in the form
// '<metric name>{<label name>=<label value>, ...}'.
Series string `yaml:"series"`
|
||||||
|
// Values is the space-separated list of sample values; it supports the
// expanding notation ('a+bxc', 'a-bxc', '_', 'stale') handled by parseInputValue.
Values string `yaml:"values"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// sequenceValue is an omittable value in a sequence of time series values.
|
||||||
|
type sequenceValue struct {
|
||||||
|
// Value is the sample value; parseInputValue stores decimal.StaleNaN here for 'stale'.
Value float64
|
||||||
|
// Omitted marks a missing sample ('_'): writeInputSeries writes no sample
// for it but still advances the timestamp by one interval.
Omitted bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func httpWrite(address string, r io.Reader) {
|
||||||
|
resp, err := http.Post(address, "", r)
|
||||||
|
if err != nil {
|
||||||
|
logger.Fatalf("failed to send to storage: %v", err)
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeInputSeries send input series to vmstorage and flush them
|
||||||
|
func writeInputSeries(input []series, interval *promutils.Duration, startStamp time.Time, dst string) error {
|
||||||
|
r := testutil.WriteRequest{}
|
||||||
|
for _, data := range input {
|
||||||
|
expr, err := metricsql.Parse(data.Series)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to parse series %s: %v", data.Series, err)
|
||||||
|
}
|
||||||
|
promvals, err := parseInputValue(data.Values, true)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to parse input series value %s: %v", data.Values, err)
|
||||||
|
}
|
||||||
|
metricExpr, ok := expr.(*metricsql.MetricExpr)
|
||||||
|
if !ok {
|
||||||
|
return fmt.Errorf("failed to parse series %s to metric expr: %v", data.Series, err)
|
||||||
|
}
|
||||||
|
samples := make([]testutil.Sample, 0, len(promvals))
|
||||||
|
ts := startStamp
|
||||||
|
for _, v := range promvals {
|
||||||
|
if !v.Omitted {
|
||||||
|
samples = append(samples, testutil.Sample{
|
||||||
|
Timestamp: ts.UnixMilli(),
|
||||||
|
Value: v.Value,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
ts = ts.Add(interval.Duration())
|
||||||
|
}
|
||||||
|
var ls []testutil.Label
|
||||||
|
for _, filter := range metricExpr.LabelFilterss[0] {
|
||||||
|
ls = append(ls, testutil.Label{Name: filter.Label, Value: filter.Value})
|
||||||
|
}
|
||||||
|
r.Timeseries = append(r.Timeseries, testutil.TimeSeries{Labels: ls, Samples: samples})
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := testutil.Compress(r)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to compress data: %v", err)
|
||||||
|
}
|
||||||
|
// write input series to vm
|
||||||
|
httpWrite(dst, bytes.NewBuffer(data))
|
||||||
|
vmstorage.Storage.DebugFlush()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseInputValue support input like "1", "1+1x1 _ -4 3+20x1", see more examples in test.
|
||||||
|
func parseInputValue(input string, origin bool) ([]sequenceValue, error) {
|
||||||
|
var res []sequenceValue
|
||||||
|
items := strings.Split(input, " ")
|
||||||
|
reg := regexp.MustCompile(`\D?\d*\D?`)
|
||||||
|
for _, item := range items {
|
||||||
|
if item == "stale" {
|
||||||
|
res = append(res, sequenceValue{Value: decimal.StaleNaN})
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
vals := reg.FindAllString(item, -1)
|
||||||
|
switch len(vals) {
|
||||||
|
case 1:
|
||||||
|
if vals[0] == "_" {
|
||||||
|
res = append(res, sequenceValue{Omitted: true})
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v, err := strconv.ParseFloat(vals[0], 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
res = append(res, sequenceValue{Value: v})
|
||||||
|
continue
|
||||||
|
case 2:
|
||||||
|
p1 := vals[0][:len(vals[0])-1]
|
||||||
|
v2, err := strconv.ParseInt(vals[1], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
option := vals[0][len(vals[0])-1]
|
||||||
|
switch option {
|
||||||
|
case '+':
|
||||||
|
v1, err := strconv.ParseFloat(p1, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
res = append(res, sequenceValue{Value: v1 + float64(v2)})
|
||||||
|
case 'x':
|
||||||
|
for i := int64(0); i <= v2; i++ {
|
||||||
|
if p1 == "_" {
|
||||||
|
if i == 0 {
|
||||||
|
i = 1
|
||||||
|
}
|
||||||
|
res = append(res, sequenceValue{Omitted: true})
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v1, err := strconv.ParseFloat(p1, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if !origin || v1 == 0 {
|
||||||
|
res = append(res, sequenceValue{Value: v1 * float64(i)})
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
newVal := fmt.Sprintf("%s+0x%s", p1, vals[1])
|
||||||
|
newRes, err := parseInputValue(newVal, false)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
res = append(res, newRes...)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("got invalid operation %b", option)
|
||||||
|
}
|
||||||
|
case 3:
|
||||||
|
r1, err := parseInputValue(fmt.Sprintf("%s%s", vals[1], vals[2]), false)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
p1 := vals[0][:len(vals[0])-1]
|
||||||
|
v1, err := strconv.ParseFloat(p1, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
option := vals[0][len(vals[0])-1]
|
||||||
|
var isAdd bool
|
||||||
|
if option == '+' {
|
||||||
|
isAdd = true
|
||||||
|
}
|
||||||
|
for _, r := range r1 {
|
||||||
|
if isAdd {
|
||||||
|
res = append(res, sequenceValue{
|
||||||
|
Value: r.Value + v1,
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
res = append(res, sequenceValue{
|
||||||
|
Value: v1 - r.Value,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("unsupported input %s", input)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return res, nil
|
||||||
|
}
|
93
app/vmalert-tool/unittest/input_test.go
Normal file
93
app/vmalert-tool/unittest/input_test.go
Normal file
|
@ -0,0 +1,93 @@
|
||||||
|
package unittest
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestParseInputValue(t *testing.T) {
|
||||||
|
testCases := []struct {
|
||||||
|
input string
|
||||||
|
exp []sequenceValue
|
||||||
|
failed bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
"",
|
||||||
|
nil,
|
||||||
|
true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"testfailed",
|
||||||
|
nil,
|
||||||
|
true,
|
||||||
|
},
|
||||||
|
// stale doesn't support operations
|
||||||
|
{
|
||||||
|
"stalex3",
|
||||||
|
nil,
|
||||||
|
true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"-4",
|
||||||
|
[]sequenceValue{{Value: -4}},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"_",
|
||||||
|
[]sequenceValue{{Omitted: true}},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"stale",
|
||||||
|
[]sequenceValue{{Value: decimal.StaleNaN}},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"-4x1",
|
||||||
|
[]sequenceValue{{Value: -4}, {Value: -4}},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"_x1",
|
||||||
|
[]sequenceValue{{Omitted: true}},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"1+1x4",
|
||||||
|
[]sequenceValue{{Value: 1}, {Value: 2}, {Value: 3}, {Value: 4}, {Value: 5}},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"2-1x4",
|
||||||
|
[]sequenceValue{{Value: 2}, {Value: 1}, {Value: 0}, {Value: -1}, {Value: -2}},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"1+1x1 _ -4 stale 3+20x1",
|
||||||
|
[]sequenceValue{{Value: 1}, {Value: 2}, {Omitted: true}, {Value: -4}, {Value: decimal.StaleNaN}, {Value: 3}, {Value: 23}},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range testCases {
|
||||||
|
output, err := parseInputValue(tc.input, true)
|
||||||
|
if err != nil != tc.failed {
|
||||||
|
t.Fatalf("failed to parse %s, expect %t, got %t", tc.input, tc.failed, err != nil)
|
||||||
|
}
|
||||||
|
if len(tc.exp) != len(output) {
|
||||||
|
t.Fatalf("expect %v, got %v", tc.exp, output)
|
||||||
|
}
|
||||||
|
for i := 0; i < len(tc.exp); i++ {
|
||||||
|
if tc.exp[i].Omitted != output[i].Omitted {
|
||||||
|
t.Fatalf("expect %v, got %v", tc.exp, output)
|
||||||
|
}
|
||||||
|
if tc.exp[i].Value != output[i].Value {
|
||||||
|
if decimal.IsStaleNaN(tc.exp[i].Value) && decimal.IsStaleNaN(output[i].Value) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
t.Fatalf("expect %v, got %v", tc.exp, output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
92
app/vmalert-tool/unittest/recording.go
Normal file
92
app/vmalert-tool/unittest/recording.go
Normal file
|
@ -0,0 +1,92 @@
|
||||||
|
package unittest
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"net/url"
|
||||||
|
"reflect"
|
||||||
|
"sort"
|
||||||
|
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||||
|
"github.com/VictoriaMetrics/metricsql"
|
||||||
|
)
|
||||||
|
|
||||||
|
// metricsqlTestCase holds metricsql_expr_test cases defined in test file
|
||||||
|
type metricsqlTestCase struct {
|
||||||
|
Expr string `yaml:"expr"`
|
||||||
|
EvalTime *promutils.Duration `yaml:"eval_time"`
|
||||||
|
ExpSamples []expSample `yaml:"exp_samples"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// expSample is a single expected sample of a metricsql_expr_test case.
type expSample struct {
	// Labels is the expected label set in selector form, e.g. `{__name__="test"}`.
	// An empty string stands for a sample without labels.
	Labels string `yaml:"labels"`
	// Value is the expected sample value.
	Value float64 `yaml:"value"`
}
|
||||||
|
|
||||||
|
// checkMetricsqlCase will check metricsql_expr_test cases
|
||||||
|
func checkMetricsqlCase(cases []metricsqlTestCase, q datasource.QuerierBuilder) (checkErrs []error) {
|
||||||
|
queries := q.BuildWithParams(datasource.QuerierParams{QueryParams: url.Values{"nocache": {"1"}, "latency_offset": {"1ms"}}, DataSourceType: "prometheus"})
|
||||||
|
Outer:
|
||||||
|
for _, mt := range cases {
|
||||||
|
result, _, err := queries.Query(context.Background(), mt.Expr, mt.EvalTime.ParseTime())
|
||||||
|
if err != nil {
|
||||||
|
checkErrs = append(checkErrs, fmt.Errorf(" expr: %q, time: %s, err: %w", mt.Expr,
|
||||||
|
mt.EvalTime.Duration().String(), err))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
var gotSamples []parsedSample
|
||||||
|
for _, s := range result.Data {
|
||||||
|
sort.Slice(s.Labels, func(i, j int) bool {
|
||||||
|
return s.Labels[i].Name < s.Labels[j].Name
|
||||||
|
})
|
||||||
|
gotSamples = append(gotSamples, parsedSample{
|
||||||
|
Labels: s.Labels,
|
||||||
|
Value: s.Values[0],
|
||||||
|
})
|
||||||
|
}
|
||||||
|
var expSamples []parsedSample
|
||||||
|
for _, s := range mt.ExpSamples {
|
||||||
|
expLb := datasource.Labels{}
|
||||||
|
if s.Labels != "" {
|
||||||
|
metricsqlExpr, err := metricsql.Parse(s.Labels)
|
||||||
|
if err != nil {
|
||||||
|
checkErrs = append(checkErrs, fmt.Errorf("\n expr: %q, time: %s, err: %v", mt.Expr,
|
||||||
|
mt.EvalTime.Duration().String(), fmt.Errorf("failed to parse labels %q: %w", s.Labels, err)))
|
||||||
|
continue Outer
|
||||||
|
}
|
||||||
|
metricsqlMetricExpr, ok := metricsqlExpr.(*metricsql.MetricExpr)
|
||||||
|
if !ok {
|
||||||
|
checkErrs = append(checkErrs, fmt.Errorf("\n expr: %q, time: %s, err: %v", mt.Expr,
|
||||||
|
mt.EvalTime.Duration().String(), fmt.Errorf("got unsupported metricsql type")))
|
||||||
|
continue Outer
|
||||||
|
}
|
||||||
|
for _, l := range metricsqlMetricExpr.LabelFilterss[0] {
|
||||||
|
expLb = append(expLb, datasource.Label{
|
||||||
|
Name: l.Label,
|
||||||
|
Value: l.Value,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sort.Slice(expLb, func(i, j int) bool {
|
||||||
|
return expLb[i].Name < expLb[j].Name
|
||||||
|
})
|
||||||
|
expSamples = append(expSamples, parsedSample{
|
||||||
|
Labels: expLb,
|
||||||
|
Value: s.Value,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
sort.Slice(expSamples, func(i, j int) bool {
|
||||||
|
return datasource.LabelCompare(expSamples[i].Labels, expSamples[j].Labels) <= 0
|
||||||
|
})
|
||||||
|
sort.Slice(gotSamples, func(i, j int) bool {
|
||||||
|
return datasource.LabelCompare(gotSamples[i].Labels, gotSamples[j].Labels) <= 0
|
||||||
|
})
|
||||||
|
if !reflect.DeepEqual(expSamples, gotSamples) {
|
||||||
|
checkErrs = append(checkErrs, fmt.Errorf("\n expr: %q, time: %s,\n exp: %v\n got: %v", mt.Expr,
|
||||||
|
mt.EvalTime.Duration().String(), parsedSamplesString(expSamples), parsedSamplesString(gotSamples)))
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
43
app/vmalert-tool/unittest/testdata/disable-group-label.yaml
vendored
Normal file
43
app/vmalert-tool/unittest/testdata/disable-group-label.yaml
vendored
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
rule_files:
|
||||||
|
- rules.yaml
|
||||||
|
|
||||||
|
evaluation_interval: 1m
|
||||||
|
|
||||||
|
tests:
|
||||||
|
- interval: 1m
|
||||||
|
input_series:
|
||||||
|
- series: 'up{job="vmagent2", instance="localhost:9090"}'
|
||||||
|
values: "0+0x1440"
|
||||||
|
|
||||||
|
metricsql_expr_test:
|
||||||
|
- expr: suquery_interval_test
|
||||||
|
eval_time: 4m
|
||||||
|
exp_samples:
|
||||||
|
- labels: '{__name__="suquery_interval_test",datacenter="dc-123", instance="localhost:9090", job="vmagent2"}'
|
||||||
|
value: 1
|
||||||
|
|
||||||
|
alert_rule_test:
|
||||||
|
- eval_time: 2h
|
||||||
|
alertname: InstanceDown
|
||||||
|
exp_alerts:
|
||||||
|
- exp_labels:
|
||||||
|
job: vmagent2
|
||||||
|
severity: page
|
||||||
|
instance: localhost:9090
|
||||||
|
datacenter: dc-123
|
||||||
|
exp_annotations:
|
||||||
|
summary: "Instance localhost:9090 down"
|
||||||
|
description: "localhost:9090 of job vmagent2 has been down for more than 5 minutes."
|
||||||
|
|
||||||
|
- eval_time: 0
|
||||||
|
alertname: AlwaysFiring
|
||||||
|
exp_alerts:
|
||||||
|
- exp_labels:
|
||||||
|
datacenter: dc-123
|
||||||
|
|
||||||
|
- eval_time: 0
|
||||||
|
alertname: InstanceDown
|
||||||
|
exp_alerts: []
|
||||||
|
|
||||||
|
external_labels:
|
||||||
|
datacenter: dc-123
|
49
app/vmalert-tool/unittest/testdata/failed-test.yaml
vendored
Normal file
49
app/vmalert-tool/unittest/testdata/failed-test.yaml
vendored
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
rule_files:
|
||||||
|
- rules.yaml
|
||||||
|
|
||||||
|
tests:
|
||||||
|
- interval: 1m
|
||||||
|
name: "Failing test"
|
||||||
|
input_series:
|
||||||
|
- series: test
|
||||||
|
values: "0"
|
||||||
|
|
||||||
|
metricsql_expr_test:
|
||||||
|
- expr: test
|
||||||
|
eval_time: 0m
|
||||||
|
exp_samples:
|
||||||
|
- value: 0
|
||||||
|
labels: test
|
||||||
|
|
||||||
|
# will fail because there is no "Test" group or rule defined
|
||||||
|
alert_rule_test:
|
||||||
|
- eval_time: 0m
|
||||||
|
groupname: Test
|
||||||
|
alertname: Test
|
||||||
|
exp_alerts:
|
||||||
|
- exp_labels: {}
|
||||||
|
|
||||||
|
- interval: 1m
|
||||||
|
name: Failing alert test
|
||||||
|
input_series:
|
||||||
|
- series: 'up{job="test"}'
|
||||||
|
values: 0x10
|
||||||
|
|
||||||
|
alert_rule_test:
|
||||||
|
# will fail because the rule is firing
|
||||||
|
- eval_time: 5m
|
||||||
|
groupname: group1
|
||||||
|
alertname: InstanceDown
|
||||||
|
exp_alerts: []
|
||||||
|
|
||||||
|
- interval: 1m
|
||||||
|
name: Failing alert test with missing groupname
|
||||||
|
input_series:
|
||||||
|
- series: 'up{job="test"}'
|
||||||
|
values: 0x10
|
||||||
|
|
||||||
|
alert_rule_test:
|
||||||
|
# will fail because groupname is missing
|
||||||
|
- eval_time: 5m
|
||||||
|
alertname: AlwaysFiring
|
||||||
|
exp_alerts: []
|
30
app/vmalert-tool/unittest/testdata/long-period.yaml
vendored
Normal file
30
app/vmalert-tool/unittest/testdata/long-period.yaml
vendored
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
# can be executed successfully but will take more than 1 minute
|
||||||
|
# not included in unit test now
|
||||||
|
evaluation_interval: 100d
|
||||||
|
|
||||||
|
rule_files:
|
||||||
|
- rules.yaml
|
||||||
|
|
||||||
|
tests:
|
||||||
|
- interval: 1d
|
||||||
|
input_series:
|
||||||
|
- series: test
|
||||||
|
# Max time in time.Duration is 106751d from 1970 (2^63/10^9), i.e. 2262.
|
||||||
|
# But VictoriaMetrics supports maxTimestamp value +2 days from now. see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/827.
|
||||||
|
# We input series to 2024-01-01T00:00:00 here.
|
||||||
|
values: "0+1x19723"
|
||||||
|
|
||||||
|
metricsql_expr_test:
|
||||||
|
- expr: timestamp(test)
|
||||||
|
eval_time: 0m
|
||||||
|
exp_samples:
|
||||||
|
- value: 0
|
||||||
|
- expr: test
|
||||||
|
eval_time: 100d
|
||||||
|
exp_samples:
|
||||||
|
- labels: test
|
||||||
|
value: 100
|
||||||
|
- expr: timestamp(test)
|
||||||
|
eval_time: 19000d
|
||||||
|
exp_samples:
|
||||||
|
- value: 1641600000 # 19000d -> seconds.
|
39
app/vmalert-tool/unittest/testdata/rules.yaml
vendored
Normal file
39
app/vmalert-tool/unittest/testdata/rules.yaml
vendored
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
groups:
|
||||||
|
- name: group1
|
||||||
|
rules:
|
||||||
|
- alert: InstanceDown
|
||||||
|
expr: up == 0
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: page
|
||||||
|
annotations:
|
||||||
|
summary: "Instance {{ $labels.instance }} down"
|
||||||
|
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
|
||||||
|
- alert: AlwaysFiring
|
||||||
|
expr: 1
|
||||||
|
- alert: SameAlertNameWithDifferentGroup
|
||||||
|
expr: absent(test)
|
||||||
|
for: 1m
|
||||||
|
|
||||||
|
- name: group2
|
||||||
|
rules:
|
||||||
|
- record: t1
|
||||||
|
expr: test
|
||||||
|
- record: job:test:count_over_time1m
|
||||||
|
expr: sum without(instance) (count_over_time(test[1m]))
|
||||||
|
- record: suquery_interval_test
|
||||||
|
expr: count_over_time(up[5m:])
|
||||||
|
|
||||||
|
- alert: SameAlertNameWithDifferentGroup
|
||||||
|
expr: absent(test)
|
||||||
|
for: 5m
|
||||||
|
|
||||||
|
- name: group3
|
||||||
|
rules:
|
||||||
|
- record: t2
|
||||||
|
expr: t1
|
||||||
|
|
||||||
|
- name: group4
|
||||||
|
rules:
|
||||||
|
- record: t3
|
||||||
|
expr: t1
|
99
app/vmalert-tool/unittest/testdata/test1.yaml
vendored
Normal file
99
app/vmalert-tool/unittest/testdata/test1.yaml
vendored
Normal file
|
@ -0,0 +1,99 @@
|
||||||
|
rule_files:
|
||||||
|
- rules.yaml
|
||||||
|
|
||||||
|
evaluation_interval: 1m
|
||||||
|
group_eval_order: ["group4", "group2", "group3"]
|
||||||
|
|
||||||
|
tests:
|
||||||
|
- interval: 1m
|
||||||
|
name: "basic test"
|
||||||
|
input_series:
|
||||||
|
- series: "test"
|
||||||
|
values: "_x5 1x5 _ stale"
|
||||||
|
|
||||||
|
alert_rule_test:
|
||||||
|
- eval_time: 1m
|
||||||
|
groupname: group1
|
||||||
|
alertname: SameAlertNameWithDifferentGroup
|
||||||
|
exp_alerts:
|
||||||
|
- {}
|
||||||
|
- eval_time: 1m
|
||||||
|
groupname: group2
|
||||||
|
alertname: SameAlertNameWithDifferentGroup
|
||||||
|
exp_alerts: []
|
||||||
|
- eval_time: 6m
|
||||||
|
groupname: group1
|
||||||
|
alertname: SameAlertNameWithDifferentGroup
|
||||||
|
exp_alerts: []
|
||||||
|
|
||||||
|
metricsql_expr_test:
|
||||||
|
- expr: test
|
||||||
|
eval_time: 11m
|
||||||
|
exp_samples:
|
||||||
|
- labels: '{__name__="test"}'
|
||||||
|
value: 1
|
||||||
|
- expr: test
|
||||||
|
eval_time: 12m
|
||||||
|
exp_samples: []
|
||||||
|
|
||||||
|
- interval: 1m
|
||||||
|
name: "basic test2"
|
||||||
|
input_series:
|
||||||
|
- series: 'up{job="vmagent1", instance="localhost:9090"}'
|
||||||
|
values: "0+0x1440"
|
||||||
|
- series: "test"
|
||||||
|
values: "0+1x1440"
|
||||||
|
|
||||||
|
metricsql_expr_test:
|
||||||
|
- expr: count(ALERTS) by (alertgroup, alertname, alertstate)
|
||||||
|
eval_time: 4m
|
||||||
|
exp_samples:
|
||||||
|
- labels: '{alertgroup="group1", alertname="AlwaysFiring", alertstate="firing"}'
|
||||||
|
value: 1
|
||||||
|
- labels: '{alertgroup="group1", alertname="InstanceDown", alertstate="pending"}'
|
||||||
|
value: 1
|
||||||
|
- expr: t1
|
||||||
|
eval_time: 4m
|
||||||
|
exp_samples:
|
||||||
|
- value: 4
|
||||||
|
labels: '{__name__="t1", datacenter="dc-123"}'
|
||||||
|
- expr: t2
|
||||||
|
eval_time: 4m
|
||||||
|
exp_samples:
|
||||||
|
- value: 4
|
||||||
|
labels: '{__name__="t2", datacenter="dc-123"}'
|
||||||
|
- expr: t3
|
||||||
|
eval_time: 4m
|
||||||
|
exp_samples:
|
||||||
|
# t3 is 3 instead of 4 because group4 (which records t3) is evaluated before group2 (which records t1)
|
||||||
|
- value: 3
|
||||||
|
labels: '{__name__="t3", datacenter="dc-123"}'
|
||||||
|
|
||||||
|
alert_rule_test:
|
||||||
|
- eval_time: 10m
|
||||||
|
groupname: group1
|
||||||
|
alertname: InstanceDown
|
||||||
|
exp_alerts:
|
||||||
|
- exp_labels:
|
||||||
|
job: vmagent1
|
||||||
|
severity: page
|
||||||
|
instance: localhost:9090
|
||||||
|
datacenter: dc-123
|
||||||
|
exp_annotations:
|
||||||
|
summary: "Instance localhost:9090 down"
|
||||||
|
description: "localhost:9090 of job vmagent1 has been down for more than 5 minutes."
|
||||||
|
|
||||||
|
- eval_time: 0
|
||||||
|
groupname: group1
|
||||||
|
alertname: AlwaysFiring
|
||||||
|
exp_alerts:
|
||||||
|
- exp_labels:
|
||||||
|
datacenter: dc-123
|
||||||
|
|
||||||
|
- eval_time: 0
|
||||||
|
groupname: alerts
|
||||||
|
alertname: InstanceDown
|
||||||
|
exp_alerts: []
|
||||||
|
|
||||||
|
external_labels:
|
||||||
|
datacenter: dc-123
|
46
app/vmalert-tool/unittest/testdata/test2.yaml
vendored
Normal file
46
app/vmalert-tool/unittest/testdata/test2.yaml
vendored
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
rule_files:
|
||||||
|
- rules.yaml
|
||||||
|
|
||||||
|
evaluation_interval: 1m
|
||||||
|
|
||||||
|
tests:
|
||||||
|
- interval: 1m
|
||||||
|
input_series:
|
||||||
|
- series: 'up{job="vmagent2", instance="localhost:9090"}'
|
||||||
|
values: "0+0x1440"
|
||||||
|
|
||||||
|
metricsql_expr_test:
|
||||||
|
- expr: suquery_interval_test
|
||||||
|
eval_time: 4m
|
||||||
|
exp_samples:
|
||||||
|
- labels: '{__name__="suquery_interval_test",datacenter="dc-123", instance="localhost:9090", job="vmagent2"}'
|
||||||
|
value: 1
|
||||||
|
|
||||||
|
alert_rule_test:
|
||||||
|
- eval_time: 2h
|
||||||
|
groupname: group1
|
||||||
|
alertname: InstanceDown
|
||||||
|
exp_alerts:
|
||||||
|
- exp_labels:
|
||||||
|
job: vmagent2
|
||||||
|
severity: page
|
||||||
|
instance: localhost:9090
|
||||||
|
datacenter: dc-123
|
||||||
|
exp_annotations:
|
||||||
|
summary: "Instance localhost:9090 down"
|
||||||
|
description: "localhost:9090 of job vmagent2 has been down for more than 5 minutes."
|
||||||
|
|
||||||
|
- eval_time: 0
|
||||||
|
groupname: group1
|
||||||
|
alertname: AlwaysFiring
|
||||||
|
exp_alerts:
|
||||||
|
- exp_labels:
|
||||||
|
datacenter: dc-123
|
||||||
|
|
||||||
|
- eval_time: 0
|
||||||
|
groupname: group1
|
||||||
|
alertname: InstanceDown
|
||||||
|
exp_alerts: []
|
||||||
|
|
||||||
|
external_labels:
|
||||||
|
datacenter: dc-123
|
83
app/vmalert-tool/unittest/type.go
Normal file
83
app/vmalert-tool/unittest/type.go
Normal file
|
@ -0,0 +1,83 @@
|
||||||
|
package unittest
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||||
|
)
|
||||||
|
|
||||||
|
// parsedSample is a sample with parsed Labels
|
||||||
|
type parsedSample struct {
|
||||||
|
Labels datasource.Labels
|
||||||
|
Value float64
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ps *parsedSample) String() string {
|
||||||
|
return ps.Labels.String() + " " + strconv.FormatFloat(ps.Value, 'E', -1, 64)
|
||||||
|
}
|
||||||
|
|
||||||
|
func parsedSamplesString(pss []parsedSample) string {
|
||||||
|
if len(pss) == 0 {
|
||||||
|
return "nil"
|
||||||
|
}
|
||||||
|
s := pss[0].String()
|
||||||
|
for _, ps := range pss[1:] {
|
||||||
|
s += ", " + ps.String()
|
||||||
|
}
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
// labelAndAnnotation holds labels and annotations
|
||||||
|
type labelAndAnnotation struct {
|
||||||
|
Labels datasource.Labels
|
||||||
|
Annotations datasource.Labels
|
||||||
|
}
|
||||||
|
|
||||||
|
func (la *labelAndAnnotation) String() string {
|
||||||
|
return "Labels:" + la.Labels.String() + "\nAnnotations:" + la.Annotations.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// labelsAndAnnotations is collection of LabelAndAnnotation
|
||||||
|
type labelsAndAnnotations []labelAndAnnotation
|
||||||
|
|
||||||
|
func (la labelsAndAnnotations) Len() int { return len(la) }
|
||||||
|
|
||||||
|
func (la labelsAndAnnotations) Swap(i, j int) { la[i], la[j] = la[j], la[i] }
|
||||||
|
func (la labelsAndAnnotations) Less(i, j int) bool {
|
||||||
|
diff := datasource.LabelCompare(la[i].Labels, la[j].Labels)
|
||||||
|
if diff != 0 {
|
||||||
|
return diff < 0
|
||||||
|
}
|
||||||
|
return datasource.LabelCompare(la[i].Annotations, la[j].Annotations) < 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (la labelsAndAnnotations) String() string {
|
||||||
|
if len(la) == 0 {
|
||||||
|
return "[]"
|
||||||
|
}
|
||||||
|
s := "[\n0:" + indentLines("\n"+la[0].String(), " ")
|
||||||
|
for i, l := range la[1:] {
|
||||||
|
s += ",\n" + fmt.Sprintf("%d", i+1) + ":" + indentLines("\n"+l.String(), " ")
|
||||||
|
}
|
||||||
|
s += "\n]"
|
||||||
|
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
// indentLines prefixes every line of lines except the first one with indent.
//
// The first line is left untouched, so callers that want all of their text
// indented pass a string starting with "\n".
func indentLines(lines, indent string) string {
	// Inserting indent after every line break is exactly "indent all lines but the first".
	return strings.ReplaceAll(lines, "\n", "\n"+indent)
}
|
443
app/vmalert-tool/unittest/unittest.go
Normal file
443
app/vmalert-tool/unittest/unittest.go
Normal file
|
@ -0,0 +1,443 @@
|
||||||
|
package unittest
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"reflect"
|
||||||
|
"sort"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"gopkg.in/yaml.v2"
|
||||||
|
|
||||||
|
vmalertconfig "github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/promremotewrite"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/prometheus"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||||
|
"github.com/VictoriaMetrics/metrics"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
	// storagePath is the data directory of the embedded storage; it is set in UnitTest.
	storagePath string
	// httpListenAddr is the listen address of the embedded HTTP server.
	httpListenAddr = ":8880"
	// insert series from 1970-01-01T00:00:00
	testStartTime = time.Unix(0, 0).UTC()

	// Endpoints of the embedded HTTP server.
	testPromWriteHTTPPath = "http://127.0.0.1" + httpListenAddr + "/api/v1/write"
	testDataSourcePath    = "http://127.0.0.1" + httpListenAddr + "/prometheus"
	testRemoteWritePath   = "http://127.0.0.1" + httpListenAddr
	testHealthHTTPPath    = "http://127.0.0.1" + httpListenAddr + "/health"

	// disableAlertgroupLabel mirrors the disableGroupLabel argument of UnitTest.
	disableAlertgroupLabel bool
)
|
||||||
|
|
||||||
|
const (
	// testStoragePath is the directory name joined with os.TempDir() to form storagePath.
	testStoragePath = "vmalert-unittest"
	// testLogLevel is the value assigned to the `loggerLevel` flag.
	testLogLevel = "ERROR"
)
|
||||||
|
|
||||||
|
// UnitTest runs unittest for files
|
||||||
|
func UnitTest(files []string, disableGroupLabel bool) bool {
|
||||||
|
if err := templates.Load([]string{}, true); err != nil {
|
||||||
|
logger.Fatalf("failed to load template: %v", err)
|
||||||
|
}
|
||||||
|
storagePath = filepath.Join(os.TempDir(), testStoragePath)
|
||||||
|
processFlags()
|
||||||
|
vminsert.Init()
|
||||||
|
vmselect.Init()
|
||||||
|
// storagePath will be created again when closing vmselect, so remove it again.
|
||||||
|
defer fs.MustRemoveAll(storagePath)
|
||||||
|
defer vminsert.Stop()
|
||||||
|
defer vmselect.Stop()
|
||||||
|
disableAlertgroupLabel = disableGroupLabel
|
||||||
|
return rulesUnitTest(files)
|
||||||
|
}
|
||||||
|
|
||||||
|
func rulesUnitTest(files []string) bool {
|
||||||
|
var failed bool
|
||||||
|
for _, f := range files {
|
||||||
|
if err := ruleUnitTest(f); err != nil {
|
||||||
|
fmt.Println(" FAILED")
|
||||||
|
fmt.Printf("\nfailed to run unit test for file %q: \n%v", f, err)
|
||||||
|
failed = true
|
||||||
|
} else {
|
||||||
|
fmt.Println(" SUCCESS")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return failed
|
||||||
|
}
|
||||||
|
|
||||||
|
func ruleUnitTest(filename string) []error {
|
||||||
|
fmt.Println("\nUnit Testing: ", filename)
|
||||||
|
b, err := os.ReadFile(filename)
|
||||||
|
if err != nil {
|
||||||
|
return []error{fmt.Errorf("failed to read file: %w", err)}
|
||||||
|
}
|
||||||
|
|
||||||
|
var unitTestInp unitTestFile
|
||||||
|
if err := yaml.UnmarshalStrict(b, &unitTestInp); err != nil {
|
||||||
|
return []error{fmt.Errorf("failed to unmarshal file: %w", err)}
|
||||||
|
}
|
||||||
|
if err := resolveAndGlobFilepaths(filepath.Dir(filename), &unitTestInp); err != nil {
|
||||||
|
return []error{fmt.Errorf("failed to resolve path for `rule_files`: %w", err)}
|
||||||
|
}
|
||||||
|
|
||||||
|
if unitTestInp.EvaluationInterval.Duration() == 0 {
|
||||||
|
fmt.Println("evaluation_interval set to 1m by default")
|
||||||
|
unitTestInp.EvaluationInterval = &promutils.Duration{D: 1 * time.Minute}
|
||||||
|
}
|
||||||
|
|
||||||
|
groupOrderMap := make(map[string]int)
|
||||||
|
for i, gn := range unitTestInp.GroupEvalOrder {
|
||||||
|
if _, ok := groupOrderMap[gn]; ok {
|
||||||
|
return []error{fmt.Errorf("group name repeated in `group_eval_order`: %s", gn)}
|
||||||
|
}
|
||||||
|
groupOrderMap[gn] = i
|
||||||
|
}
|
||||||
|
|
||||||
|
testGroups, err := vmalertconfig.Parse(unitTestInp.RuleFiles, nil, true)
|
||||||
|
if err != nil {
|
||||||
|
return []error{fmt.Errorf("failed to parse `rule_files`: %w", err)}
|
||||||
|
}
|
||||||
|
|
||||||
|
var errs []error
|
||||||
|
for _, t := range unitTestInp.Tests {
|
||||||
|
if err := verifyTestGroup(t); err != nil {
|
||||||
|
errs = append(errs, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
testErrs := t.test(unitTestInp.EvaluationInterval.Duration(), groupOrderMap, testGroups)
|
||||||
|
errs = append(errs, testErrs...)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(errs) > 0 {
|
||||||
|
return errs
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func verifyTestGroup(group testGroup) error {
|
||||||
|
var testGroupName string
|
||||||
|
if group.TestGroupName != "" {
|
||||||
|
testGroupName = fmt.Sprintf("testGroupName: %s\n", group.TestGroupName)
|
||||||
|
}
|
||||||
|
for _, at := range group.AlertRuleTests {
|
||||||
|
if at.Alertname == "" {
|
||||||
|
return fmt.Errorf("\n%s missing required filed \"alertname\"", testGroupName)
|
||||||
|
}
|
||||||
|
if !disableAlertgroupLabel && at.GroupName == "" {
|
||||||
|
return fmt.Errorf("\n%s missing required filed \"groupname\" when flag \"disableAlertGroupLabel\" is false", testGroupName)
|
||||||
|
}
|
||||||
|
if disableAlertgroupLabel && at.GroupName != "" {
|
||||||
|
return fmt.Errorf("\n%s shouldn't set filed \"groupname\" when flag \"disableAlertGroupLabel\" is true", testGroupName)
|
||||||
|
}
|
||||||
|
if at.EvalTime == nil {
|
||||||
|
return fmt.Errorf("\n%s missing required filed \"eval_time\"", testGroupName)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, et := range group.MetricsqlExprTests {
|
||||||
|
if et.Expr == "" {
|
||||||
|
return fmt.Errorf("\n%s missing required filed \"expr\"", testGroupName)
|
||||||
|
}
|
||||||
|
if et.EvalTime == nil {
|
||||||
|
return fmt.Errorf("\n%s missing required filed \"eval_time\"", testGroupName)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func processFlags() {
|
||||||
|
flag.Parse()
|
||||||
|
for _, fv := range []struct {
|
||||||
|
flag string
|
||||||
|
value string
|
||||||
|
}{
|
||||||
|
{flag: "storageDataPath", value: storagePath},
|
||||||
|
{flag: "loggerLevel", value: testLogLevel},
|
||||||
|
{flag: "search.disableCache", value: "true"},
|
||||||
|
// set storage retention time to 100 years, allow to store series from 1970-01-01T00:00:00.
|
||||||
|
{flag: "retentionPeriod", value: "100y"},
|
||||||
|
{flag: "datasource.url", value: testDataSourcePath},
|
||||||
|
{flag: "remoteWrite.url", value: testRemoteWritePath},
|
||||||
|
} {
|
||||||
|
// panics if flag doesn't exist
|
||||||
|
if err := flag.Lookup(fv.flag).Value.Set(fv.value); err != nil {
|
||||||
|
logger.Fatalf("unable to set %q with value %q, err: %v", fv.flag, fv.value, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func setUp() {
|
||||||
|
vmstorage.Init(promql.ResetRollupResultCacheIfNeeded)
|
||||||
|
go httpserver.Serve(httpListenAddr, false, func(w http.ResponseWriter, r *http.Request) bool {
|
||||||
|
switch r.URL.Path {
|
||||||
|
case "/prometheus/api/v1/query":
|
||||||
|
if err := prometheus.QueryHandler(nil, time.Now(), w, r); err != nil {
|
||||||
|
httpserver.Errorf(w, r, "%s", err)
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
case "/prometheus/api/v1/write", "/api/v1/write":
|
||||||
|
if err := promremotewrite.InsertHandler(r); err != nil {
|
||||||
|
httpserver.Errorf(w, r, "%s", err)
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
})
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
readyCheckFunc := func() bool {
|
||||||
|
resp, err := http.Get(testHealthHTTPPath)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
_ = resp.Body.Close()
|
||||||
|
return resp.StatusCode == 200
|
||||||
|
}
|
||||||
|
checkCheck:
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
logger.Fatalf("http server can't be ready in 30s")
|
||||||
|
default:
|
||||||
|
if readyCheckFunc() {
|
||||||
|
break checkCheck
|
||||||
|
}
|
||||||
|
time.Sleep(3 * time.Second)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func tearDown() {
|
||||||
|
if err := httpserver.Stop(httpListenAddr); err != nil {
|
||||||
|
logger.Errorf("cannot stop the webservice: %s", err)
|
||||||
|
}
|
||||||
|
vmstorage.Stop()
|
||||||
|
metrics.UnregisterAllMetrics()
|
||||||
|
fs.MustRemoveAll(storagePath)
|
||||||
|
}
|
||||||
|
|
||||||
|
// resolveAndGlobFilepaths joins all relative paths in a configuration
|
||||||
|
// with a given base directory and replaces all globs with matching files.
|
||||||
|
func resolveAndGlobFilepaths(baseDir string, utf *unitTestFile) error {
|
||||||
|
for i, rf := range utf.RuleFiles {
|
||||||
|
if rf != "" && !filepath.IsAbs(rf) {
|
||||||
|
utf.RuleFiles[i] = filepath.Join(baseDir, rf)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var globbedFiles []string
|
||||||
|
for _, rf := range utf.RuleFiles {
|
||||||
|
m, err := filepath.Glob(rf)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if len(m) == 0 {
|
||||||
|
fmt.Fprintln(os.Stderr, " WARNING: no file match pattern", rf)
|
||||||
|
}
|
||||||
|
globbedFiles = append(globbedFiles, m...)
|
||||||
|
}
|
||||||
|
utf.RuleFiles = globbedFiles
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]int, testGroups []vmalertconfig.Group) (checkErrs []error) {
|
||||||
|
// set up vmstorage and http server for ingest and read queries
|
||||||
|
setUp()
|
||||||
|
// tear down vmstorage and clean the data dir
|
||||||
|
defer tearDown()
|
||||||
|
|
||||||
|
err := writeInputSeries(tg.InputSeries, tg.Interval, testStartTime, testPromWriteHTTPPath)
|
||||||
|
if err != nil {
|
||||||
|
return []error{err}
|
||||||
|
}
|
||||||
|
|
||||||
|
q, err := datasource.Init(nil)
|
||||||
|
if err != nil {
|
||||||
|
return []error{fmt.Errorf("failed to init datasource: %v", err)}
|
||||||
|
}
|
||||||
|
rw, err := remotewrite.NewDebugClient()
|
||||||
|
if err != nil {
|
||||||
|
return []error{fmt.Errorf("failed to init wr: %v", err)}
|
||||||
|
}
|
||||||
|
|
||||||
|
alertEvalTimesMap := map[time.Duration]struct{}{}
|
||||||
|
alertExpResultMap := map[time.Duration]map[string]map[string][]expAlert{}
|
||||||
|
for _, at := range tg.AlertRuleTests {
|
||||||
|
et := at.EvalTime.Duration()
|
||||||
|
alertEvalTimesMap[et] = struct{}{}
|
||||||
|
if _, ok := alertExpResultMap[et]; !ok {
|
||||||
|
alertExpResultMap[et] = make(map[string]map[string][]expAlert)
|
||||||
|
}
|
||||||
|
if _, ok := alertExpResultMap[et][at.GroupName]; !ok {
|
||||||
|
alertExpResultMap[et][at.GroupName] = make(map[string][]expAlert)
|
||||||
|
}
|
||||||
|
alertExpResultMap[et][at.GroupName][at.Alertname] = at.ExpAlerts
|
||||||
|
}
|
||||||
|
alertEvalTimes := make([]time.Duration, 0, len(alertEvalTimesMap))
|
||||||
|
for k := range alertEvalTimesMap {
|
||||||
|
alertEvalTimes = append(alertEvalTimes, k)
|
||||||
|
}
|
||||||
|
sort.Slice(alertEvalTimes, func(i, j int) bool {
|
||||||
|
return alertEvalTimes[i] < alertEvalTimes[j]
|
||||||
|
})
|
||||||
|
|
||||||
|
// sort group eval order according to the given "group_eval_order".
|
||||||
|
sort.Slice(testGroups, func(i, j int) bool {
|
||||||
|
return groupOrderMap[testGroups[i].Name] < groupOrderMap[testGroups[j].Name]
|
||||||
|
})
|
||||||
|
|
||||||
|
// create groups with given rule
|
||||||
|
var groups []*rule.Group
|
||||||
|
for _, group := range testGroups {
|
||||||
|
ng := rule.NewGroup(group, q, time.Minute, tg.ExternalLabels)
|
||||||
|
groups = append(groups, ng)
|
||||||
|
}
|
||||||
|
|
||||||
|
evalIndex := 0
|
||||||
|
maxEvalTime := testStartTime.Add(tg.maxEvalTime())
|
||||||
|
for ts := testStartTime; ts.Before(maxEvalTime) || ts.Equal(maxEvalTime); ts = ts.Add(evalInterval) {
|
||||||
|
for _, g := range groups {
|
||||||
|
errs := g.ExecOnce(context.Background(), func() []notifier.Notifier { return nil }, rw, ts)
|
||||||
|
for err := range errs {
|
||||||
|
if err != nil {
|
||||||
|
checkErrs = append(checkErrs, fmt.Errorf("\nfailed to exec group: %q, time: %s, err: %w", g.Name,
|
||||||
|
ts, err))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// flush series after each group evaluation
|
||||||
|
vmstorage.Storage.DebugFlush()
|
||||||
|
}
|
||||||
|
|
||||||
|
// check alert_rule_test case at every eval time
|
||||||
|
for evalIndex < len(alertEvalTimes) {
|
||||||
|
if ts.Sub(testStartTime) > alertEvalTimes[evalIndex] ||
|
||||||
|
alertEvalTimes[evalIndex] >= ts.Add(evalInterval).Sub(testStartTime) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
gotAlertsMap := map[string]map[string]labelsAndAnnotations{}
|
||||||
|
for _, g := range groups {
|
||||||
|
if disableAlertgroupLabel {
|
||||||
|
g.Name = ""
|
||||||
|
}
|
||||||
|
if _, ok := alertExpResultMap[time.Duration(ts.UnixNano())][g.Name]; !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, ok := gotAlertsMap[g.Name]; !ok {
|
||||||
|
gotAlertsMap[g.Name] = make(map[string]labelsAndAnnotations)
|
||||||
|
}
|
||||||
|
for _, r := range g.Rules {
|
||||||
|
ar, isAlertRule := r.(*rule.AlertingRule)
|
||||||
|
if !isAlertRule {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, ok := alertExpResultMap[time.Duration(ts.UnixNano())][g.Name][ar.Name]; ok {
|
||||||
|
for _, got := range ar.GetAlerts() {
|
||||||
|
if got.State != notifier.StateFiring {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if disableAlertgroupLabel {
|
||||||
|
delete(got.Labels, "alertgroup")
|
||||||
|
}
|
||||||
|
laa := labelAndAnnotation{
|
||||||
|
Labels: datasource.ConvertToLabels(got.Labels),
|
||||||
|
Annotations: datasource.ConvertToLabels(got.Annotations),
|
||||||
|
}
|
||||||
|
gotAlertsMap[g.Name][ar.Name] = append(gotAlertsMap[g.Name][ar.Name], laa)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for groupname, gres := range alertExpResultMap[alertEvalTimes[evalIndex]] {
|
||||||
|
for alertname, res := range gres {
|
||||||
|
var expAlerts labelsAndAnnotations
|
||||||
|
for _, expAlert := range res {
|
||||||
|
if expAlert.ExpLabels == nil {
|
||||||
|
expAlert.ExpLabels = make(map[string]string)
|
||||||
|
}
|
||||||
|
// alertGroupNameLabel is added as additional labels when `disableAlertGroupLabel` is false
|
||||||
|
if !disableAlertgroupLabel {
|
||||||
|
expAlert.ExpLabels["alertgroup"] = groupname
|
||||||
|
}
|
||||||
|
// alertNameLabel is added as additional labels in vmalert.
|
||||||
|
expAlert.ExpLabels["alertname"] = alertname
|
||||||
|
expAlerts = append(expAlerts, labelAndAnnotation{
|
||||||
|
Labels: datasource.ConvertToLabels(expAlert.ExpLabels),
|
||||||
|
Annotations: datasource.ConvertToLabels(expAlert.ExpAnnotations),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
sort.Sort(expAlerts)
|
||||||
|
|
||||||
|
gotAlerts := gotAlertsMap[groupname][alertname]
|
||||||
|
sort.Sort(gotAlerts)
|
||||||
|
if !reflect.DeepEqual(expAlerts, gotAlerts) {
|
||||||
|
var testGroupName string
|
||||||
|
if tg.TestGroupName != "" {
|
||||||
|
testGroupName = fmt.Sprintf("testGroupName: %s,\n", tg.TestGroupName)
|
||||||
|
}
|
||||||
|
expString := indentLines(expAlerts.String(), " ")
|
||||||
|
gotString := indentLines(gotAlerts.String(), " ")
|
||||||
|
checkErrs = append(checkErrs, fmt.Errorf("\n%s groupname: %s, alertname: %s, time: %s, \n exp:%v, \n got:%v ",
|
||||||
|
testGroupName, groupname, alertname, alertEvalTimes[evalIndex].String(), expString, gotString))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
evalIndex++
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
checkErrs = append(checkErrs, checkMetricsqlCase(tg.MetricsqlExprTests, q)...)
|
||||||
|
return checkErrs
|
||||||
|
}
|
||||||
|
|
||||||
|
// unitTestFile holds the contents of a single unit test file
type unitTestFile struct {
	// RuleFiles lists rule file paths or glob patterns; see
	// resolveAndGlobFilepaths for how they are resolved and expanded.
	RuleFiles []string `yaml:"rule_files"`
	// EvaluationInterval is the "evaluation_interval" setting of the file.
	// NOTE(review): presumably the step between rule evaluations — confirm against the caller.
	EvaluationInterval *promutils.Duration `yaml:"evaluation_interval"`
	// GroupEvalOrder is the "group_eval_order" setting of the file.
	GroupEvalOrder []string `yaml:"group_eval_order"`
	// Tests are the test groups declared in the file.
	Tests []testGroup `yaml:"tests"`
}
|
||||||
|
|
||||||
|
// testGroup is a group of input series and test cases associated with it
type testGroup struct {
	// Interval is handed to writeInputSeries together with InputSeries.
	Interval *promutils.Duration `yaml:"interval"`
	// InputSeries are the series written to storage before rules are evaluated.
	InputSeries []series `yaml:"input_series"`
	// AlertRuleTests are the expected alerts, checked per eval time.
	AlertRuleTests []alertTestCase `yaml:"alert_rule_test"`
	// MetricsqlExprTests are the expression test cases of the group.
	MetricsqlExprTests []metricsqlTestCase `yaml:"metricsql_expr_test"`
	// ExternalLabels are passed to every rule group created for this test group.
	ExternalLabels map[string]string `yaml:"external_labels"`
	// TestGroupName is optional and, when set, is included in failure messages.
	TestGroupName string `yaml:"name"`
}
|
||||||
|
|
||||||
|
// maxEvalTime returns the max eval time among all alert_rule_test and metricsql_expr_test
|
||||||
|
func (tg *testGroup) maxEvalTime() time.Duration {
|
||||||
|
var maxd time.Duration
|
||||||
|
for _, alert := range tg.AlertRuleTests {
|
||||||
|
if alert.EvalTime.Duration() > maxd {
|
||||||
|
maxd = alert.EvalTime.Duration()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, met := range tg.MetricsqlExprTests {
|
||||||
|
if met.EvalTime.Duration() > maxd {
|
||||||
|
maxd = met.EvalTime.Duration()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return maxd
|
||||||
|
}
|
47
app/vmalert-tool/unittest/unittest_test.go
Normal file
47
app/vmalert-tool/unittest/unittest_test.go
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
package unittest
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestMain(m *testing.M) {
|
||||||
|
if err := templates.Load([]string{}, true); err != nil {
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
os.Exit(m.Run())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestUnitRule(t *testing.T) {
|
||||||
|
testCases := []struct {
|
||||||
|
name string
|
||||||
|
disableGroupLabel bool
|
||||||
|
files []string
|
||||||
|
failed bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "run multi files",
|
||||||
|
files: []string{"./testdata/test1.yaml", "./testdata/test2.yaml"},
|
||||||
|
failed: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "disable group label",
|
||||||
|
disableGroupLabel: true,
|
||||||
|
files: []string{"./testdata/disable-group-label.yaml"},
|
||||||
|
failed: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "failing test",
|
||||||
|
files: []string{"./testdata/failed-test.yaml"},
|
||||||
|
failed: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tc := range testCases {
|
||||||
|
fail := UnitTest(tc.files, tc.disableGroupLabel)
|
||||||
|
if fail != tc.failed {
|
||||||
|
t.Fatalf("failed to test %s, expect %t, got %t", tc.name, tc.failed, fail)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -754,6 +754,11 @@ See full description for these flags in `./vmalert -help`.
|
||||||
* `limit` group's param has no effect during replay (might be changed in future);
|
* `limit` group's param has no effect during replay (might be changed in future);
|
||||||
* `keep_firing_for` alerting rule param has no effect during replay (might be changed in future).
|
* `keep_firing_for` alerting rule param has no effect during replay (might be changed in future).
|
||||||
|
|
||||||
|
## Unit Testing for Rules
|
||||||
|
|
||||||
|
You can use `vmalert-tool` to test your alerting and recording rules like [promtool does](https://prometheus.io/docs/prometheus/latest/configuration/unit_testing_rules/).
|
||||||
|
See more details [here](https://docs.victoriametrics.com/vmalert-tool.html#Unit-testing-for-rules).
|
||||||
|
|
||||||
## Monitoring
|
## Monitoring
|
||||||
|
|
||||||
`vmalert` exports various metrics in Prometheus exposition format at `http://vmalert-host:8880/metrics` page.
|
`vmalert` exports various metrics in Prometheus exposition format at `http://vmalert-host:8880/metrics` page.
|
||||||
|
|
131
app/vmalert/datasource/faker.go
Normal file
131
app/vmalert/datasource/faker.go
Normal file
|
@ -0,0 +1,131 @@
|
||||||
|
package datasource
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// FakeQuerier is a mock querier that return predefined results and error message
type FakeQuerier struct {
	// Mutex guards metrics and err.
	sync.Mutex
	// metrics are the predefined results returned by Query.
	metrics []Metric
	// err, when non-nil, is returned by Query instead of metrics.
	err error
}
|
||||||
|
|
||||||
|
// SetErr sets query error message
|
||||||
|
func (fq *FakeQuerier) SetErr(err error) {
|
||||||
|
fq.Lock()
|
||||||
|
fq.err = err
|
||||||
|
fq.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset reset querier's error message and results
|
||||||
|
func (fq *FakeQuerier) Reset() {
|
||||||
|
fq.Lock()
|
||||||
|
fq.err = nil
|
||||||
|
fq.metrics = fq.metrics[:0]
|
||||||
|
fq.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add appends metrics to querier result metrics
|
||||||
|
func (fq *FakeQuerier) Add(metrics ...Metric) {
|
||||||
|
fq.Lock()
|
||||||
|
fq.metrics = append(fq.metrics, metrics...)
|
||||||
|
fq.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildWithParams returns the FakeQuerier itself; the params are ignored.
func (fq *FakeQuerier) BuildWithParams(_ QuerierParams) Querier {
	return fq
}
|
||||||
|
|
||||||
|
// QueryRange performs query
|
||||||
|
func (fq *FakeQuerier) QueryRange(ctx context.Context, q string, _, _ time.Time) (Result, error) {
|
||||||
|
req, _, err := fq.Query(ctx, q, time.Now())
|
||||||
|
return req, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Query returns metrics restored in querier
|
||||||
|
func (fq *FakeQuerier) Query(_ context.Context, _ string, _ time.Time) (Result, *http.Request, error) {
|
||||||
|
fq.Lock()
|
||||||
|
defer fq.Unlock()
|
||||||
|
if fq.err != nil {
|
||||||
|
return Result{}, nil, fq.err
|
||||||
|
}
|
||||||
|
cp := make([]Metric, len(fq.metrics))
|
||||||
|
copy(cp, fq.metrics)
|
||||||
|
req, _ := http.NewRequest(http.MethodPost, "foo.com", nil)
|
||||||
|
return Result{Data: cp}, req, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FakeQuerierWithRegistry can store different results for different query expr
type FakeQuerierWithRegistry struct {
	// Mutex guards registry.
	sync.Mutex
	// registry maps a query expression to the metrics returned for it.
	registry map[string][]Metric
}
|
||||||
|
|
||||||
|
// Set stores query result for given key
|
||||||
|
func (fqr *FakeQuerierWithRegistry) Set(key string, metrics ...Metric) {
|
||||||
|
fqr.Lock()
|
||||||
|
if fqr.registry == nil {
|
||||||
|
fqr.registry = make(map[string][]Metric)
|
||||||
|
}
|
||||||
|
fqr.registry[key] = metrics
|
||||||
|
fqr.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset clean querier's results registry
|
||||||
|
func (fqr *FakeQuerierWithRegistry) Reset() {
|
||||||
|
fqr.Lock()
|
||||||
|
fqr.registry = nil
|
||||||
|
fqr.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildWithParams returns the FakeQuerierWithRegistry itself; the params are ignored.
func (fqr *FakeQuerierWithRegistry) BuildWithParams(_ QuerierParams) Querier {
	return fqr
}
|
||||||
|
|
||||||
|
// QueryRange performs query
|
||||||
|
func (fqr *FakeQuerierWithRegistry) QueryRange(ctx context.Context, q string, _, _ time.Time) (Result, error) {
|
||||||
|
req, _, err := fqr.Query(ctx, q, time.Now())
|
||||||
|
return req, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Query returns metrics restored in querier registry
|
||||||
|
func (fqr *FakeQuerierWithRegistry) Query(_ context.Context, expr string, _ time.Time) (Result, *http.Request, error) {
|
||||||
|
fqr.Lock()
|
||||||
|
defer fqr.Unlock()
|
||||||
|
|
||||||
|
req, _ := http.NewRequest(http.MethodPost, "foo.com", nil)
|
||||||
|
metrics, ok := fqr.registry[expr]
|
||||||
|
if !ok {
|
||||||
|
return Result{}, req, nil
|
||||||
|
}
|
||||||
|
cp := make([]Metric, len(metrics))
|
||||||
|
copy(cp, metrics)
|
||||||
|
return Result{Data: cp}, req, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FakeQuerierWithDelay mock querier with given delay duration
type FakeQuerierWithDelay struct {
	// FakeQuerier supplies the stored results and error handling.
	FakeQuerier
	// Delay is how long Query waits before answering.
	Delay time.Duration
}
|
||||||
|
|
||||||
|
// Query returns query result after delay duration
|
||||||
|
func (fqd *FakeQuerierWithDelay) Query(ctx context.Context, expr string, ts time.Time) (Result, *http.Request, error) {
|
||||||
|
timer := time.NewTimer(fqd.Delay)
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
case <-timer.C:
|
||||||
|
}
|
||||||
|
return fqd.FakeQuerier.Query(ctx, expr, ts)
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildWithParams returns the FakeQuerierWithDelay itself; the params are ignored.
func (fqd *FakeQuerierWithDelay) BuildWithParams(_ QuerierParams) Querier {
	return fqd
}
|
|
@ -18,6 +18,7 @@ import (
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remoteread"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remoteread"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||||
|
@ -66,11 +67,6 @@ absolute path to all .tpl files in root.
|
||||||
|
|
||||||
validateTemplates = flag.Bool("rule.validateTemplates", true, "Whether to validate annotation and label templates")
|
validateTemplates = flag.Bool("rule.validateTemplates", true, "Whether to validate annotation and label templates")
|
||||||
validateExpressions = flag.Bool("rule.validateExpressions", true, "Whether to validate rules expressions via MetricsQL engine")
|
validateExpressions = flag.Bool("rule.validateExpressions", true, "Whether to validate rules expressions via MetricsQL engine")
|
||||||
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maximum duration for automatic alert expiration, "+
|
|
||||||
"which by default is 4 times evaluationInterval of the parent group.")
|
|
||||||
resendDelay = flag.Duration("rule.resendDelay", 0, "Minimum amount of time to wait before resending an alert to notifier")
|
|
||||||
ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
|
|
||||||
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
|
|
||||||
|
|
||||||
externalURL = flag.String("external.url", "", "External URL is used as alert's source for sent alerts to the notifier. By default, hostname is used as address.")
|
externalURL = flag.String("external.url", "", "External URL is used as alert's source for sent alerts to the notifier. By default, hostname is used as address.")
|
||||||
externalAlertSource = flag.String("external.alert.source", "", `External Alert Source allows to override the Source link for alerts sent to AlertManager `+
|
externalAlertSource = flag.String("external.alert.source", "", `External Alert Source allows to override the Source link for alerts sent to AlertManager `+
|
||||||
|
@ -82,12 +78,8 @@ absolute path to all .tpl files in root.
|
||||||
externalLabels = flagutil.NewArrayString("external.label", "Optional label in the form 'Name=value' to add to all generated recording rules and alerts. "+
|
externalLabels = flagutil.NewArrayString("external.label", "Optional label in the form 'Name=value' to add to all generated recording rules and alerts. "+
|
||||||
"Pass multiple -label flags in order to add multiple label sets.")
|
"Pass multiple -label flags in order to add multiple label sets.")
|
||||||
|
|
||||||
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries."+
|
|
||||||
" For example, if lookback=1h then range from now() to now()-1h will be scanned.")
|
|
||||||
remoteReadIgnoreRestoreErrors = flag.Bool("remoteRead.ignoreRestoreErrors", true, "Whether to ignore errors from remote storage when restoring alerts state on startup. DEPRECATED - this flag has no effect and will be removed in the next releases.")
|
remoteReadIgnoreRestoreErrors = flag.Bool("remoteRead.ignoreRestoreErrors", true, "Whether to ignore errors from remote storage when restoring alerts state on startup. DEPRECATED - this flag has no effect and will be removed in the next releases.")
|
||||||
|
|
||||||
disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.")
|
|
||||||
|
|
||||||
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified.")
|
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified.")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -229,7 +221,7 @@ func newManager(ctx context.Context) (*manager, error) {
|
||||||
return nil, fmt.Errorf("failed to init notifier: %w", err)
|
return nil, fmt.Errorf("failed to init notifier: %w", err)
|
||||||
}
|
}
|
||||||
manager := &manager{
|
manager := &manager{
|
||||||
groups: make(map[uint64]*Group),
|
groups: make(map[uint64]*rule.Group),
|
||||||
querierBuilder: q,
|
querierBuilder: q,
|
||||||
notifiers: nts,
|
notifiers: nts,
|
||||||
labels: labels,
|
labels: labels,
|
||||||
|
|
|
@ -8,11 +8,19 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
// Disable rand sleep on group start during tests in order to speed up test execution.
|
||||||
|
// Rand sleep is needed only in prod code.
|
||||||
|
rule.SkipRandSleepOnGroupStart = true
|
||||||
|
}
|
||||||
|
|
||||||
func TestGetExternalURL(t *testing.T) {
|
func TestGetExternalURL(t *testing.T) {
|
||||||
expURL := "https://vicotriametrics.com/path"
|
expURL := "https://vicotriametrics.com/path"
|
||||||
u, err := getExternalURL(expURL, "", false)
|
u, err := getExternalURL(expURL, "", false)
|
||||||
|
@ -98,10 +106,10 @@ groups:
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
|
||||||
m := &manager{
|
m := &manager{
|
||||||
querierBuilder: &fakeQuerier{},
|
querierBuilder: &datasource.FakeQuerier{},
|
||||||
groups: make(map[uint64]*Group),
|
groups: make(map[uint64]*rule.Group),
|
||||||
labels: map[string]string{},
|
labels: map[string]string{},
|
||||||
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&fakeNotifier{}} },
|
notifiers: func() []notifier.Notifier { return []notifier.Notifier{¬ifier.FakeNotifier{}} },
|
||||||
rw: &remotewrite.Client{},
|
rw: &remotewrite.Client{},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -3,14 +3,13 @@ package main
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/url"
|
|
||||||
"sort"
|
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -19,7 +18,7 @@ type manager struct {
|
||||||
querierBuilder datasource.QuerierBuilder
|
querierBuilder datasource.QuerierBuilder
|
||||||
notifiers func() []notifier.Notifier
|
notifiers func() []notifier.Notifier
|
||||||
|
|
||||||
rw *remotewrite.Client
|
rw remotewrite.RWClient
|
||||||
// remote read builder.
|
// remote read builder.
|
||||||
rr datasource.QuerierBuilder
|
rr datasource.QuerierBuilder
|
||||||
|
|
||||||
|
@ -27,28 +26,28 @@ type manager struct {
|
||||||
labels map[string]string
|
labels map[string]string
|
||||||
|
|
||||||
groupsMu sync.RWMutex
|
groupsMu sync.RWMutex
|
||||||
groups map[uint64]*Group
|
groups map[uint64]*rule.Group
|
||||||
}
|
}
|
||||||
|
|
||||||
// RuleAPI generates APIRule object from alert by its ID(hash)
|
// ruleAPI generates apiRule object from alert by its ID(hash)
|
||||||
func (m *manager) RuleAPI(gID, rID uint64) (APIRule, error) {
|
func (m *manager) ruleAPI(gID, rID uint64) (apiRule, error) {
|
||||||
m.groupsMu.RLock()
|
m.groupsMu.RLock()
|
||||||
defer m.groupsMu.RUnlock()
|
defer m.groupsMu.RUnlock()
|
||||||
|
|
||||||
g, ok := m.groups[gID]
|
g, ok := m.groups[gID]
|
||||||
if !ok {
|
if !ok {
|
||||||
return APIRule{}, fmt.Errorf("can't find group with id %d", gID)
|
return apiRule{}, fmt.Errorf("can't find group with id %d", gID)
|
||||||
}
|
}
|
||||||
for _, rule := range g.Rules {
|
for _, rule := range g.Rules {
|
||||||
if rule.ID() == rID {
|
if rule.ID() == rID {
|
||||||
return rule.ToAPI(), nil
|
return ruleToAPI(rule), nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return APIRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
|
return apiRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
|
||||||
}
|
}
|
||||||
|
|
||||||
// AlertAPI generates APIAlert object from alert by its ID(hash)
|
// alertAPI generates apiAlert object from alert by its ID(hash)
|
||||||
func (m *manager) AlertAPI(gID, aID uint64) (*APIAlert, error) {
|
func (m *manager) alertAPI(gID, aID uint64) (*apiAlert, error) {
|
||||||
m.groupsMu.RLock()
|
m.groupsMu.RLock()
|
||||||
defer m.groupsMu.RUnlock()
|
defer m.groupsMu.RUnlock()
|
||||||
|
|
||||||
|
@ -56,12 +55,12 @@ func (m *manager) AlertAPI(gID, aID uint64) (*APIAlert, error) {
|
||||||
if !ok {
|
if !ok {
|
||||||
return nil, fmt.Errorf("can't find group with id %d", gID)
|
return nil, fmt.Errorf("can't find group with id %d", gID)
|
||||||
}
|
}
|
||||||
for _, rule := range g.Rules {
|
for _, r := range g.Rules {
|
||||||
ar, ok := rule.(*AlertingRule)
|
ar, ok := r.(*rule.AlertingRule)
|
||||||
if !ok {
|
if !ok {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if apiAlert := ar.AlertAPI(aID); apiAlert != nil {
|
if apiAlert := alertToAPI(ar, aID); apiAlert != nil {
|
||||||
return apiAlert, nil
|
return apiAlert, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -82,15 +81,15 @@ func (m *manager) close() {
|
||||||
m.wg.Wait()
|
m.wg.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *manager) startGroup(ctx context.Context, g *Group, restore bool) error {
|
func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) error {
|
||||||
m.wg.Add(1)
|
m.wg.Add(1)
|
||||||
id := g.ID()
|
id := g.ID()
|
||||||
go func() {
|
go func() {
|
||||||
defer m.wg.Done()
|
defer m.wg.Done()
|
||||||
if restore {
|
if restore {
|
||||||
g.start(ctx, m.notifiers, m.rw, m.rr)
|
g.Start(ctx, m.notifiers, m.rw, m.rr)
|
||||||
} else {
|
} else {
|
||||||
g.start(ctx, m.notifiers, m.rw, nil)
|
g.Start(ctx, m.notifiers, m.rw, nil)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
m.groups[id] = g
|
m.groups[id] = g
|
||||||
|
@ -99,7 +98,7 @@ func (m *manager) startGroup(ctx context.Context, g *Group, restore bool) error
|
||||||
|
|
||||||
func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore bool) error {
|
func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore bool) error {
|
||||||
var rrPresent, arPresent bool
|
var rrPresent, arPresent bool
|
||||||
groupsRegistry := make(map[uint64]*Group)
|
groupsRegistry := make(map[uint64]*rule.Group)
|
||||||
for _, cfg := range groupsCfg {
|
for _, cfg := range groupsCfg {
|
||||||
for _, r := range cfg.Rules {
|
for _, r := range cfg.Rules {
|
||||||
if rrPresent && arPresent {
|
if rrPresent && arPresent {
|
||||||
|
@ -112,7 +111,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
|
||||||
arPresent = true
|
arPresent = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ng := newGroup(cfg, m.querierBuilder, *evaluationInterval, m.labels)
|
ng := rule.NewGroup(cfg, m.querierBuilder, *evaluationInterval, m.labels)
|
||||||
groupsRegistry[ng.ID()] = ng
|
groupsRegistry[ng.ID()] = ng
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -124,8 +123,8 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
|
||||||
}
|
}
|
||||||
|
|
||||||
type updateItem struct {
|
type updateItem struct {
|
||||||
old *Group
|
old *rule.Group
|
||||||
new *Group
|
new *rule.Group
|
||||||
}
|
}
|
||||||
var toUpdate []updateItem
|
var toUpdate []updateItem
|
||||||
|
|
||||||
|
@ -135,7 +134,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
|
||||||
if !ok {
|
if !ok {
|
||||||
// old group is not present in new list,
|
// old group is not present in new list,
|
||||||
// so must be stopped and deleted
|
// so must be stopped and deleted
|
||||||
og.close()
|
og.Close()
|
||||||
delete(m.groups, og.ID())
|
delete(m.groups, og.ID())
|
||||||
og = nil
|
og = nil
|
||||||
continue
|
continue
|
||||||
|
@ -157,81 +156,13 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
for _, item := range toUpdate {
|
for _, item := range toUpdate {
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
go func(old *Group, new *Group) {
|
go func(old *rule.Group, new *rule.Group) {
|
||||||
old.updateCh <- new
|
old.UpdateWith(new)
|
||||||
wg.Done()
|
wg.Done()
|
||||||
}(item.old, item.new)
|
}(item.old, item.new)
|
||||||
item.old.interruptEval()
|
item.old.InterruptEval()
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (g *Group) toAPI() APIGroup {
|
|
||||||
g.mu.RLock()
|
|
||||||
defer g.mu.RUnlock()
|
|
||||||
|
|
||||||
ag := APIGroup{
|
|
||||||
// encode as string to avoid rounding
|
|
||||||
ID: fmt.Sprintf("%d", g.ID()),
|
|
||||||
|
|
||||||
Name: g.Name,
|
|
||||||
Type: g.Type.String(),
|
|
||||||
File: g.File,
|
|
||||||
Interval: g.Interval.Seconds(),
|
|
||||||
LastEvaluation: g.LastEvaluation,
|
|
||||||
Concurrency: g.Concurrency,
|
|
||||||
Params: urlValuesToStrings(g.Params),
|
|
||||||
Headers: headersToStrings(g.Headers),
|
|
||||||
NotifierHeaders: headersToStrings(g.NotifierHeaders),
|
|
||||||
|
|
||||||
Labels: g.Labels,
|
|
||||||
}
|
|
||||||
ag.Rules = make([]APIRule, 0)
|
|
||||||
for _, r := range g.Rules {
|
|
||||||
ag.Rules = append(ag.Rules, r.ToAPI())
|
|
||||||
}
|
|
||||||
return ag
|
|
||||||
}
|
|
||||||
|
|
||||||
// urlValuesToStrings flattens values into "key=value" strings.
// Keys are emitted in sorted order; the values of one key keep their
// original order. It returns nil when values is empty or when no key
// holds any value.
func urlValuesToStrings(values url.Values) []string {
	if len(values) == 0 {
		return nil
	}

	names := make([]string, 0, len(values))
	for name := range values {
		names = append(names, name)
	}
	sort.Strings(names)

	var pairs []string
	for _, name := range names {
		for _, val := range values[name] {
			pairs = append(pairs, fmt.Sprintf("%s=%s", name, val))
		}
	}
	return pairs
}
|
|
||||||
|
|
||||||
// headersToStrings renders headers as "key: value" strings sorted by key.
// It returns nil for an empty or nil map.
func headersToStrings(headers map[string]string) []string {
	if len(headers) == 0 {
		return nil
	}

	keys := make([]string, 0, len(headers))
	for k := range headers {
		keys = append(keys, k)
	}
	// map iteration order is random; sort for a deterministic result
	sort.Strings(keys)

	// exactly one entry per key, so the result can be sized up front
	res := make([]string, len(keys))
	for i, k := range keys {
		res[i] = fmt.Sprintf("%s: %s", k, headers[k])
	}
	return res
}
|
|
||||||
|
|
|
@ -10,8 +10,10 @@ import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -26,7 +28,7 @@ func TestMain(m *testing.M) {
|
||||||
// successful cases of
|
// successful cases of
|
||||||
// starting with empty rules folder
|
// starting with empty rules folder
|
||||||
func TestManagerEmptyRulesDir(t *testing.T) {
|
func TestManagerEmptyRulesDir(t *testing.T) {
|
||||||
m := &manager{groups: make(map[uint64]*Group)}
|
m := &manager{groups: make(map[uint64]*rule.Group)}
|
||||||
cfg := loadCfg(t, []string{"foo/bar"}, true, true)
|
cfg := loadCfg(t, []string{"foo/bar"}, true, true)
|
||||||
if err := m.update(context.Background(), cfg, false); err != nil {
|
if err := m.update(context.Background(), cfg, false); err != nil {
|
||||||
t.Fatalf("expected to load successfully with empty rules dir; got err instead: %v", err)
|
t.Fatalf("expected to load successfully with empty rules dir; got err instead: %v", err)
|
||||||
|
@ -38,9 +40,9 @@ func TestManagerEmptyRulesDir(t *testing.T) {
|
||||||
// Should be executed with -race flag
|
// Should be executed with -race flag
|
||||||
func TestManagerUpdateConcurrent(t *testing.T) {
|
func TestManagerUpdateConcurrent(t *testing.T) {
|
||||||
m := &manager{
|
m := &manager{
|
||||||
groups: make(map[uint64]*Group),
|
groups: make(map[uint64]*rule.Group),
|
||||||
querierBuilder: &fakeQuerier{},
|
querierBuilder: &datasource.FakeQuerier{},
|
||||||
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&fakeNotifier{}} },
|
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
|
||||||
}
|
}
|
||||||
paths := []string{
|
paths := []string{
|
||||||
"config/testdata/dir/rules0-good.rules",
|
"config/testdata/dir/rules0-good.rules",
|
||||||
|
@ -91,7 +93,7 @@ func TestManagerUpdate(t *testing.T) {
|
||||||
}()
|
}()
|
||||||
|
|
||||||
var (
|
var (
|
||||||
VMRows = &AlertingRule{
|
VMRows = &rule.AlertingRule{
|
||||||
Name: "VMRows",
|
Name: "VMRows",
|
||||||
Expr: "vm_rows > 0",
|
Expr: "vm_rows > 0",
|
||||||
For: 10 * time.Second,
|
For: 10 * time.Second,
|
||||||
|
@ -104,7 +106,7 @@ func TestManagerUpdate(t *testing.T) {
|
||||||
"description": "{{$labels}}",
|
"description": "{{$labels}}",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
Conns = &AlertingRule{
|
Conns = &rule.AlertingRule{
|
||||||
Name: "Conns",
|
Name: "Conns",
|
||||||
Expr: "sum(vm_tcplistener_conns) by(instance) > 1",
|
Expr: "sum(vm_tcplistener_conns) by(instance) > 1",
|
||||||
Annotations: map[string]string{
|
Annotations: map[string]string{
|
||||||
|
@ -112,7 +114,7 @@ func TestManagerUpdate(t *testing.T) {
|
||||||
"description": "It is {{ $value }} connections for {{$labels.instance}}",
|
"description": "It is {{ $value }} connections for {{$labels.instance}}",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
ExampleAlertAlwaysFiring = &AlertingRule{
|
ExampleAlertAlwaysFiring = &rule.AlertingRule{
|
||||||
Name: "ExampleAlertAlwaysFiring",
|
Name: "ExampleAlertAlwaysFiring",
|
||||||
Expr: "sum by(job) (up == 1)",
|
Expr: "sum by(job) (up == 1)",
|
||||||
}
|
}
|
||||||
|
@ -122,20 +124,20 @@ func TestManagerUpdate(t *testing.T) {
|
||||||
name string
|
name string
|
||||||
initPath string
|
initPath string
|
||||||
updatePath string
|
updatePath string
|
||||||
want []*Group
|
want []*rule.Group
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
name: "update good rules",
|
name: "update good rules",
|
||||||
initPath: "config/testdata/rules/rules0-good.rules",
|
initPath: "config/testdata/rules/rules0-good.rules",
|
||||||
updatePath: "config/testdata/dir/rules1-good.rules",
|
updatePath: "config/testdata/dir/rules1-good.rules",
|
||||||
want: []*Group{
|
want: []*rule.Group{
|
||||||
{
|
{
|
||||||
File: "config/testdata/dir/rules1-good.rules",
|
File: "config/testdata/dir/rules1-good.rules",
|
||||||
Name: "duplicatedGroupDiffFiles",
|
Name: "duplicatedGroupDiffFiles",
|
||||||
Type: config.NewPrometheusType(),
|
Type: config.NewPrometheusType(),
|
||||||
Interval: defaultEvalInterval,
|
Interval: defaultEvalInterval,
|
||||||
Rules: []Rule{
|
Rules: []rule.Rule{
|
||||||
&AlertingRule{
|
&rule.AlertingRule{
|
||||||
Name: "VMRows",
|
Name: "VMRows",
|
||||||
Expr: "vm_rows > 0",
|
Expr: "vm_rows > 0",
|
||||||
For: 5 * time.Minute,
|
For: 5 * time.Minute,
|
||||||
|
@ -153,19 +155,20 @@ func TestManagerUpdate(t *testing.T) {
|
||||||
name: "update good rules from 1 to 2 groups",
|
name: "update good rules from 1 to 2 groups",
|
||||||
initPath: "config/testdata/dir/rules/rules1-good.rules",
|
initPath: "config/testdata/dir/rules/rules1-good.rules",
|
||||||
updatePath: "config/testdata/rules/rules0-good.rules",
|
updatePath: "config/testdata/rules/rules0-good.rules",
|
||||||
want: []*Group{
|
want: []*rule.Group{
|
||||||
{
|
{
|
||||||
File: "config/testdata/rules/rules0-good.rules",
|
File: "config/testdata/rules/rules0-good.rules",
|
||||||
Name: "groupGorSingleAlert",
|
Name: "groupGorSingleAlert",
|
||||||
Type: config.NewPrometheusType(),
|
Type: config.NewPrometheusType(),
|
||||||
Rules: []Rule{VMRows},
|
|
||||||
Interval: defaultEvalInterval,
|
Interval: defaultEvalInterval,
|
||||||
|
Rules: []rule.Rule{VMRows},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
File: "config/testdata/rules/rules0-good.rules",
|
File: "config/testdata/rules/rules0-good.rules",
|
||||||
Interval: defaultEvalInterval,
|
Interval: defaultEvalInterval,
|
||||||
Type: config.NewPrometheusType(),
|
Type: config.NewPrometheusType(),
|
||||||
Name: "TestGroup", Rules: []Rule{
|
Name: "TestGroup",
|
||||||
|
Rules: []rule.Rule{
|
||||||
Conns,
|
Conns,
|
||||||
ExampleAlertAlwaysFiring,
|
ExampleAlertAlwaysFiring,
|
||||||
},
|
},
|
||||||
|
@ -176,20 +179,20 @@ func TestManagerUpdate(t *testing.T) {
|
||||||
name: "update with one bad rule file",
|
name: "update with one bad rule file",
|
||||||
initPath: "config/testdata/rules/rules0-good.rules",
|
initPath: "config/testdata/rules/rules0-good.rules",
|
||||||
updatePath: "config/testdata/dir/rules2-bad.rules",
|
updatePath: "config/testdata/dir/rules2-bad.rules",
|
||||||
want: []*Group{
|
want: []*rule.Group{
|
||||||
{
|
{
|
||||||
File: "config/testdata/rules/rules0-good.rules",
|
File: "config/testdata/rules/rules0-good.rules",
|
||||||
Name: "groupGorSingleAlert",
|
Name: "groupGorSingleAlert",
|
||||||
Type: config.NewPrometheusType(),
|
Type: config.NewPrometheusType(),
|
||||||
Interval: defaultEvalInterval,
|
Interval: defaultEvalInterval,
|
||||||
Rules: []Rule{VMRows},
|
Rules: []rule.Rule{VMRows},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
File: "config/testdata/rules/rules0-good.rules",
|
File: "config/testdata/rules/rules0-good.rules",
|
||||||
Interval: defaultEvalInterval,
|
Interval: defaultEvalInterval,
|
||||||
Name: "TestGroup",
|
Name: "TestGroup",
|
||||||
Type: config.NewPrometheusType(),
|
Type: config.NewPrometheusType(),
|
||||||
Rules: []Rule{
|
Rules: []rule.Rule{
|
||||||
Conns,
|
Conns,
|
||||||
ExampleAlertAlwaysFiring,
|
ExampleAlertAlwaysFiring,
|
||||||
},
|
},
|
||||||
|
@ -200,19 +203,20 @@ func TestManagerUpdate(t *testing.T) {
|
||||||
name: "update empty dir rules from 0 to 2 groups",
|
name: "update empty dir rules from 0 to 2 groups",
|
||||||
initPath: "config/testdata/empty/*",
|
initPath: "config/testdata/empty/*",
|
||||||
updatePath: "config/testdata/rules/rules0-good.rules",
|
updatePath: "config/testdata/rules/rules0-good.rules",
|
||||||
want: []*Group{
|
want: []*rule.Group{
|
||||||
{
|
{
|
||||||
File: "config/testdata/rules/rules0-good.rules",
|
File: "config/testdata/rules/rules0-good.rules",
|
||||||
Name: "groupGorSingleAlert",
|
Name: "groupGorSingleAlert",
|
||||||
Type: config.NewPrometheusType(),
|
Type: config.NewPrometheusType(),
|
||||||
Interval: defaultEvalInterval,
|
Interval: defaultEvalInterval,
|
||||||
Rules: []Rule{VMRows},
|
Rules: []rule.Rule{VMRows},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
File: "config/testdata/rules/rules0-good.rules",
|
File: "config/testdata/rules/rules0-good.rules",
|
||||||
Interval: defaultEvalInterval,
|
Interval: defaultEvalInterval,
|
||||||
Type: config.NewPrometheusType(),
|
Type: config.NewPrometheusType(),
|
||||||
Name: "TestGroup", Rules: []Rule{
|
Name: "TestGroup",
|
||||||
|
Rules: []rule.Rule{
|
||||||
Conns,
|
Conns,
|
||||||
ExampleAlertAlwaysFiring,
|
ExampleAlertAlwaysFiring,
|
||||||
},
|
},
|
||||||
|
@ -224,9 +228,9 @@ func TestManagerUpdate(t *testing.T) {
|
||||||
t.Run(tc.name, func(t *testing.T) {
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
ctx, cancel := context.WithCancel(context.TODO())
|
ctx, cancel := context.WithCancel(context.TODO())
|
||||||
m := &manager{
|
m := &manager{
|
||||||
groups: make(map[uint64]*Group),
|
groups: make(map[uint64]*rule.Group),
|
||||||
querierBuilder: &fakeQuerier{},
|
querierBuilder: &datasource.FakeQuerier{},
|
||||||
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&fakeNotifier{}} },
|
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
|
||||||
}
|
}
|
||||||
|
|
||||||
cfgInit := loadCfg(t, []string{tc.initPath}, true, true)
|
cfgInit := loadCfg(t, []string{tc.initPath}, true, true)
|
||||||
|
@ -255,11 +259,36 @@ func TestManagerUpdate(t *testing.T) {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
func compareGroups(t *testing.T, a, b *rule.Group) {
|
||||||
|
t.Helper()
|
||||||
|
if a.Name != b.Name {
|
||||||
|
t.Fatalf("expected group name %q; got %q", a.Name, b.Name)
|
||||||
|
}
|
||||||
|
if a.File != b.File {
|
||||||
|
t.Fatalf("expected group %q file name %q; got %q", a.Name, a.File, b.File)
|
||||||
|
}
|
||||||
|
if a.Interval != b.Interval {
|
||||||
|
t.Fatalf("expected group %q interval %v; got %v", a.Name, a.Interval, b.Interval)
|
||||||
|
}
|
||||||
|
if len(a.Rules) != len(b.Rules) {
|
||||||
|
t.Fatalf("expected group %s to have %d rules; got: %d",
|
||||||
|
a.Name, len(a.Rules), len(b.Rules))
|
||||||
|
}
|
||||||
|
for i, r := range a.Rules {
|
||||||
|
got, want := r, b.Rules[i]
|
||||||
|
if a.ID() != b.ID() {
|
||||||
|
t.Fatalf("expected to have rule %q; got %q", want.ID(), got.ID())
|
||||||
|
}
|
||||||
|
if err := rule.CompareRules(t, want, got); err != nil {
|
||||||
|
t.Fatalf("comparison error: %s", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestManagerUpdateNegative(t *testing.T) {
|
func TestManagerUpdateNegative(t *testing.T) {
|
||||||
testCases := []struct {
|
testCases := []struct {
|
||||||
notifiers []notifier.Notifier
|
notifiers []notifier.Notifier
|
||||||
rw *remotewrite.Client
|
rw remotewrite.RWClient
|
||||||
cfg config.Group
|
cfg config.Group
|
||||||
expErr string
|
expErr string
|
||||||
}{
|
}{
|
||||||
|
@ -286,7 +315,7 @@ func TestManagerUpdateNegative(t *testing.T) {
|
||||||
"contains alerting rules",
|
"contains alerting rules",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
[]notifier.Notifier{&fakeNotifier{}},
|
[]notifier.Notifier{&notifier.FakeNotifier{}},
|
||||||
nil,
|
nil,
|
||||||
config.Group{
|
config.Group{
|
||||||
Name: "Recording and alerting rules",
|
Name: "Recording and alerting rules",
|
||||||
|
@ -316,8 +345,8 @@ func TestManagerUpdateNegative(t *testing.T) {
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
t.Run(tc.cfg.Name, func(t *testing.T) {
|
t.Run(tc.cfg.Name, func(t *testing.T) {
|
||||||
m := &manager{
|
m := &manager{
|
||||||
groups: make(map[uint64]*Group),
|
groups: make(map[uint64]*rule.Group),
|
||||||
querierBuilder: &fakeQuerier{},
|
querierBuilder: &datasource.FakeQuerier{},
|
||||||
rw: tc.rw,
|
rw: tc.rw,
|
||||||
}
|
}
|
||||||
if tc.notifiers != nil {
|
if tc.notifiers != nil {
|
||||||
|
@ -346,21 +375,3 @@ func loadCfg(t *testing.T, path []string, validateAnnotations, validateExpressio
|
||||||
}
|
}
|
||||||
return cfg
|
return cfg
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestUrlValuesToStrings(t *testing.T) {
|
|
||||||
mapQueryParams := map[string][]string{
|
|
||||||
"param1": {"param1"},
|
|
||||||
"param2": {"anotherparam"},
|
|
||||||
}
|
|
||||||
expectedRes := []string{"param1=param1", "param2=anotherparam"}
|
|
||||||
res := urlValuesToStrings(mapQueryParams)
|
|
||||||
|
|
||||||
if len(res) != len(expectedRes) {
|
|
||||||
t.Errorf("Expected length %d, but got %d", len(expectedRes), len(res))
|
|
||||||
}
|
|
||||||
for ind, val := range expectedRes {
|
|
||||||
if val != res[ind] {
|
|
||||||
t.Errorf("Expected %v; but got %v", val, res[ind])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
59
app/vmalert/notifier/faker.go
Normal file
59
app/vmalert/notifier/faker.go
Normal file
|
@ -0,0 +1,59 @@
|
||||||
|
package notifier
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// FakeNotifier is a mock notifier
|
||||||
|
type FakeNotifier struct {
|
||||||
|
sync.Mutex
|
||||||
|
alerts []Alert
|
||||||
|
// records number of received alerts in total
|
||||||
|
counter int
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close does nothing
|
||||||
|
func (*FakeNotifier) Close() {}
|
||||||
|
|
||||||
|
// Addr returns ""
|
||||||
|
func (*FakeNotifier) Addr() string { return "" }
|
||||||
|
|
||||||
|
// Send sets alerts and increases counter
|
||||||
|
func (fn *FakeNotifier) Send(_ context.Context, alerts []Alert, _ map[string]string) error {
|
||||||
|
fn.Lock()
|
||||||
|
defer fn.Unlock()
|
||||||
|
fn.counter += len(alerts)
|
||||||
|
fn.alerts = alerts
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetCounter returns received alerts count
|
||||||
|
func (fn *FakeNotifier) GetCounter() int {
|
||||||
|
fn.Lock()
|
||||||
|
defer fn.Unlock()
|
||||||
|
return fn.counter
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetAlerts returns stored alerts
|
||||||
|
func (fn *FakeNotifier) GetAlerts() []Alert {
|
||||||
|
fn.Lock()
|
||||||
|
defer fn.Unlock()
|
||||||
|
return fn.alerts
|
||||||
|
}
|
||||||
|
|
||||||
|
// FaultyNotifier is a mock notifier that Send() will return failed response
|
||||||
|
type FaultyNotifier struct {
|
||||||
|
FakeNotifier
|
||||||
|
}
|
||||||
|
|
||||||
|
// Send returns failed response
|
||||||
|
func (fn *FaultyNotifier) Send(ctx context.Context, _ []Alert, _ map[string]string) error {
|
||||||
|
d, ok := ctx.Deadline()
|
||||||
|
if ok {
|
||||||
|
time.Sleep(time.Until(d))
|
||||||
|
}
|
||||||
|
return fmt.Errorf("send failed")
|
||||||
|
}
|
322
app/vmalert/remotewrite/client.go
Normal file
322
app/vmalert/remotewrite/client.go
Normal file
|
@ -0,0 +1,322 @@
|
||||||
|
package remotewrite
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"path"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/golang/snappy"
|
||||||
|
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||||
|
"github.com/VictoriaMetrics/metrics"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
defaultConcurrency = 4
|
||||||
|
defaultMaxBatchSize = 1e3
|
||||||
|
defaultMaxQueueSize = 1e5
|
||||||
|
defaultFlushInterval = 5 * time.Second
|
||||||
|
defaultWriteTimeout = 30 * time.Second
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
disablePathAppend = flag.Bool("remoteWrite.disablePathAppend", false, "Whether to disable automatic appending of '/api/v1/write' path to the configured -remoteWrite.url.")
|
||||||
|
sendTimeout = flag.Duration("remoteWrite.sendTimeout", 30*time.Second, "Timeout for sending data to the configured -remoteWrite.url.")
|
||||||
|
retryMinInterval = flag.Duration("remoteWrite.retryMinInterval", time.Second, "The minimum delay between retry attempts. Every next retry attempt will double the delay to prevent hammering of remote database. See also -remoteWrite.retryMaxInterval")
|
||||||
|
retryMaxTime = flag.Duration("remoteWrite.retryMaxTime", time.Second*30, "The max time spent on retry attempts for the failed remote-write request. Change this value if it is expected for remoteWrite.url to be unreachable for more than -remoteWrite.retryMaxTime. See also -remoteWrite.retryMinInterval")
|
||||||
|
)
|
||||||
|
|
||||||
|
// Client is an asynchronous HTTP client for writing
|
||||||
|
// timeseries via remote write protocol.
|
||||||
|
type Client struct {
|
||||||
|
// addr is the remote write URL with a trailing "/" trimmed.
addr string
|
||||||
|
// c is the HTTP client used for sending requests.
c *http.Client
|
||||||
|
// authCfg, when non-nil, sets headers on every outgoing request.
authCfg *promauth.Config
|
||||||
|
// input is the buffered queue filled by Push and consumed by the run goroutines.
input chan prompbmarshal.TimeSeries
|
||||||
|
// flushInterval is the period of the flush ticker.
flushInterval time.Duration
|
||||||
|
// maxBatchSize is the number of buffered series that triggers a flush.
maxBatchSize int
|
||||||
|
// maxQueueSize is the capacity of input; it is reported when Push rejects a series.
maxQueueSize int
|
||||||
|
|
||||||
|
// wg tracks the run goroutines so that Close can wait for them.
wg sync.WaitGroup
|
||||||
|
// doneCh is closed by Close to signal shutdown.
doneCh chan struct{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Config is config for remote write client.
|
||||||
|
type Config struct {
|
||||||
|
// Addr of remote storage
|
||||||
|
Addr string
|
||||||
|
// AuthCfg, when non-nil, is used to set headers on every outgoing request.
AuthCfg *promauth.Config
|
||||||
|
|
||||||
|
// Concurrency defines number of readers that
|
||||||
|
// concurrently read from the queue and flush data
|
||||||
|
// Values <= 0 fall back to defaultConcurrency.
Concurrency int
|
||||||
|
// MaxBatchSize defines max number of timeseries
|
||||||
|
// to be flushed at once
|
||||||
|
// Zero falls back to defaultMaxBatchSize.
MaxBatchSize int
|
||||||
|
// MaxQueueSize defines max length of input queue
|
||||||
|
// populated by Push method.
|
||||||
|
// Push will be rejected once queue is full.
|
||||||
|
// Zero falls back to defaultMaxQueueSize.
MaxQueueSize int
|
||||||
|
// FlushInterval defines time interval for flushing batches
|
||||||
|
// Zero falls back to defaultFlushInterval.
FlushInterval time.Duration
|
||||||
|
// Transport will be used by the underlying http.Client
|
||||||
|
// A nil Transport is replaced with a clone of http.DefaultTransport.
Transport *http.Transport
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewClient returns asynchronous client for
|
||||||
|
// writing timeseries via remotewrite protocol.
|
||||||
|
func NewClient(ctx context.Context, cfg Config) (*Client, error) {
|
||||||
|
if cfg.Addr == "" {
|
||||||
|
return nil, fmt.Errorf("config.Addr can't be empty")
|
||||||
|
}
|
||||||
|
if cfg.MaxBatchSize == 0 {
|
||||||
|
cfg.MaxBatchSize = defaultMaxBatchSize
|
||||||
|
}
|
||||||
|
if cfg.MaxQueueSize == 0 {
|
||||||
|
cfg.MaxQueueSize = defaultMaxQueueSize
|
||||||
|
}
|
||||||
|
if cfg.FlushInterval == 0 {
|
||||||
|
cfg.FlushInterval = defaultFlushInterval
|
||||||
|
}
|
||||||
|
if cfg.Transport == nil {
|
||||||
|
cfg.Transport = http.DefaultTransport.(*http.Transport).Clone()
|
||||||
|
}
|
||||||
|
cc := defaultConcurrency
|
||||||
|
if cfg.Concurrency > 0 {
|
||||||
|
cc = cfg.Concurrency
|
||||||
|
}
|
||||||
|
c := &Client{
|
||||||
|
c: &http.Client{
|
||||||
|
Timeout: *sendTimeout,
|
||||||
|
Transport: cfg.Transport,
|
||||||
|
},
|
||||||
|
addr: strings.TrimSuffix(cfg.Addr, "/"),
|
||||||
|
authCfg: cfg.AuthCfg,
|
||||||
|
flushInterval: cfg.FlushInterval,
|
||||||
|
maxBatchSize: cfg.MaxBatchSize,
|
||||||
|
maxQueueSize: cfg.MaxQueueSize,
|
||||||
|
doneCh: make(chan struct{}),
|
||||||
|
input: make(chan prompbmarshal.TimeSeries, cfg.MaxQueueSize),
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < cc; i++ {
|
||||||
|
c.run(ctx)
|
||||||
|
}
|
||||||
|
return c, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Push adds timeseries into queue for writing into remote storage.
|
||||||
|
// Push returns and error if client is stopped or if queue is full.
|
||||||
|
func (c *Client) Push(s prompbmarshal.TimeSeries) error {
|
||||||
|
select {
|
||||||
|
case <-c.doneCh:
|
||||||
|
return fmt.Errorf("client is closed")
|
||||||
|
case c.input <- s:
|
||||||
|
return nil
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("failed to push timeseries - queue is full (%d entries). "+
|
||||||
|
"Queue size is controlled by -remoteWrite.maxQueueSize flag",
|
||||||
|
c.maxQueueSize)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close stops the client and waits for all goroutines
|
||||||
|
// to exit.
|
||||||
|
func (c *Client) Close() error {
|
||||||
|
if c.doneCh == nil {
|
||||||
|
return fmt.Errorf("client is already closed")
|
||||||
|
}
|
||||||
|
close(c.input)
|
||||||
|
close(c.doneCh)
|
||||||
|
c.wg.Wait()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *Client) run(ctx context.Context) {
|
||||||
|
ticker := time.NewTicker(c.flushInterval)
|
||||||
|
wr := &prompbmarshal.WriteRequest{}
|
||||||
|
shutdown := func() {
|
||||||
|
for ts := range c.input {
|
||||||
|
wr.Timeseries = append(wr.Timeseries, ts)
|
||||||
|
}
|
||||||
|
lastCtx, cancel := context.WithTimeout(context.Background(), defaultWriteTimeout)
|
||||||
|
logger.Infof("shutting down remote write client and flushing remained %d series", len(wr.Timeseries))
|
||||||
|
c.flush(lastCtx, wr)
|
||||||
|
cancel()
|
||||||
|
}
|
||||||
|
c.wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer c.wg.Done()
|
||||||
|
defer ticker.Stop()
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-c.doneCh:
|
||||||
|
shutdown()
|
||||||
|
return
|
||||||
|
case <-ctx.Done():
|
||||||
|
shutdown()
|
||||||
|
return
|
||||||
|
case <-ticker.C:
|
||||||
|
c.flush(ctx, wr)
|
||||||
|
case ts, ok := <-c.input:
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
wr.Timeseries = append(wr.Timeseries, ts)
|
||||||
|
if len(wr.Timeseries) >= c.maxBatchSize {
|
||||||
|
c.flush(ctx, wr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
sentRows = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`)
|
||||||
|
sentBytes = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`)
|
||||||
|
sendDuration = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`)
|
||||||
|
droppedRows = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`)
|
||||||
|
droppedBytes = metrics.NewCounter(`vmalert_remotewrite_dropped_bytes_total`)
|
||||||
|
bufferFlushDuration = metrics.NewHistogram(`vmalert_remotewrite_flush_duration_seconds`)
|
||||||
|
|
||||||
|
_ = metrics.NewGauge(`vmalert_remotewrite_concurrency`, func() float64 {
|
||||||
|
return float64(*concurrency)
|
||||||
|
})
|
||||||
|
)
|
||||||
|
|
||||||
|
// flush is a blocking function that marshals WriteRequest and sends
|
||||||
|
// it to remote-write endpoint. Flush performs limited amount of retries
|
||||||
|
// if request fails.
|
||||||
|
func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) {
|
||||||
|
if len(wr.Timeseries) < 1 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer prompbmarshal.ResetWriteRequest(wr)
|
||||||
|
defer bufferFlushDuration.UpdateDuration(time.Now())
|
||||||
|
|
||||||
|
data, err := wr.Marshal()
|
||||||
|
if err != nil {
|
||||||
|
logger.Errorf("failed to marshal WriteRequest: %s", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
b := snappy.Encode(nil, data)
|
||||||
|
|
||||||
|
retryInterval, maxRetryInterval := *retryMinInterval, *retryMaxTime
|
||||||
|
if retryInterval > maxRetryInterval {
|
||||||
|
retryInterval = maxRetryInterval
|
||||||
|
}
|
||||||
|
timeStart := time.Now()
|
||||||
|
defer func() {
|
||||||
|
sendDuration.Add(time.Since(timeStart).Seconds())
|
||||||
|
}()
|
||||||
|
L:
|
||||||
|
for attempts := 0; ; attempts++ {
|
||||||
|
err := c.send(ctx, b)
|
||||||
|
if err == nil {
|
||||||
|
sentRows.Add(len(wr.Timeseries))
|
||||||
|
sentBytes.Add(len(b))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
_, isNotRetriable := err.(*nonRetriableError)
|
||||||
|
logger.Warnf("attempt %d to send request failed: %s (retriable: %v)", attempts+1, err, !isNotRetriable)
|
||||||
|
|
||||||
|
if isNotRetriable {
|
||||||
|
// exit fast if error isn't retriable
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// check if request has been cancelled before backoff
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
logger.Errorf("interrupting retry attempt %d: context cancelled", attempts+1)
|
||||||
|
break L
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
|
||||||
|
timeLeftForRetries := maxRetryInterval - time.Since(timeStart)
|
||||||
|
if timeLeftForRetries < 0 {
|
||||||
|
// the max retry time has passed, so we give up
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
if retryInterval > timeLeftForRetries {
|
||||||
|
retryInterval = timeLeftForRetries
|
||||||
|
}
|
||||||
|
// sleeping to prevent remote db hammering
|
||||||
|
time.Sleep(retryInterval)
|
||||||
|
retryInterval *= 2
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
droppedRows.Add(len(wr.Timeseries))
|
||||||
|
droppedBytes.Add(len(b))
|
||||||
|
logger.Errorf("attempts to send remote-write request failed - dropping %d time series",
|
||||||
|
len(wr.Timeseries))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *Client) send(ctx context.Context, data []byte) error {
|
||||||
|
r := bytes.NewReader(data)
|
||||||
|
req, err := http.NewRequest(http.MethodPost, c.addr, r)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to create new HTTP request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// RFC standard compliant headers
|
||||||
|
req.Header.Set("Content-Encoding", "snappy")
|
||||||
|
req.Header.Set("Content-Type", "application/x-protobuf")
|
||||||
|
|
||||||
|
// Prometheus compliant headers
|
||||||
|
req.Header.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
|
||||||
|
|
||||||
|
if c.authCfg != nil {
|
||||||
|
c.authCfg.SetHeaders(req, true)
|
||||||
|
}
|
||||||
|
if !*disablePathAppend {
|
||||||
|
req.URL.Path = path.Join(req.URL.Path, "/api/v1/write")
|
||||||
|
}
|
||||||
|
resp, err := c.c.Do(req.WithContext(ctx))
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error while sending request to %s: %w; Data len %d(%d)",
|
||||||
|
req.URL.Redacted(), err, len(data), r.Size())
|
||||||
|
}
|
||||||
|
defer func() { _ = resp.Body.Close() }()
|
||||||
|
|
||||||
|
body, _ := io.ReadAll(resp.Body)
|
||||||
|
|
||||||
|
// according to https://prometheus.io/docs/concepts/remote_write_spec/
|
||||||
|
// Prometheus remote Write compatible receivers MUST
|
||||||
|
switch resp.StatusCode / 100 {
|
||||||
|
case 2:
|
||||||
|
// respond with a HTTP 2xx status code when the write is successful.
|
||||||
|
return nil
|
||||||
|
case 4:
|
||||||
|
if resp.StatusCode != http.StatusTooManyRequests {
|
||||||
|
// MUST NOT retry write requests on HTTP 4xx responses other than 429
|
||||||
|
return &nonRetriableError{fmt.Errorf("unexpected response code %d for %s. Response body %q",
|
||||||
|
resp.StatusCode, req.URL.Redacted(), body)}
|
||||||
|
}
|
||||||
|
fallthrough
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("unexpected response code %d for %s. Response body %q",
|
||||||
|
resp.StatusCode, req.URL.Redacted(), body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// nonRetriableError marks an error after which the request must not be retried.
type nonRetriableError struct {
	err error
}

// Error returns the message of the wrapped error.
func (e *nonRetriableError) Error() string {
	return e.err.Error()
}

// Unwrap returns the wrapped error so that errors.Is and errors.As
// can inspect the underlying cause.
func (e *nonRetriableError) Unwrap() error {
	return e.err
}
|
97
app/vmalert/remotewrite/debug_client.go
Normal file
97
app/vmalert/remotewrite/debug_client.go
Normal file
|
@ -0,0 +1,97 @@
|
||||||
|
package remotewrite
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"path"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/golang/snappy"
|
||||||
|
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||||
|
)
|
||||||
|
|
||||||
|
// DebugClient won't push series periodically, but will write data to remote endpoint
|
||||||
|
// immediately when Push() is called
|
||||||
|
type DebugClient struct {
|
||||||
|
// addr is the remote write URL with a trailing "/" trimmed.
addr string
|
||||||
|
// c is the HTTP client used for sending requests.
c *http.Client
|
||||||
|
|
||||||
|
// wg counts Push calls in flight so that Close can wait for them.
wg sync.WaitGroup
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewDebugClient initiates and returns a new DebugClient
|
||||||
|
func NewDebugClient() (*DebugClient, error) {
|
||||||
|
if *addr == "" {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
t, err := utils.Transport(*addr, *tlsCertFile, *tlsKeyFile, *tlsCAFile, *tlsServerName, *tlsInsecureSkipVerify)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create transport: %w", err)
|
||||||
|
}
|
||||||
|
c := &DebugClient{
|
||||||
|
c: &http.Client{
|
||||||
|
Timeout: *sendTimeout,
|
||||||
|
Transport: t,
|
||||||
|
},
|
||||||
|
addr: strings.TrimSuffix(*addr, "/"),
|
||||||
|
}
|
||||||
|
return c, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Push sends the given timeseries to the remote storage.
|
||||||
|
func (c *DebugClient) Push(s prompbmarshal.TimeSeries) error {
|
||||||
|
c.wg.Add(1)
|
||||||
|
defer c.wg.Done()
|
||||||
|
wr := &prompbmarshal.WriteRequest{Timeseries: []prompbmarshal.TimeSeries{s}}
|
||||||
|
data, err := wr.Marshal()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to marshal the given time series: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return c.send(data)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close stops the DebugClient
|
||||||
|
func (c *DebugClient) Close() error {
|
||||||
|
c.wg.Wait()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *DebugClient) send(data []byte) error {
|
||||||
|
b := snappy.Encode(nil, data)
|
||||||
|
r := bytes.NewReader(b)
|
||||||
|
req, err := http.NewRequest(http.MethodPost, c.addr, r)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to create new HTTP request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// RFC standard compliant headers
|
||||||
|
req.Header.Set("Content-Encoding", "snappy")
|
||||||
|
req.Header.Set("Content-Type", "application/x-protobuf")
|
||||||
|
|
||||||
|
// Prometheus compliant headers
|
||||||
|
req.Header.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
|
||||||
|
|
||||||
|
if !*disablePathAppend {
|
||||||
|
req.URL.Path = path.Join(req.URL.Path, "/api/v1/write")
|
||||||
|
}
|
||||||
|
resp, err := c.c.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error while sending request to %s: %w; Data len %d(%d)",
|
||||||
|
req.URL.Redacted(), err, len(data), r.Size())
|
||||||
|
}
|
||||||
|
defer func() { _ = resp.Body.Close() }()
|
||||||
|
|
||||||
|
if resp.StatusCode/100 == 2 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
body, _ := io.ReadAll(resp.Body)
|
||||||
|
return fmt.Errorf("unexpected response code %d for %s. Response body %q",
|
||||||
|
resp.StatusCode, req.URL.Redacted(), body)
|
||||||
|
}
|
50
app/vmalert/remotewrite/debug_client_test.go
Normal file
50
app/vmalert/remotewrite/debug_client_test.go
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
package remotewrite
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestDebugClient_Push(t *testing.T) {
|
||||||
|
testSrv := newRWServer()
|
||||||
|
oldAddr := *addr
|
||||||
|
*addr = testSrv.URL
|
||||||
|
defer func() {
|
||||||
|
*addr = oldAddr
|
||||||
|
}()
|
||||||
|
|
||||||
|
client, err := NewDebugClient()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to create debug client: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
const rowsN = 100
|
||||||
|
var sent int
|
||||||
|
for i := 0; i < rowsN; i++ {
|
||||||
|
s := prompbmarshal.TimeSeries{
|
||||||
|
Samples: []prompbmarshal.Sample{{
|
||||||
|
Value: float64(i),
|
||||||
|
Timestamp: time.Now().Unix(),
|
||||||
|
}},
|
||||||
|
}
|
||||||
|
err := client.Push(s)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected err: %s", err)
|
||||||
|
}
|
||||||
|
if err == nil {
|
||||||
|
sent++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if sent == 0 {
|
||||||
|
t.Fatalf("0 series sent")
|
||||||
|
}
|
||||||
|
if err := client.Close(); err != nil {
|
||||||
|
t.Fatalf("failed to close client: %s", err)
|
||||||
|
}
|
||||||
|
got := testSrv.accepted()
|
||||||
|
if got != sent {
|
||||||
|
t.Fatalf("expected to have %d series; got %d", sent, got)
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,322 +1,13 @@
|
||||||
package remotewrite
|
package remotewrite
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
|
||||||
"context"
|
|
||||||
"flag"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"net/http"
|
|
||||||
"path"
|
|
||||||
"strings"
|
|
||||||
"sync"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/golang/snappy"
|
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||||
"github.com/VictoriaMetrics/metrics"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
// RWClient represents an HTTP client for pushing data via remote write protocol
|
||||||
disablePathAppend = flag.Bool("remoteWrite.disablePathAppend", false, "Whether to disable automatic appending of '/api/v1/write' path to the configured -remoteWrite.url.")
|
type RWClient interface {
|
||||||
sendTimeout = flag.Duration("remoteWrite.sendTimeout", 30*time.Second, "Timeout for sending data to the configured -remoteWrite.url.")
|
// Push pushes the given time series to remote storage
|
||||||
retryMinInterval = flag.Duration("remoteWrite.retryMinInterval", time.Second, "The minimum delay between retry attempts. Every next retry attempt will double the delay to prevent hammering of remote database. See also -remoteWrite.retryMaxInterval")
|
Push(s prompbmarshal.TimeSeries) error
|
||||||
retryMaxTime = flag.Duration("remoteWrite.retryMaxTime", time.Second*30, "The max time spent on retry attempts for the failed remote-write request. Change this value if it is expected for remoteWrite.url to be unreachable for more than -remoteWrite.retryMaxTime. See also -remoteWrite.retryMinInterval")
|
// Close stops the client. Client can't be reused after Close call.
|
||||||
)
|
Close() error
|
||||||
|
|
||||||
// Client is an asynchronous HTTP client for writing
|
|
||||||
// timeseries via remote write protocol.
|
|
||||||
type Client struct {
|
|
||||||
addr string
|
|
||||||
c *http.Client
|
|
||||||
authCfg *promauth.Config
|
|
||||||
input chan prompbmarshal.TimeSeries
|
|
||||||
flushInterval time.Duration
|
|
||||||
maxBatchSize int
|
|
||||||
maxQueueSize int
|
|
||||||
|
|
||||||
wg sync.WaitGroup
|
|
||||||
doneCh chan struct{}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Config is config for remote write.
|
|
||||||
type Config struct {
|
|
||||||
// Addr of remote storage
|
|
||||||
Addr string
|
|
||||||
AuthCfg *promauth.Config
|
|
||||||
|
|
||||||
// Concurrency defines number of readers that
|
|
||||||
// concurrently read from the queue and flush data
|
|
||||||
Concurrency int
|
|
||||||
// MaxBatchSize defines max number of timeseries
|
|
||||||
// to be flushed at once
|
|
||||||
MaxBatchSize int
|
|
||||||
// MaxQueueSize defines max length of input queue
|
|
||||||
// populated by Push method.
|
|
||||||
// Push will be rejected once queue is full.
|
|
||||||
MaxQueueSize int
|
|
||||||
// FlushInterval defines time interval for flushing batches
|
|
||||||
FlushInterval time.Duration
|
|
||||||
// Transport will be used by the underlying http.Client
|
|
||||||
Transport *http.Transport
|
|
||||||
}
|
|
||||||
|
|
||||||
const (
|
|
||||||
defaultConcurrency = 4
|
|
||||||
defaultMaxBatchSize = 1e3
|
|
||||||
defaultMaxQueueSize = 1e5
|
|
||||||
defaultFlushInterval = 5 * time.Second
|
|
||||||
defaultWriteTimeout = 30 * time.Second
|
|
||||||
)
|
|
||||||
|
|
||||||
// NewClient returns asynchronous client for
|
|
||||||
// writing timeseries via remotewrite protocol.
|
|
||||||
func NewClient(ctx context.Context, cfg Config) (*Client, error) {
|
|
||||||
if cfg.Addr == "" {
|
|
||||||
return nil, fmt.Errorf("config.Addr can't be empty")
|
|
||||||
}
|
|
||||||
if cfg.MaxBatchSize == 0 {
|
|
||||||
cfg.MaxBatchSize = defaultMaxBatchSize
|
|
||||||
}
|
|
||||||
if cfg.MaxQueueSize == 0 {
|
|
||||||
cfg.MaxQueueSize = defaultMaxQueueSize
|
|
||||||
}
|
|
||||||
if cfg.FlushInterval == 0 {
|
|
||||||
cfg.FlushInterval = defaultFlushInterval
|
|
||||||
}
|
|
||||||
if cfg.Transport == nil {
|
|
||||||
cfg.Transport = http.DefaultTransport.(*http.Transport).Clone()
|
|
||||||
}
|
|
||||||
cc := defaultConcurrency
|
|
||||||
if cfg.Concurrency > 0 {
|
|
||||||
cc = cfg.Concurrency
|
|
||||||
}
|
|
||||||
c := &Client{
|
|
||||||
c: &http.Client{
|
|
||||||
Timeout: *sendTimeout,
|
|
||||||
Transport: cfg.Transport,
|
|
||||||
},
|
|
||||||
addr: strings.TrimSuffix(cfg.Addr, "/"),
|
|
||||||
authCfg: cfg.AuthCfg,
|
|
||||||
flushInterval: cfg.FlushInterval,
|
|
||||||
maxBatchSize: cfg.MaxBatchSize,
|
|
||||||
maxQueueSize: cfg.MaxQueueSize,
|
|
||||||
doneCh: make(chan struct{}),
|
|
||||||
input: make(chan prompbmarshal.TimeSeries, cfg.MaxQueueSize),
|
|
||||||
}
|
|
||||||
|
|
||||||
for i := 0; i < cc; i++ {
|
|
||||||
c.run(ctx)
|
|
||||||
}
|
|
||||||
return c, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Push adds timeseries into queue for writing into remote storage.
|
|
||||||
// Push returns an error if client is stopped or if queue is full.
|
|
||||||
func (c *Client) Push(s prompbmarshal.TimeSeries) error {
|
|
||||||
select {
|
|
||||||
case <-c.doneCh:
|
|
||||||
return fmt.Errorf("client is closed")
|
|
||||||
case c.input <- s:
|
|
||||||
return nil
|
|
||||||
default:
|
|
||||||
return fmt.Errorf("failed to push timeseries - queue is full (%d entries). "+
|
|
||||||
"Queue size is controlled by -remoteWrite.maxQueueSize flag",
|
|
||||||
c.maxQueueSize)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Close stops the client and waits for all goroutines
|
|
||||||
// to exit.
|
|
||||||
func (c *Client) Close() error {
|
|
||||||
if c.doneCh == nil {
|
|
||||||
return fmt.Errorf("client is already closed")
|
|
||||||
}
|
|
||||||
close(c.input)
|
|
||||||
close(c.doneCh)
|
|
||||||
c.wg.Wait()
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *Client) run(ctx context.Context) {
|
|
||||||
ticker := time.NewTicker(c.flushInterval)
|
|
||||||
wr := &prompbmarshal.WriteRequest{}
|
|
||||||
shutdown := func() {
|
|
||||||
for ts := range c.input {
|
|
||||||
wr.Timeseries = append(wr.Timeseries, ts)
|
|
||||||
}
|
|
||||||
lastCtx, cancel := context.WithTimeout(context.Background(), defaultWriteTimeout)
|
|
||||||
logger.Infof("shutting down remote write client and flushing remained %d series", len(wr.Timeseries))
|
|
||||||
c.flush(lastCtx, wr)
|
|
||||||
cancel()
|
|
||||||
}
|
|
||||||
c.wg.Add(1)
|
|
||||||
go func() {
|
|
||||||
defer c.wg.Done()
|
|
||||||
defer ticker.Stop()
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case <-c.doneCh:
|
|
||||||
shutdown()
|
|
||||||
return
|
|
||||||
case <-ctx.Done():
|
|
||||||
shutdown()
|
|
||||||
return
|
|
||||||
case <-ticker.C:
|
|
||||||
c.flush(ctx, wr)
|
|
||||||
case ts, ok := <-c.input:
|
|
||||||
if !ok {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
wr.Timeseries = append(wr.Timeseries, ts)
|
|
||||||
if len(wr.Timeseries) >= c.maxBatchSize {
|
|
||||||
c.flush(ctx, wr)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
}
|
|
||||||
|
|
||||||
var (
|
|
||||||
sentRows = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`)
|
|
||||||
sentBytes = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`)
|
|
||||||
sendDuration = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`)
|
|
||||||
droppedRows = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`)
|
|
||||||
droppedBytes = metrics.NewCounter(`vmalert_remotewrite_dropped_bytes_total`)
|
|
||||||
bufferFlushDuration = metrics.NewHistogram(`vmalert_remotewrite_flush_duration_seconds`)
|
|
||||||
|
|
||||||
_ = metrics.NewGauge(`vmalert_remotewrite_concurrency`, func() float64 {
|
|
||||||
return float64(*concurrency)
|
|
||||||
})
|
|
||||||
)
|
|
||||||
|
|
||||||
// flush is a blocking function that marshals WriteRequest and sends
|
|
||||||
// it to remote-write endpoint. Flush performs limited amount of retries
|
|
||||||
// if request fails.
|
|
||||||
func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) {
|
|
||||||
if len(wr.Timeseries) < 1 {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
defer prompbmarshal.ResetWriteRequest(wr)
|
|
||||||
defer bufferFlushDuration.UpdateDuration(time.Now())
|
|
||||||
|
|
||||||
data, err := wr.Marshal()
|
|
||||||
if err != nil {
|
|
||||||
logger.Errorf("failed to marshal WriteRequest: %s", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
b := snappy.Encode(nil, data)
|
|
||||||
|
|
||||||
retryInterval, maxRetryInterval := *retryMinInterval, *retryMaxTime
|
|
||||||
if retryInterval > maxRetryInterval {
|
|
||||||
retryInterval = maxRetryInterval
|
|
||||||
}
|
|
||||||
timeStart := time.Now()
|
|
||||||
defer func() {
|
|
||||||
sendDuration.Add(time.Since(timeStart).Seconds())
|
|
||||||
}()
|
|
||||||
L:
|
|
||||||
for attempts := 0; ; attempts++ {
|
|
||||||
err := c.send(ctx, b)
|
|
||||||
if err == nil {
|
|
||||||
sentRows.Add(len(wr.Timeseries))
|
|
||||||
sentBytes.Add(len(b))
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
_, isNotRetriable := err.(*nonRetriableError)
|
|
||||||
logger.Warnf("attempt %d to send request failed: %s (retriable: %v)", attempts+1, err, !isNotRetriable)
|
|
||||||
|
|
||||||
if isNotRetriable {
|
|
||||||
// exit fast if error isn't retriable
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
// check if request has been cancelled before backoff
|
|
||||||
select {
|
|
||||||
case <-ctx.Done():
|
|
||||||
logger.Errorf("interrupting retry attempt %d: context cancelled", attempts+1)
|
|
||||||
break L
|
|
||||||
default:
|
|
||||||
}
|
|
||||||
|
|
||||||
timeLeftForRetries := maxRetryInterval - time.Since(timeStart)
|
|
||||||
if timeLeftForRetries < 0 {
|
|
||||||
// the max retry time has passed, so we give up
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
if retryInterval > timeLeftForRetries {
|
|
||||||
retryInterval = timeLeftForRetries
|
|
||||||
}
|
|
||||||
// sleeping to prevent remote db hammering
|
|
||||||
time.Sleep(retryInterval)
|
|
||||||
retryInterval *= 2
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
droppedRows.Add(len(wr.Timeseries))
|
|
||||||
droppedBytes.Add(len(b))
|
|
||||||
logger.Errorf("attempts to send remote-write request failed - dropping %d time series",
|
|
||||||
len(wr.Timeseries))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *Client) send(ctx context.Context, data []byte) error {
|
|
||||||
r := bytes.NewReader(data)
|
|
||||||
req, err := http.NewRequest(http.MethodPost, c.addr, r)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to create new HTTP request: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// RFC standard compliant headers
|
|
||||||
req.Header.Set("Content-Encoding", "snappy")
|
|
||||||
req.Header.Set("Content-Type", "application/x-protobuf")
|
|
||||||
|
|
||||||
// Prometheus compliant headers
|
|
||||||
req.Header.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
|
|
||||||
|
|
||||||
if c.authCfg != nil {
|
|
||||||
c.authCfg.SetHeaders(req, true)
|
|
||||||
}
|
|
||||||
if !*disablePathAppend {
|
|
||||||
req.URL.Path = path.Join(req.URL.Path, "/api/v1/write")
|
|
||||||
}
|
|
||||||
resp, err := c.c.Do(req.WithContext(ctx))
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("error while sending request to %s: %w; Data len %d(%d)",
|
|
||||||
req.URL.Redacted(), err, len(data), r.Size())
|
|
||||||
}
|
|
||||||
defer func() { _ = resp.Body.Close() }()
|
|
||||||
|
|
||||||
body, _ := io.ReadAll(resp.Body)
|
|
||||||
|
|
||||||
// according to https://prometheus.io/docs/concepts/remote_write_spec/
|
|
||||||
// Prometheus remote Write compatible receivers MUST
|
|
||||||
switch resp.StatusCode / 100 {
|
|
||||||
case 2:
|
|
||||||
// respond with a HTTP 2xx status code when the write is successful.
|
|
||||||
return nil
|
|
||||||
case 4:
|
|
||||||
if resp.StatusCode != http.StatusTooManyRequests {
|
|
||||||
// MUST NOT retry write requests on HTTP 4xx responses other than 429
|
|
||||||
return &nonRetriableError{fmt.Errorf("unexpected response code %d for %s. Response body %q",
|
|
||||||
resp.StatusCode, req.URL.Redacted(), body)}
|
|
||||||
}
|
|
||||||
fallthrough
|
|
||||||
default:
|
|
||||||
return fmt.Errorf("unexpected response code %d for %s. Response body %q",
|
|
||||||
resp.StatusCode, req.URL.Redacted(), body)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
type nonRetriableError struct {
|
|
||||||
err error
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *nonRetriableError) Error() string {
|
|
||||||
return e.err.Error()
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,19 +1,16 @@
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/cheggaaa/pb/v3"
|
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
@ -33,7 +30,7 @@ var (
|
||||||
"Progress bar rendering might be verbose or break the logs parsing, so it is recommended to be disabled when not used in interactive mode.")
|
"Progress bar rendering might be verbose or break the logs parsing, so it is recommended to be disabled when not used in interactive mode.")
|
||||||
)
|
)
|
||||||
|
|
||||||
func replay(groupsCfg []config.Group, qb datasource.QuerierBuilder, rw *remotewrite.Client) error {
|
func replay(groupsCfg []config.Group, qb datasource.QuerierBuilder, rw remotewrite.RWClient) error {
|
||||||
if *replayMaxDatapoints < 1 {
|
if *replayMaxDatapoints < 1 {
|
||||||
return fmt.Errorf("replay.maxDatapointsPerQuery can't be lower than 1")
|
return fmt.Errorf("replay.maxDatapointsPerQuery can't be lower than 1")
|
||||||
}
|
}
|
||||||
|
@ -68,8 +65,8 @@ func replay(groupsCfg []config.Group, qb datasource.QuerierBuilder, rw *remotewr
|
||||||
|
|
||||||
var total int
|
var total int
|
||||||
for _, cfg := range groupsCfg {
|
for _, cfg := range groupsCfg {
|
||||||
ng := newGroup(cfg, qb, *evaluationInterval, labels)
|
ng := rule.NewGroup(cfg, qb, *evaluationInterval, labels)
|
||||||
total += ng.replay(tFrom, tTo, rw)
|
total += ng.Replay(tFrom, tTo, rw, *replayMaxDatapoints, *replayRuleRetryAttempts, *replayRulesDelay, *disableProgressBar)
|
||||||
}
|
}
|
||||||
logger.Infof("replay finished! Imported %d samples", total)
|
logger.Infof("replay finished! Imported %d samples", total)
|
||||||
if rw != nil {
|
if rw != nil {
|
||||||
|
@ -77,99 +74,3 @@ func replay(groupsCfg []config.Group, qb datasource.QuerierBuilder, rw *remotewr
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (g *Group) replay(start, end time.Time, rw *remotewrite.Client) int {
|
|
||||||
var total int
|
|
||||||
step := g.Interval * time.Duration(*replayMaxDatapoints)
|
|
||||||
start = g.adjustReqTimestamp(start)
|
|
||||||
ri := rangeIterator{start: start, end: end, step: step}
|
|
||||||
iterations := int(end.Sub(start)/step) + 1
|
|
||||||
fmt.Printf("\nGroup %q"+
|
|
||||||
"\ninterval: \t%v"+
|
|
||||||
"\neval_offset: \t%v"+
|
|
||||||
"\nrequests to make: \t%d"+
|
|
||||||
"\nmax range per request: \t%v\n",
|
|
||||||
g.Name, g.Interval, g.EvalOffset, iterations, step)
|
|
||||||
if g.Limit > 0 {
|
|
||||||
fmt.Printf("\nPlease note, `limit: %d` param has no effect during replay.\n",
|
|
||||||
g.Limit)
|
|
||||||
}
|
|
||||||
for _, rule := range g.Rules {
|
|
||||||
fmt.Printf("> Rule %q (ID: %d)\n", rule, rule.ID())
|
|
||||||
var bar *pb.ProgressBar
|
|
||||||
if !*disableProgressBar {
|
|
||||||
bar = pb.StartNew(iterations)
|
|
||||||
}
|
|
||||||
ri.reset()
|
|
||||||
for ri.next() {
|
|
||||||
n, err := replayRule(rule, ri.s, ri.e, rw)
|
|
||||||
if err != nil {
|
|
||||||
logger.Fatalf("rule %q: %s", rule, err)
|
|
||||||
}
|
|
||||||
total += n
|
|
||||||
if bar != nil {
|
|
||||||
bar.Increment()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if bar != nil {
|
|
||||||
bar.Finish()
|
|
||||||
}
|
|
||||||
// sleep to let remote storage to flush data on-disk
|
|
||||||
// so chained rules could be calculated correctly
|
|
||||||
time.Sleep(*replayRulesDelay)
|
|
||||||
}
|
|
||||||
return total
|
|
||||||
}
|
|
||||||
|
|
||||||
func replayRule(rule Rule, start, end time.Time, rw *remotewrite.Client) (int, error) {
|
|
||||||
var err error
|
|
||||||
var tss []prompbmarshal.TimeSeries
|
|
||||||
for i := 0; i < *replayRuleRetryAttempts; i++ {
|
|
||||||
tss, err = rule.ExecRange(context.Background(), start, end)
|
|
||||||
if err == nil {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
logger.Errorf("attempt %d to execute rule %q failed: %s", i+1, rule, err)
|
|
||||||
time.Sleep(time.Second)
|
|
||||||
}
|
|
||||||
if err != nil { // means all attempts failed
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
if len(tss) < 1 {
|
|
||||||
return 0, nil
|
|
||||||
}
|
|
||||||
var n int
|
|
||||||
for _, ts := range tss {
|
|
||||||
if err := rw.Push(ts); err != nil {
|
|
||||||
return n, fmt.Errorf("remote write failure: %s", err)
|
|
||||||
}
|
|
||||||
n += len(ts.Samples)
|
|
||||||
}
|
|
||||||
return n, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
type rangeIterator struct {
|
|
||||||
step time.Duration
|
|
||||||
start, end time.Time
|
|
||||||
|
|
||||||
iter int
|
|
||||||
s, e time.Time
|
|
||||||
}
|
|
||||||
|
|
||||||
func (ri *rangeIterator) reset() {
|
|
||||||
ri.iter = 0
|
|
||||||
ri.s, ri.e = time.Time{}, time.Time{}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (ri *rangeIterator) next() bool {
|
|
||||||
ri.s = ri.start.Add(ri.step * time.Duration(ri.iter))
|
|
||||||
if !ri.end.After(ri.s) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
ri.e = ri.s.Add(ri.step)
|
|
||||||
if ri.e.After(ri.end) {
|
|
||||||
ri.e = ri.end
|
|
||||||
}
|
|
||||||
ri.iter++
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
|
@ -12,7 +12,7 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
type fakeReplayQuerier struct {
|
type fakeReplayQuerier struct {
|
||||||
fakeQuerier
|
datasource.FakeQuerier
|
||||||
registry map[string]map[string]struct{}
|
registry map[string]map[string]struct{}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -170,81 +170,3 @@ func TestReplay(t *testing.T) {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRangeIterator(t *testing.T) {
|
|
||||||
testCases := []struct {
|
|
||||||
ri rangeIterator
|
|
||||||
result [][2]time.Time
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
ri: rangeIterator{
|
|
||||||
start: parseTime(t, "2021-01-01T12:00:00.000Z"),
|
|
||||||
end: parseTime(t, "2021-01-01T12:30:00.000Z"),
|
|
||||||
step: 5 * time.Minute,
|
|
||||||
},
|
|
||||||
result: [][2]time.Time{
|
|
||||||
{parseTime(t, "2021-01-01T12:00:00.000Z"), parseTime(t, "2021-01-01T12:05:00.000Z")},
|
|
||||||
{parseTime(t, "2021-01-01T12:05:00.000Z"), parseTime(t, "2021-01-01T12:10:00.000Z")},
|
|
||||||
{parseTime(t, "2021-01-01T12:10:00.000Z"), parseTime(t, "2021-01-01T12:15:00.000Z")},
|
|
||||||
{parseTime(t, "2021-01-01T12:15:00.000Z"), parseTime(t, "2021-01-01T12:20:00.000Z")},
|
|
||||||
{parseTime(t, "2021-01-01T12:20:00.000Z"), parseTime(t, "2021-01-01T12:25:00.000Z")},
|
|
||||||
{parseTime(t, "2021-01-01T12:25:00.000Z"), parseTime(t, "2021-01-01T12:30:00.000Z")},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ri: rangeIterator{
|
|
||||||
start: parseTime(t, "2021-01-01T12:00:00.000Z"),
|
|
||||||
end: parseTime(t, "2021-01-01T12:30:00.000Z"),
|
|
||||||
step: 45 * time.Minute,
|
|
||||||
},
|
|
||||||
result: [][2]time.Time{
|
|
||||||
{parseTime(t, "2021-01-01T12:00:00.000Z"), parseTime(t, "2021-01-01T12:30:00.000Z")},
|
|
||||||
{parseTime(t, "2021-01-01T12:30:00.000Z"), parseTime(t, "2021-01-01T12:30:00.000Z")},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ri: rangeIterator{
|
|
||||||
start: parseTime(t, "2021-01-01T12:00:12.000Z"),
|
|
||||||
end: parseTime(t, "2021-01-01T12:00:17.000Z"),
|
|
||||||
step: time.Second,
|
|
||||||
},
|
|
||||||
result: [][2]time.Time{
|
|
||||||
{parseTime(t, "2021-01-01T12:00:12.000Z"), parseTime(t, "2021-01-01T12:00:13.000Z")},
|
|
||||||
{parseTime(t, "2021-01-01T12:00:13.000Z"), parseTime(t, "2021-01-01T12:00:14.000Z")},
|
|
||||||
{parseTime(t, "2021-01-01T12:00:14.000Z"), parseTime(t, "2021-01-01T12:00:15.000Z")},
|
|
||||||
{parseTime(t, "2021-01-01T12:00:15.000Z"), parseTime(t, "2021-01-01T12:00:16.000Z")},
|
|
||||||
{parseTime(t, "2021-01-01T12:00:16.000Z"), parseTime(t, "2021-01-01T12:00:17.000Z")},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
for i, tc := range testCases {
|
|
||||||
t.Run(fmt.Sprintf("case %d", i), func(t *testing.T) {
|
|
||||||
var j int
|
|
||||||
for tc.ri.next() {
|
|
||||||
if len(tc.result) < j+1 {
|
|
||||||
t.Fatalf("unexpected result for iterator on step %d: %v - %v",
|
|
||||||
j, tc.ri.s, tc.ri.e)
|
|
||||||
}
|
|
||||||
s, e := tc.ri.s, tc.ri.e
|
|
||||||
expS, expE := tc.result[j][0], tc.result[j][1]
|
|
||||||
if s != expS {
|
|
||||||
t.Fatalf("expected to get start=%v; got %v", expS, s)
|
|
||||||
}
|
|
||||||
if e != expE {
|
|
||||||
t.Fatalf("expected to get end=%v; got %v", expE, e)
|
|
||||||
}
|
|
||||||
j++
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func parseTime(t *testing.T, s string) time.Time {
|
|
||||||
t.Helper()
|
|
||||||
tt, err := time.Parse("2006-01-02T15:04:05.000Z", s)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
return tt
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,118 +0,0 @@
|
||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"errors"
|
|
||||||
"sync"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Rule represents alerting or recording rule
|
|
||||||
// that has unique ID, can be Executed and
|
|
||||||
// updated with other Rule.
|
|
||||||
type Rule interface {
|
|
||||||
// ID returns unique ID that may be used for
|
|
||||||
// identifying this Rule among others.
|
|
||||||
ID() uint64
|
|
||||||
// Exec executes the rule with given context at the given timestamp and limit.
|
|
||||||
// returns an err if number of resulting time series exceeds the limit.
|
|
||||||
Exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error)
|
|
||||||
// ExecRange executes the rule on the given time range.
|
|
||||||
ExecRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error)
|
|
||||||
// UpdateWith performs modification of current Rule
|
|
||||||
// with fields of the given Rule.
|
|
||||||
UpdateWith(Rule) error
|
|
||||||
// ToAPI converts Rule into APIRule
|
|
||||||
ToAPI() APIRule
|
|
||||||
// Close performs the shutdown procedures for rule
|
|
||||||
// such as metrics unregister
|
|
||||||
Close()
|
|
||||||
}
|
|
||||||
|
|
||||||
var errDuplicate = errors.New("result contains metrics with the same labelset after applying rule labels. See https://docs.victoriametrics.com/vmalert.html#series-with-the-same-labelset for details")
|
|
||||||
|
|
||||||
type ruleState struct {
|
|
||||||
sync.RWMutex
|
|
||||||
entries []ruleStateEntry
|
|
||||||
cur int
|
|
||||||
}
|
|
||||||
|
|
||||||
type ruleStateEntry struct {
|
|
||||||
// stores last moment of time rule.Exec was called
|
|
||||||
time time.Time
|
|
||||||
// stores the timestamp rule.Exec was called with
|
|
||||||
at time.Time
|
|
||||||
// stores the duration of the last rule.Exec call
|
|
||||||
duration time.Duration
|
|
||||||
// stores last error that happened in Exec func
|
|
||||||
// resets on every successful Exec
|
|
||||||
// may be used as Health ruleState
|
|
||||||
err error
|
|
||||||
// stores the number of samples returned during
|
|
||||||
// the last evaluation
|
|
||||||
samples int
|
|
||||||
// stores the number of time series fetched during
|
|
||||||
// the last evaluation.
|
|
||||||
// Is supported by VictoriaMetrics only, starting from v1.90.0
|
|
||||||
// If seriesFetched == nil, then this attribute was missing in
|
|
||||||
// datasource response (unsupported).
|
|
||||||
seriesFetched *int
|
|
||||||
// stores the curl command reflecting the HTTP request used during rule.Exec
|
|
||||||
curl string
|
|
||||||
}
|
|
||||||
|
|
||||||
func newRuleState(size int) *ruleState {
|
|
||||||
if size < 1 {
|
|
||||||
size = 1
|
|
||||||
}
|
|
||||||
return &ruleState{
|
|
||||||
entries: make([]ruleStateEntry, size),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *ruleState) getLast() ruleStateEntry {
|
|
||||||
s.RLock()
|
|
||||||
defer s.RUnlock()
|
|
||||||
return s.entries[s.cur]
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *ruleState) size() int {
|
|
||||||
s.RLock()
|
|
||||||
defer s.RUnlock()
|
|
||||||
return len(s.entries)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *ruleState) getAll() []ruleStateEntry {
|
|
||||||
entries := make([]ruleStateEntry, 0)
|
|
||||||
|
|
||||||
s.RLock()
|
|
||||||
defer s.RUnlock()
|
|
||||||
|
|
||||||
cur := s.cur
|
|
||||||
for {
|
|
||||||
e := s.entries[cur]
|
|
||||||
if !e.time.IsZero() || !e.at.IsZero() {
|
|
||||||
entries = append(entries, e)
|
|
||||||
}
|
|
||||||
cur--
|
|
||||||
if cur < 0 {
|
|
||||||
cur = cap(s.entries) - 1
|
|
||||||
}
|
|
||||||
if cur == s.cur {
|
|
||||||
return entries
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *ruleState) add(e ruleStateEntry) {
|
|
||||||
s.Lock()
|
|
||||||
defer s.Unlock()
|
|
||||||
|
|
||||||
s.cur++
|
|
||||||
if s.cur > cap(s.entries)-1 {
|
|
||||||
s.cur = 0
|
|
||||||
}
|
|
||||||
s.entries[s.cur] = e
|
|
||||||
}
|
|
|
@ -1,11 +1,10 @@
|
||||||
package main
|
package rule
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"hash/fnv"
|
"hash/fnv"
|
||||||
"sort"
|
"sort"
|
||||||
"strconv"
|
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
@ -55,7 +54,8 @@ type alertingRuleMetrics struct {
|
||||||
seriesFetched *utils.Gauge
|
seriesFetched *utils.Gauge
|
||||||
}
|
}
|
||||||
|
|
||||||
func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *AlertingRule {
|
// NewAlertingRule creates a new AlertingRule
|
||||||
|
func NewAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *AlertingRule {
|
||||||
ar := &AlertingRule{
|
ar := &AlertingRule{
|
||||||
Type: group.Type,
|
Type: group.Type,
|
||||||
RuleID: cfg.ID,
|
RuleID: cfg.ID,
|
||||||
|
@ -80,10 +80,15 @@ func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
|
||||||
metrics: &alertingRuleMetrics{},
|
metrics: &alertingRuleMetrics{},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
entrySize := *ruleUpdateEntriesLimit
|
||||||
if cfg.UpdateEntriesLimit != nil {
|
if cfg.UpdateEntriesLimit != nil {
|
||||||
ar.state = newRuleState(*cfg.UpdateEntriesLimit)
|
entrySize = *cfg.UpdateEntriesLimit
|
||||||
} else {
|
}
|
||||||
ar.state = newRuleState(*ruleUpdateEntriesLimit)
|
if entrySize < 1 {
|
||||||
|
entrySize = 1
|
||||||
|
}
|
||||||
|
ar.state = &ruleState{
|
||||||
|
entries: make([]StateEntry, entrySize),
|
||||||
}
|
}
|
||||||
|
|
||||||
labels := fmt.Sprintf(`alertname=%q, group=%q, id="%d"`, ar.Name, group.Name, ar.ID())
|
labels := fmt.Sprintf(`alertname=%q, group=%q, id="%d"`, ar.Name, group.Name, ar.ID())
|
||||||
|
@ -114,7 +119,7 @@ func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
|
||||||
ar.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_error{%s}`, labels),
|
ar.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_error{%s}`, labels),
|
||||||
func() float64 {
|
func() float64 {
|
||||||
e := ar.state.getLast()
|
e := ar.state.getLast()
|
||||||
if e.err == nil {
|
if e.Err == nil {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
return 1
|
return 1
|
||||||
|
@ -122,28 +127,28 @@ func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
|
||||||
ar.metrics.samples = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_last_evaluation_samples{%s}`, labels),
|
ar.metrics.samples = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_last_evaluation_samples{%s}`, labels),
|
||||||
func() float64 {
|
func() float64 {
|
||||||
e := ar.state.getLast()
|
e := ar.state.getLast()
|
||||||
return float64(e.samples)
|
return float64(e.Samples)
|
||||||
})
|
})
|
||||||
ar.metrics.seriesFetched = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_last_evaluation_series_fetched{%s}`, labels),
|
ar.metrics.seriesFetched = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_last_evaluation_series_fetched{%s}`, labels),
|
||||||
func() float64 {
|
func() float64 {
|
||||||
e := ar.state.getLast()
|
e := ar.state.getLast()
|
||||||
if e.seriesFetched == nil {
|
if e.SeriesFetched == nil {
|
||||||
// means seriesFetched is unsupported
|
// means seriesFetched is unsupported
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
seriesFetched := float64(*e.seriesFetched)
|
seriesFetched := float64(*e.SeriesFetched)
|
||||||
if seriesFetched == 0 && e.samples > 0 {
|
if seriesFetched == 0 && e.Samples > 0 {
|
||||||
// `alert: 0.95` will fetch no series
|
// `alert: 0.95` will fetch no series
|
||||||
// but will get one time series in response.
|
// but will get one time series in response.
|
||||||
seriesFetched = float64(e.samples)
|
seriesFetched = float64(e.Samples)
|
||||||
}
|
}
|
||||||
return seriesFetched
|
return seriesFetched
|
||||||
})
|
})
|
||||||
return ar
|
return ar
|
||||||
}
|
}
|
||||||
|
|
||||||
// Close unregisters rule metrics
|
// close unregisters rule metrics
|
||||||
func (ar *AlertingRule) Close() {
|
func (ar *AlertingRule) close() {
|
||||||
ar.metrics.active.Unregister()
|
ar.metrics.active.Unregister()
|
||||||
ar.metrics.pending.Unregister()
|
ar.metrics.pending.Unregister()
|
||||||
ar.metrics.errors.Unregister()
|
ar.metrics.errors.Unregister()
|
||||||
|
@ -162,6 +167,27 @@ func (ar *AlertingRule) ID() uint64 {
|
||||||
return ar.RuleID
|
return ar.RuleID
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetAlerts returns active alerts of rule
|
||||||
|
func (ar *AlertingRule) GetAlerts() []*notifier.Alert {
|
||||||
|
ar.alertsMu.RLock()
|
||||||
|
defer ar.alertsMu.RUnlock()
|
||||||
|
var alerts []*notifier.Alert
|
||||||
|
for _, a := range ar.alerts {
|
||||||
|
alerts = append(alerts, a)
|
||||||
|
}
|
||||||
|
return alerts
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetAlert returns alert if id exists
|
||||||
|
func (ar *AlertingRule) GetAlert(id uint64) *notifier.Alert {
|
||||||
|
ar.alertsMu.RLock()
|
||||||
|
defer ar.alertsMu.RUnlock()
|
||||||
|
if ar.alerts == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return ar.alerts[id]
|
||||||
|
}
|
||||||
|
|
||||||
func (ar *AlertingRule) logDebugf(at time.Time, a *notifier.Alert, format string, args ...interface{}) {
|
func (ar *AlertingRule) logDebugf(at time.Time, a *notifier.Alert, format string, args ...interface{}) {
|
||||||
if !ar.Debug {
|
if !ar.Debug {
|
||||||
return
|
return
|
||||||
|
@ -188,6 +214,26 @@ func (ar *AlertingRule) logDebugf(at time.Time, a *notifier.Alert, format string
|
||||||
logger.Infof("%s", prefix+msg)
|
logger.Infof("%s", prefix+msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// updateWith copies all significant fields.
|
||||||
|
// alerts state isn't copied since
|
||||||
|
// it should be updated in next 2 Execs
|
||||||
|
func (ar *AlertingRule) updateWith(r Rule) error {
|
||||||
|
nr, ok := r.(*AlertingRule)
|
||||||
|
if !ok {
|
||||||
|
return fmt.Errorf("BUG: attempt to update alerting rule with wrong type %#v", r)
|
||||||
|
}
|
||||||
|
ar.Expr = nr.Expr
|
||||||
|
ar.For = nr.For
|
||||||
|
ar.KeepFiringFor = nr.KeepFiringFor
|
||||||
|
ar.Labels = nr.Labels
|
||||||
|
ar.Annotations = nr.Annotations
|
||||||
|
ar.EvalInterval = nr.EvalInterval
|
||||||
|
ar.Debug = nr.Debug
|
||||||
|
ar.q = nr.q
|
||||||
|
ar.state = nr.state
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
type labelSet struct {
|
type labelSet struct {
|
||||||
// origin labels extracted from received time series
|
// origin labels extracted from received time series
|
||||||
// plus extra labels (group labels, service labels like alertNameLabel).
|
// plus extra labels (group labels, service labels like alertNameLabel).
|
||||||
|
@ -248,11 +294,11 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
|
||||||
return ls, nil
|
return ls, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// ExecRange executes alerting rule on the given time range similarly to Exec.
|
// execRange executes alerting rule on the given time range similarly to exec.
|
||||||
// It doesn't update internal states of the Rule and meant to be used just
|
// It doesn't update internal states of the Rule and meant to be used just
|
||||||
// to get time series for backfilling.
|
// to get time series for backfilling.
|
||||||
// It returns ALERT and ALERT_FOR_STATE time series as result.
|
// It returns ALERT and ALERT_FOR_STATE time series as result.
|
||||||
func (ar *AlertingRule) ExecRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error) {
|
func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error) {
|
||||||
res, err := ar.q.QueryRange(ctx, ar.Expr, start, end)
|
res, err := ar.q.QueryRange(ctx, ar.Expr, start, end)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -297,19 +343,19 @@ func (ar *AlertingRule) ExecRange(ctx context.Context, start, end time.Time) ([]
|
||||||
// is kept in memory state and consequently repeatedly sent to the AlertManager.
|
// is kept in memory state and consequently repeatedly sent to the AlertManager.
|
||||||
const resolvedRetention = 15 * time.Minute
|
const resolvedRetention = 15 * time.Minute
|
||||||
|
|
||||||
// Exec executes AlertingRule expression via the given Querier.
|
// exec executes AlertingRule expression via the given Querier.
|
||||||
// Based on the Querier results AlertingRule maintains notifier.Alerts
|
// Based on the Querier results AlertingRule maintains notifier.Alerts
|
||||||
func (ar *AlertingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error) {
|
func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error) {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
res, req, err := ar.q.Query(ctx, ar.Expr, ts)
|
res, req, err := ar.q.Query(ctx, ar.Expr, ts)
|
||||||
curState := ruleStateEntry{
|
curState := StateEntry{
|
||||||
time: start,
|
Time: start,
|
||||||
at: ts,
|
At: ts,
|
||||||
duration: time.Since(start),
|
Duration: time.Since(start),
|
||||||
samples: len(res.Data),
|
Samples: len(res.Data),
|
||||||
seriesFetched: res.SeriesFetched,
|
SeriesFetched: res.SeriesFetched,
|
||||||
err: err,
|
Err: err,
|
||||||
curl: requestToCurl(req),
|
Curl: requestToCurl(req),
|
||||||
}
|
}
|
||||||
|
|
||||||
defer func() {
|
defer func() {
|
||||||
|
@ -323,7 +369,7 @@ func (ar *AlertingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||||
return nil, fmt.Errorf("failed to execute query %q: %w", ar.Expr, err)
|
return nil, fmt.Errorf("failed to execute query %q: %w", ar.Expr, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
ar.logDebugf(ts, nil, "query returned %d samples (elapsed: %s)", curState.samples, curState.duration)
|
ar.logDebugf(ts, nil, "query returned %d samples (elapsed: %s)", curState.Samples, curState.Duration)
|
||||||
|
|
||||||
for h, a := range ar.alerts {
|
for h, a := range ar.alerts {
|
||||||
// cleanup inactive alerts from previous Exec
|
// cleanup inactive alerts from previous Exec
|
||||||
|
@ -342,15 +388,15 @@ func (ar *AlertingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||||
for _, m := range res.Data {
|
for _, m := range res.Data {
|
||||||
ls, err := ar.toLabels(m, qFn)
|
ls, err := ar.toLabels(m, qFn)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
curState.err = fmt.Errorf("failed to expand labels: %s", err)
|
curState.Err = fmt.Errorf("failed to expand labels: %s", err)
|
||||||
return nil, curState.err
|
return nil, curState.Err
|
||||||
}
|
}
|
||||||
h := hash(ls.processed)
|
h := hash(ls.processed)
|
||||||
if _, ok := updated[h]; ok {
|
if _, ok := updated[h]; ok {
|
||||||
// duplicate may be caused by extra labels
|
// duplicate may be caused by extra labels
|
||||||
// conflicting with the metric labels
|
// conflicting with the metric labels
|
||||||
curState.err = fmt.Errorf("labels %v: %w", ls.processed, errDuplicate)
|
curState.Err = fmt.Errorf("labels %v: %w", ls.processed, errDuplicate)
|
||||||
return nil, curState.err
|
return nil, curState.Err
|
||||||
}
|
}
|
||||||
updated[h] = struct{}{}
|
updated[h] = struct{}{}
|
||||||
if a, ok := ar.alerts[h]; ok {
|
if a, ok := ar.alerts[h]; ok {
|
||||||
|
@ -373,8 +419,8 @@ func (ar *AlertingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||||
}
|
}
|
||||||
a, err := ar.newAlert(m, ls, start, qFn)
|
a, err := ar.newAlert(m, ls, start, qFn)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
curState.err = fmt.Errorf("failed to create alert: %w", err)
|
curState.Err = fmt.Errorf("failed to create alert: %w", err)
|
||||||
return nil, curState.err
|
return nil, curState.Err
|
||||||
}
|
}
|
||||||
a.ID = h
|
a.ID = h
|
||||||
a.State = notifier.StatePending
|
a.State = notifier.StatePending
|
||||||
|
@ -423,8 +469,8 @@ func (ar *AlertingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||||
}
|
}
|
||||||
if limit > 0 && numActivePending > limit {
|
if limit > 0 && numActivePending > limit {
|
||||||
ar.alerts = map[uint64]*notifier.Alert{}
|
ar.alerts = map[uint64]*notifier.Alert{}
|
||||||
curState.err = fmt.Errorf("exec exceeded limit of %d with %d alerts", limit, numActivePending)
|
curState.Err = fmt.Errorf("exec exceeded limit of %d with %d alerts", limit, numActivePending)
|
||||||
return nil, curState.err
|
return nil, curState.Err
|
||||||
}
|
}
|
||||||
return ar.toTimeSeries(ts.Unix()), nil
|
return ar.toTimeSeries(ts.Unix()), nil
|
||||||
}
|
}
|
||||||
|
@ -441,26 +487,6 @@ func (ar *AlertingRule) toTimeSeries(timestamp int64) []prompbmarshal.TimeSeries
|
||||||
return tss
|
return tss
|
||||||
}
|
}
|
||||||
|
|
||||||
// UpdateWith copies all significant fields.
|
|
||||||
// alerts state isn't copied since
|
|
||||||
// it should be updated in next 2 Execs
|
|
||||||
func (ar *AlertingRule) UpdateWith(r Rule) error {
|
|
||||||
nr, ok := r.(*AlertingRule)
|
|
||||||
if !ok {
|
|
||||||
return fmt.Errorf("BUG: attempt to update alerting rule with wrong type %#v", r)
|
|
||||||
}
|
|
||||||
ar.Expr = nr.Expr
|
|
||||||
ar.For = nr.For
|
|
||||||
ar.KeepFiringFor = nr.KeepFiringFor
|
|
||||||
ar.Labels = nr.Labels
|
|
||||||
ar.Annotations = nr.Annotations
|
|
||||||
ar.EvalInterval = nr.EvalInterval
|
|
||||||
ar.Debug = nr.Debug
|
|
||||||
ar.q = nr.q
|
|
||||||
ar.state = nr.state
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: consider hashing algorithm in VM
|
// TODO: consider hashing algorithm in VM
|
||||||
func hash(labels map[string]string) uint64 {
|
func hash(labels map[string]string) uint64 {
|
||||||
hash := fnv.New64a()
|
hash := fnv.New64a()
|
||||||
|
@ -503,102 +529,6 @@ func (ar *AlertingRule) newAlert(m datasource.Metric, ls *labelSet, start time.T
|
||||||
return a, err
|
return a, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// AlertAPI generates APIAlert object from alert by its id(hash)
|
|
||||||
func (ar *AlertingRule) AlertAPI(id uint64) *APIAlert {
|
|
||||||
ar.alertsMu.RLock()
|
|
||||||
defer ar.alertsMu.RUnlock()
|
|
||||||
a, ok := ar.alerts[id]
|
|
||||||
if !ok {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return ar.newAlertAPI(*a)
|
|
||||||
}
|
|
||||||
|
|
||||||
// ToAPI returns Rule representation in form of APIRule
|
|
||||||
// Isn't thread-safe. Call must be protected by AlertingRule mutex.
|
|
||||||
func (ar *AlertingRule) ToAPI() APIRule {
|
|
||||||
lastState := ar.state.getLast()
|
|
||||||
r := APIRule{
|
|
||||||
Type: "alerting",
|
|
||||||
DatasourceType: ar.Type.String(),
|
|
||||||
Name: ar.Name,
|
|
||||||
Query: ar.Expr,
|
|
||||||
Duration: ar.For.Seconds(),
|
|
||||||
KeepFiringFor: ar.KeepFiringFor.Seconds(),
|
|
||||||
Labels: ar.Labels,
|
|
||||||
Annotations: ar.Annotations,
|
|
||||||
LastEvaluation: lastState.time,
|
|
||||||
EvaluationTime: lastState.duration.Seconds(),
|
|
||||||
Health: "ok",
|
|
||||||
State: "inactive",
|
|
||||||
Alerts: ar.AlertsToAPI(),
|
|
||||||
LastSamples: lastState.samples,
|
|
||||||
LastSeriesFetched: lastState.seriesFetched,
|
|
||||||
MaxUpdates: ar.state.size(),
|
|
||||||
Updates: ar.state.getAll(),
|
|
||||||
Debug: ar.Debug,
|
|
||||||
|
|
||||||
// encode as strings to avoid rounding in JSON
|
|
||||||
ID: fmt.Sprintf("%d", ar.ID()),
|
|
||||||
GroupID: fmt.Sprintf("%d", ar.GroupID),
|
|
||||||
}
|
|
||||||
if lastState.err != nil {
|
|
||||||
r.LastError = lastState.err.Error()
|
|
||||||
r.Health = "err"
|
|
||||||
}
|
|
||||||
// satisfy APIRule.State logic
|
|
||||||
if len(r.Alerts) > 0 {
|
|
||||||
r.State = notifier.StatePending.String()
|
|
||||||
stateFiring := notifier.StateFiring.String()
|
|
||||||
for _, a := range r.Alerts {
|
|
||||||
if a.State == stateFiring {
|
|
||||||
r.State = stateFiring
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return r
|
|
||||||
}
|
|
||||||
|
|
||||||
// AlertsToAPI generates list of APIAlert objects from existing alerts
|
|
||||||
func (ar *AlertingRule) AlertsToAPI() []*APIAlert {
|
|
||||||
var alerts []*APIAlert
|
|
||||||
ar.alertsMu.RLock()
|
|
||||||
for _, a := range ar.alerts {
|
|
||||||
if a.State == notifier.StateInactive {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
alerts = append(alerts, ar.newAlertAPI(*a))
|
|
||||||
}
|
|
||||||
ar.alertsMu.RUnlock()
|
|
||||||
return alerts
|
|
||||||
}
|
|
||||||
|
|
||||||
func (ar *AlertingRule) newAlertAPI(a notifier.Alert) *APIAlert {
|
|
||||||
aa := &APIAlert{
|
|
||||||
// encode as strings to avoid rounding
|
|
||||||
ID: fmt.Sprintf("%d", a.ID),
|
|
||||||
GroupID: fmt.Sprintf("%d", a.GroupID),
|
|
||||||
RuleID: fmt.Sprintf("%d", ar.RuleID),
|
|
||||||
|
|
||||||
Name: a.Name,
|
|
||||||
Expression: ar.Expr,
|
|
||||||
Labels: a.Labels,
|
|
||||||
Annotations: a.Annotations,
|
|
||||||
State: a.State.String(),
|
|
||||||
ActiveAt: a.ActiveAt,
|
|
||||||
Restored: a.Restored,
|
|
||||||
Value: strconv.FormatFloat(a.Value, 'f', -1, 32),
|
|
||||||
}
|
|
||||||
if alertURLGeneratorFn != nil {
|
|
||||||
aa.SourceLink = alertURLGeneratorFn(a)
|
|
||||||
}
|
|
||||||
if a.State == notifier.StateFiring && !a.KeepFiringSince.IsZero() {
|
|
||||||
aa.Stabilizing = true
|
|
||||||
}
|
|
||||||
return aa
|
|
||||||
}
|
|
||||||
|
|
||||||
const (
|
const (
|
||||||
// alertMetricName is the metric name for synthetic alert timeseries.
|
// alertMetricName is the metric name for synthetic alert timeseries.
|
||||||
alertMetricName = "ALERTS"
|
alertMetricName = "ALERTS"
|
||||||
|
@ -646,10 +576,10 @@ func alertForToTimeSeries(a *notifier.Alert, timestamp int64) prompbmarshal.Time
|
||||||
return newTimeSeries([]float64{float64(a.ActiveAt.Unix())}, []int64{timestamp}, labels)
|
return newTimeSeries([]float64{float64(a.ActiveAt.Unix())}, []int64{timestamp}, labels)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Restore restores the value of ActiveAt field for active alerts,
|
// restore restores the value of ActiveAt field for active alerts,
|
||||||
// based on previously written time series `alertForStateMetricName`.
|
// based on previously written time series `alertForStateMetricName`.
|
||||||
// Only rules with For > 0 can be restored.
|
// Only rules with For > 0 can be restored.
|
||||||
func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, ts time.Time, lookback time.Duration) error {
|
func (ar *AlertingRule) restore(ctx context.Context, q datasource.Querier, ts time.Time, lookback time.Duration) error {
|
||||||
if ar.For < 1 {
|
if ar.For < 1 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
|
@ -1,4 +1,4 @@
|
||||||
package main
|
package rule
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
@ -303,13 +303,13 @@ func TestAlertingRule_Exec(t *testing.T) {
|
||||||
fakeGroup := Group{Name: "TestRule_Exec"}
|
fakeGroup := Group{Name: "TestRule_Exec"}
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
t.Run(tc.rule.Name, func(t *testing.T) {
|
t.Run(tc.rule.Name, func(t *testing.T) {
|
||||||
fq := &fakeQuerier{}
|
fq := &datasource.FakeQuerier{}
|
||||||
tc.rule.q = fq
|
tc.rule.q = fq
|
||||||
tc.rule.GroupID = fakeGroup.ID()
|
tc.rule.GroupID = fakeGroup.ID()
|
||||||
for i, step := range tc.steps {
|
for i, step := range tc.steps {
|
||||||
fq.reset()
|
fq.Reset()
|
||||||
fq.add(step...)
|
fq.Add(step...)
|
||||||
if _, err := tc.rule.Exec(context.TODO(), time.Now(), 0); err != nil {
|
if _, err := tc.rule.exec(context.TODO(), time.Now(), 0); err != nil {
|
||||||
t.Fatalf("unexpected err: %s", err)
|
t.Fatalf("unexpected err: %s", err)
|
||||||
}
|
}
|
||||||
// artificial delay between applying steps
|
// artificial delay between applying steps
|
||||||
|
@ -482,11 +482,11 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||||
fakeGroup := Group{Name: "TestRule_ExecRange"}
|
fakeGroup := Group{Name: "TestRule_ExecRange"}
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
t.Run(tc.rule.Name, func(t *testing.T) {
|
t.Run(tc.rule.Name, func(t *testing.T) {
|
||||||
fq := &fakeQuerier{}
|
fq := &datasource.FakeQuerier{}
|
||||||
tc.rule.q = fq
|
tc.rule.q = fq
|
||||||
tc.rule.GroupID = fakeGroup.ID()
|
tc.rule.GroupID = fakeGroup.ID()
|
||||||
fq.add(tc.data...)
|
fq.Add(tc.data...)
|
||||||
gotTS, err := tc.rule.ExecRange(context.TODO(), time.Now(), time.Now())
|
gotTS, err := tc.rule.execRange(context.TODO(), time.Now(), time.Now())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("unexpected err: %s", err)
|
t.Fatalf("unexpected err: %s", err)
|
||||||
}
|
}
|
||||||
|
@ -518,24 +518,24 @@ func TestAlertingRule_ExecRange(t *testing.T) {
|
||||||
|
|
||||||
func TestGroup_Restore(t *testing.T) {
|
func TestGroup_Restore(t *testing.T) {
|
||||||
defaultTS := time.Now()
|
defaultTS := time.Now()
|
||||||
fqr := &fakeQuerierWithRegistry{}
|
fqr := &datasource.FakeQuerierWithRegistry{}
|
||||||
fn := func(rules []config.Rule, expAlerts map[uint64]*notifier.Alert) {
|
fn := func(rules []config.Rule, expAlerts map[uint64]*notifier.Alert) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
defer fqr.reset()
|
defer fqr.Reset()
|
||||||
|
|
||||||
for _, r := range rules {
|
for _, r := range rules {
|
||||||
fqr.set(r.Expr, metricWithValueAndLabels(t, 0, "__name__", r.Alert))
|
fqr.Set(r.Expr, metricWithValueAndLabels(t, 0, "__name__", r.Alert))
|
||||||
}
|
}
|
||||||
|
|
||||||
fg := newGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil)
|
fg := NewGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil)
|
||||||
wg := sync.WaitGroup{}
|
wg := sync.WaitGroup{}
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
go func() {
|
go func() {
|
||||||
nts := func() []notifier.Notifier { return []notifier.Notifier{&fakeNotifier{}} }
|
nts := func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} }
|
||||||
fg.start(context.Background(), nts, nil, fqr)
|
fg.Start(context.Background(), nts, nil, fqr)
|
||||||
wg.Done()
|
wg.Done()
|
||||||
}()
|
}()
|
||||||
fg.close()
|
fg.Close()
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
|
|
||||||
gotAlerts := make(map[uint64]*notifier.Alert)
|
gotAlerts := make(map[uint64]*notifier.Alert)
|
||||||
|
@ -582,11 +582,11 @@ func TestGroup_Restore(t *testing.T) {
|
||||||
ActiveAt: defaultTS,
|
ActiveAt: defaultTS,
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
fqr.reset()
|
fqr.Reset()
|
||||||
|
|
||||||
// one active alert with state restore
|
// one active alert with state restore
|
||||||
ts := time.Now().Truncate(time.Hour)
|
ts := time.Now().Truncate(time.Hour)
|
||||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
|
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
|
||||||
stateMetric("foo", ts))
|
stateMetric("foo", ts))
|
||||||
fn(
|
fn(
|
||||||
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
|
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
|
||||||
|
@ -598,7 +598,7 @@ func TestGroup_Restore(t *testing.T) {
|
||||||
|
|
||||||
// two rules, two active alerts, one with state restored
|
// two rules, two active alerts, one with state restored
|
||||||
ts = time.Now().Truncate(time.Hour)
|
ts = time.Now().Truncate(time.Hour)
|
||||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
|
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
|
||||||
stateMetric("foo", ts))
|
stateMetric("foo", ts))
|
||||||
fn(
|
fn(
|
||||||
[]config.Rule{
|
[]config.Rule{
|
||||||
|
@ -616,9 +616,9 @@ func TestGroup_Restore(t *testing.T) {
|
||||||
|
|
||||||
// two rules, two active alerts, two with state restored
|
// two rules, two active alerts, two with state restored
|
||||||
ts = time.Now().Truncate(time.Hour)
|
ts = time.Now().Truncate(time.Hour)
|
||||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
|
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
|
||||||
stateMetric("foo", ts))
|
stateMetric("foo", ts))
|
||||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
|
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
|
||||||
stateMetric("bar", ts))
|
stateMetric("bar", ts))
|
||||||
fn(
|
fn(
|
||||||
[]config.Rule{
|
[]config.Rule{
|
||||||
|
@ -636,7 +636,7 @@ func TestGroup_Restore(t *testing.T) {
|
||||||
|
|
||||||
// one active alert but wrong state restore
|
// one active alert but wrong state restore
|
||||||
ts = time.Now().Truncate(time.Hour)
|
ts = time.Now().Truncate(time.Hour)
|
||||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertname="bar",alertgroup="TestRestore"}[3600s])`,
|
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertname="bar",alertgroup="TestRestore"}[3600s])`,
|
||||||
stateMetric("wrong alert", ts))
|
stateMetric("wrong alert", ts))
|
||||||
fn(
|
fn(
|
||||||
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
|
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
|
||||||
|
@ -648,7 +648,7 @@ func TestGroup_Restore(t *testing.T) {
|
||||||
|
|
||||||
// one active alert with labels
|
// one active alert with labels
|
||||||
ts = time.Now().Truncate(time.Hour)
|
ts = time.Now().Truncate(time.Hour)
|
||||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
|
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
|
||||||
stateMetric("foo", ts, "env", "dev"))
|
stateMetric("foo", ts, "env", "dev"))
|
||||||
fn(
|
fn(
|
||||||
[]config.Rule{{Alert: "foo", Expr: "foo", Labels: map[string]string{"env": "dev"}, For: promutils.NewDuration(time.Second)}},
|
[]config.Rule{{Alert: "foo", Expr: "foo", Labels: map[string]string{"env": "dev"}, For: promutils.NewDuration(time.Second)}},
|
||||||
|
@ -660,7 +660,7 @@ func TestGroup_Restore(t *testing.T) {
|
||||||
|
|
||||||
// one active alert with restore labels mismatch
|
// one active alert with restore labels mismatch
|
||||||
ts = time.Now().Truncate(time.Hour)
|
ts = time.Now().Truncate(time.Hour)
|
||||||
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
|
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
|
||||||
stateMetric("foo", ts, "env", "dev", "team", "foo"))
|
stateMetric("foo", ts, "env", "dev", "team", "foo"))
|
||||||
fn(
|
fn(
|
||||||
[]config.Rule{{Alert: "foo", Expr: "foo", Labels: map[string]string{"env": "dev"}, For: promutils.NewDuration(time.Second)}},
|
[]config.Rule{{Alert: "foo", Expr: "foo", Labels: map[string]string{"env": "dev"}, For: promutils.NewDuration(time.Second)}},
|
||||||
|
@ -672,30 +672,30 @@ func TestGroup_Restore(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestAlertingRule_Exec_Negative(t *testing.T) {
|
func TestAlertingRule_Exec_Negative(t *testing.T) {
|
||||||
fq := &fakeQuerier{}
|
fq := &datasource.FakeQuerier{}
|
||||||
ar := newTestAlertingRule("test", 0)
|
ar := newTestAlertingRule("test", 0)
|
||||||
ar.Labels = map[string]string{"job": "test"}
|
ar.Labels = map[string]string{"job": "test"}
|
||||||
ar.q = fq
|
ar.q = fq
|
||||||
|
|
||||||
// successful attempt
|
// successful attempt
|
||||||
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
|
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
|
||||||
_, err := ar.Exec(context.TODO(), time.Now(), 0)
|
_, err := ar.exec(context.TODO(), time.Now(), 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// label `job` will collide with rule extra label and will make both time series equal
|
// label `job` will collide with rule extra label and will make both time series equal
|
||||||
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "baz"))
|
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "baz"))
|
||||||
_, err = ar.Exec(context.TODO(), time.Now(), 0)
|
_, err = ar.exec(context.TODO(), time.Now(), 0)
|
||||||
if !errors.Is(err, errDuplicate) {
|
if !errors.Is(err, errDuplicate) {
|
||||||
t.Fatalf("expected to have %s error; got %s", errDuplicate, err)
|
t.Fatalf("expected to have %s error; got %s", errDuplicate, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
fq.reset()
|
fq.Reset()
|
||||||
|
|
||||||
expErr := "connection reset by peer"
|
expErr := "connection reset by peer"
|
||||||
fq.setErr(errors.New(expErr))
|
fq.SetErr(errors.New(expErr))
|
||||||
_, err = ar.Exec(context.TODO(), time.Now(), 0)
|
_, err = ar.exec(context.TODO(), time.Now(), 0)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
t.Fatalf("expected to get err; got nil")
|
t.Fatalf("expected to get err; got nil")
|
||||||
}
|
}
|
||||||
|
@ -705,7 +705,7 @@ func TestAlertingRule_Exec_Negative(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestAlertingRuleLimit(t *testing.T) {
|
func TestAlertingRuleLimit(t *testing.T) {
|
||||||
fq := &fakeQuerier{}
|
fq := &datasource.FakeQuerier{}
|
||||||
ar := newTestAlertingRule("test", 0)
|
ar := newTestAlertingRule("test", 0)
|
||||||
ar.Labels = map[string]string{"job": "test"}
|
ar.Labels = map[string]string{"job": "test"}
|
||||||
ar.q = fq
|
ar.q = fq
|
||||||
|
@ -737,15 +737,15 @@ func TestAlertingRuleLimit(t *testing.T) {
|
||||||
err error
|
err error
|
||||||
timestamp = time.Now()
|
timestamp = time.Now()
|
||||||
)
|
)
|
||||||
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
|
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
|
||||||
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "bar", "job"))
|
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "bar", "job"))
|
||||||
for _, testCase := range testCases {
|
for _, testCase := range testCases {
|
||||||
_, err = ar.Exec(context.TODO(), timestamp, testCase.limit)
|
_, err = ar.exec(context.TODO(), timestamp, testCase.limit)
|
||||||
if err != nil && !strings.EqualFold(err.Error(), testCase.err) {
|
if err != nil && !strings.EqualFold(err.Error(), testCase.err) {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fq.reset()
|
fq.Reset()
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestAlertingRule_Template(t *testing.T) {
|
func TestAlertingRule_Template(t *testing.T) {
|
||||||
|
@ -870,12 +870,12 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||||
fakeGroup := Group{Name: "TestRule_Exec"}
|
fakeGroup := Group{Name: "TestRule_Exec"}
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
t.Run(tc.rule.Name, func(t *testing.T) {
|
t.Run(tc.rule.Name, func(t *testing.T) {
|
||||||
fq := &fakeQuerier{}
|
fq := &datasource.FakeQuerier{}
|
||||||
tc.rule.GroupID = fakeGroup.ID()
|
tc.rule.GroupID = fakeGroup.ID()
|
||||||
tc.rule.q = fq
|
tc.rule.q = fq
|
||||||
tc.rule.state = newRuleState(10)
|
tc.rule.state = &ruleState{entries: make([]StateEntry, 10)}
|
||||||
fq.add(tc.metrics...)
|
fq.Add(tc.metrics...)
|
||||||
if _, err := tc.rule.Exec(context.TODO(), time.Now(), 0); err != nil {
|
if _, err := tc.rule.exec(context.TODO(), time.Now(), 0); err != nil {
|
||||||
t.Fatalf("unexpected err: %s", err)
|
t.Fatalf("unexpected err: %s", err)
|
||||||
}
|
}
|
||||||
for hash, expAlert := range tc.expAlerts {
|
for hash, expAlert := range tc.expAlerts {
|
||||||
|
@ -989,7 +989,7 @@ func newTestAlertingRule(name string, waitFor time.Duration) *AlertingRule {
|
||||||
For: waitFor,
|
For: waitFor,
|
||||||
EvalInterval: waitFor,
|
EvalInterval: waitFor,
|
||||||
alerts: make(map[uint64]*notifier.Alert),
|
alerts: make(map[uint64]*notifier.Alert),
|
||||||
state: newRuleState(10),
|
state: &ruleState{entries: make([]StateEntry, 10)},
|
||||||
}
|
}
|
||||||
return &rule
|
return &rule
|
||||||
}
|
}
|
|
@ -1,8 +1,10 @@
|
||||||
package main
|
package rule
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"hash/fnv"
|
"hash/fnv"
|
||||||
"net/url"
|
"net/url"
|
||||||
|
@ -11,7 +13,7 @@ import (
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/metrics"
|
"github.com/cheggaaa/pb/v3"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||||
|
@ -21,6 +23,18 @@ import (
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||||
|
"github.com/VictoriaMetrics/metrics"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
|
||||||
|
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
|
||||||
|
resendDelay = flag.Duration("rule.resendDelay", 0, "Minimum amount of time to wait before resending an alert to notifier")
|
||||||
|
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maximum duration for automatic alert expiration, "+
|
||||||
|
"which by default is 4 times evaluationInterval of the parent group.")
|
||||||
|
disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.")
|
||||||
|
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries."+
|
||||||
|
" For example, if lookback=1h then range from now() to now()-1h will be scanned.")
|
||||||
)
|
)
|
||||||
|
|
||||||
// Group is an entity for grouping rules
|
// Group is an entity for grouping rules
|
||||||
|
@ -96,7 +110,8 @@ func mergeLabels(groupName, ruleName string, set1, set2 map[string]string) map[s
|
||||||
return r
|
return r
|
||||||
}
|
}
|
||||||
|
|
||||||
func newGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval time.Duration, labels map[string]string) *Group {
|
// NewGroup returns a new group
|
||||||
|
func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval time.Duration, labels map[string]string) *Group {
|
||||||
g := &Group{
|
g := &Group{
|
||||||
Type: cfg.Type,
|
Type: cfg.Type,
|
||||||
Name: cfg.Name,
|
Name: cfg.Name,
|
||||||
|
@ -153,11 +168,11 @@ func newGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
|
||||||
return g
|
return g
|
||||||
}
|
}
|
||||||
|
|
||||||
func (g *Group) newRule(qb datasource.QuerierBuilder, rule config.Rule) Rule {
|
func (g *Group) newRule(qb datasource.QuerierBuilder, r config.Rule) Rule {
|
||||||
if rule.Alert != "" {
|
if r.Alert != "" {
|
||||||
return newAlertingRule(qb, g, rule)
|
return NewAlertingRule(qb, g, r)
|
||||||
}
|
}
|
||||||
return newRecordingRule(qb, g, rule)
|
return NewRecordingRule(qb, g, r)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ID returns unique group ID that consists of
|
// ID returns unique group ID that consists of
|
||||||
|
@ -178,8 +193,8 @@ func (g *Group) ID() uint64 {
|
||||||
return hash.Sum64()
|
return hash.Sum64()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Restore restores alerts state for group rules
|
// restore restores alerts state for group rules
|
||||||
func (g *Group) Restore(ctx context.Context, qb datasource.QuerierBuilder, ts time.Time, lookback time.Duration) error {
|
func (g *Group) restore(ctx context.Context, qb datasource.QuerierBuilder, ts time.Time, lookback time.Duration) error {
|
||||||
for _, rule := range g.Rules {
|
for _, rule := range g.Rules {
|
||||||
ar, ok := rule.(*AlertingRule)
|
ar, ok := rule.(*AlertingRule)
|
||||||
if !ok {
|
if !ok {
|
||||||
|
@ -195,7 +210,7 @@ func (g *Group) Restore(ctx context.Context, qb datasource.QuerierBuilder, ts ti
|
||||||
Headers: g.Headers,
|
Headers: g.Headers,
|
||||||
Debug: ar.Debug,
|
Debug: ar.Debug,
|
||||||
})
|
})
|
||||||
if err := ar.Restore(ctx, q, ts, lookback); err != nil {
|
if err := ar.restore(ctx, q, ts, lookback); err != nil {
|
||||||
return fmt.Errorf("error while restoring rule %q: %w", rule, err)
|
return fmt.Errorf("error while restoring rule %q: %w", rule, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -205,7 +220,7 @@ func (g *Group) Restore(ctx context.Context, qb datasource.QuerierBuilder, ts ti
|
||||||
// updateWith updates existing group with
|
// updateWith updates existing group with
|
||||||
// passed group object. This function ignores group
|
// passed group object. This function ignores group
|
||||||
// evaluation interval change. It is supposed to be updated
|
// evaluation interval change. It is supposed to be updated
|
||||||
// in group.start function.
|
// in group.Start function.
|
||||||
// Not thread-safe.
|
// Not thread-safe.
|
||||||
func (g *Group) updateWith(newGroup *Group) error {
|
func (g *Group) updateWith(newGroup *Group) error {
|
||||||
rulesRegistry := make(map[uint64]Rule)
|
rulesRegistry := make(map[uint64]Rule)
|
||||||
|
@ -218,11 +233,11 @@ func (g *Group) updateWith(newGroup *Group) error {
|
||||||
if !ok {
|
if !ok {
|
||||||
// old rule is not present in the new list
|
// old rule is not present in the new list
|
||||||
// so we mark it for removing
|
// so we mark it for removing
|
||||||
g.Rules[i].Close()
|
g.Rules[i].close()
|
||||||
g.Rules[i] = nil
|
g.Rules[i] = nil
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if err := or.UpdateWith(nr); err != nil {
|
if err := or.updateWith(nr); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
delete(rulesRegistry, nr.ID())
|
delete(rulesRegistry, nr.ID())
|
||||||
|
@ -255,10 +270,10 @@ func (g *Group) updateWith(newGroup *Group) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// interruptEval interrupts in-flight rules evaluations
|
// InterruptEval interrupts in-flight rules evaluations
|
||||||
// within the group. It is expected that g.evalCancel
|
// within the group. It is expected that g.evalCancel
|
||||||
// will be repopulated after the call.
|
// will be repopulated after the call.
|
||||||
func (g *Group) interruptEval() {
|
func (g *Group) InterruptEval() {
|
||||||
g.mu.RLock()
|
g.mu.RLock()
|
||||||
defer g.mu.RUnlock()
|
defer g.mu.RUnlock()
|
||||||
|
|
||||||
|
@ -267,12 +282,13 @@ func (g *Group) interruptEval() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (g *Group) close() {
|
// Close stops the group and its rules, unregisters group metrics
|
||||||
|
func (g *Group) Close() {
|
||||||
if g.doneCh == nil {
|
if g.doneCh == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
close(g.doneCh)
|
close(g.doneCh)
|
||||||
g.interruptEval()
|
g.InterruptEval()
|
||||||
<-g.finishedCh
|
<-g.finishedCh
|
||||||
|
|
||||||
g.metrics.iterationDuration.Unregister()
|
g.metrics.iterationDuration.Unregister()
|
||||||
|
@ -280,19 +296,21 @@ func (g *Group) close() {
|
||||||
g.metrics.iterationMissed.Unregister()
|
g.metrics.iterationMissed.Unregister()
|
||||||
g.metrics.iterationInterval.Unregister()
|
g.metrics.iterationInterval.Unregister()
|
||||||
for _, rule := range g.Rules {
|
for _, rule := range g.Rules {
|
||||||
rule.Close()
|
rule.close()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var skipRandSleepOnGroupStart bool
|
// SkipRandSleepOnGroupStart will skip random sleep delay in group first evaluation
|
||||||
|
var SkipRandSleepOnGroupStart bool
|
||||||
|
|
||||||
func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *remotewrite.Client, rr datasource.QuerierBuilder) {
|
// Start starts group's evaluation
|
||||||
|
func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, rr datasource.QuerierBuilder) {
|
||||||
defer func() { close(g.finishedCh) }()
|
defer func() { close(g.finishedCh) }()
|
||||||
|
|
||||||
evalTS := time.Now()
|
evalTS := time.Now()
|
||||||
// sleep random duration to spread group rules evaluation
|
// sleep random duration to spread group rules evaluation
|
||||||
// over time in order to reduce load on datasource.
|
// over time in order to reduce load on datasource.
|
||||||
if !skipRandSleepOnGroupStart {
|
if !SkipRandSleepOnGroupStart {
|
||||||
sleepBeforeStart := delayBeforeStart(evalTS, g.ID(), g.Interval, g.EvalOffset)
|
sleepBeforeStart := delayBeforeStart(evalTS, g.ID(), g.Interval, g.EvalOffset)
|
||||||
g.infof("will start in %v", sleepBeforeStart)
|
g.infof("will start in %v", sleepBeforeStart)
|
||||||
|
|
||||||
|
@ -310,10 +328,10 @@ func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *r
|
||||||
}
|
}
|
||||||
|
|
||||||
e := &executor{
|
e := &executor{
|
||||||
rw: rw,
|
Rw: rw,
|
||||||
notifiers: nts,
|
Notifiers: nts,
|
||||||
notifierHeaders: g.NotifierHeaders,
|
notifierHeaders: g.NotifierHeaders,
|
||||||
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
PreviouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||||
}
|
}
|
||||||
|
|
||||||
g.infof("started")
|
g.infof("started")
|
||||||
|
@ -355,7 +373,7 @@ func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *r
|
||||||
// restore the rules state after the first evaluation
|
// restore the rules state after the first evaluation
|
||||||
// so only active alerts can be restored.
|
// so only active alerts can be restored.
|
||||||
if rr != nil {
|
if rr != nil {
|
||||||
err := g.Restore(ctx, rr, evalTS, *remoteReadLookBack)
|
err := g.restore(ctx, rr, evalTS, *remoteReadLookBack)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Errorf("error while restoring ruleState for group %q: %s", g.Name, err)
|
logger.Errorf("error while restoring ruleState for group %q: %s", g.Name, err)
|
||||||
}
|
}
|
||||||
|
@ -409,6 +427,22 @@ func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *r
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// UpdateWith inserts new group to updateCh
|
||||||
|
func (g *Group) UpdateWith(new *Group) {
|
||||||
|
g.updateCh <- new
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeepCopy returns a deep copy of group
|
||||||
|
func (g *Group) DeepCopy() *Group {
|
||||||
|
g.mu.RLock()
|
||||||
|
data, _ := json.Marshal(g)
|
||||||
|
g.mu.RUnlock()
|
||||||
|
newG := Group{}
|
||||||
|
_ = json.Unmarshal(data, &newG)
|
||||||
|
newG.Rules = g.Rules
|
||||||
|
return &newG
|
||||||
|
}
|
||||||
|
|
||||||
// delayBeforeStart returns a duration on the interval between [ts..ts+interval].
|
// delayBeforeStart returns a duration on the interval between [ts..ts+interval].
|
||||||
// delayBeforeStart accounts for `offset`, so returned duration should be always
|
// delayBeforeStart accounts for `offset`, so returned duration should be always
|
||||||
// bigger than the `offset`.
|
// bigger than the `offset`.
|
||||||
|
@ -438,6 +472,89 @@ func (g *Group) infof(format string, args ...interface{}) {
|
||||||
g.Name, msg, g.Interval, g.EvalOffset, g.Concurrency)
|
g.Name, msg, g.Interval, g.EvalOffset, g.Concurrency)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Replay performs group replay
|
||||||
|
func (g *Group) Replay(start, end time.Time, rw remotewrite.RWClient, maxDataPoint, replayRuleRetryAttempts int, replayDelay time.Duration, disableProgressBar bool) int {
|
||||||
|
var total int
|
||||||
|
step := g.Interval * time.Duration(maxDataPoint)
|
||||||
|
ri := rangeIterator{start: start, end: end, step: step}
|
||||||
|
iterations := int(end.Sub(start)/step) + 1
|
||||||
|
fmt.Printf("\nGroup %q"+
|
||||||
|
"\ninterval: \t%v"+
|
||||||
|
"\nrequests to make: \t%d"+
|
||||||
|
"\nmax range per request: \t%v\n",
|
||||||
|
g.Name, g.Interval, iterations, step)
|
||||||
|
if g.Limit > 0 {
|
||||||
|
fmt.Printf("\nPlease note, `limit: %d` param has no effect during replay.\n",
|
||||||
|
g.Limit)
|
||||||
|
}
|
||||||
|
for _, rule := range g.Rules {
|
||||||
|
fmt.Printf("> Rule %q (ID: %d)\n", rule, rule.ID())
|
||||||
|
var bar *pb.ProgressBar
|
||||||
|
if !disableProgressBar {
|
||||||
|
bar = pb.StartNew(iterations)
|
||||||
|
}
|
||||||
|
ri.reset()
|
||||||
|
for ri.next() {
|
||||||
|
n, err := replayRule(rule, ri.s, ri.e, rw, replayRuleRetryAttempts)
|
||||||
|
if err != nil {
|
||||||
|
logger.Fatalf("rule %q: %s", rule, err)
|
||||||
|
}
|
||||||
|
total += n
|
||||||
|
if bar != nil {
|
||||||
|
bar.Increment()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if bar != nil {
|
||||||
|
bar.Finish()
|
||||||
|
}
|
||||||
|
// sleep to let remote storage to flush data on-disk
|
||||||
|
// so chained rules could be calculated correctly
|
||||||
|
time.Sleep(replayDelay)
|
||||||
|
}
|
||||||
|
return total
|
||||||
|
}
|
||||||
|
|
||||||
|
// ExecOnce evaluates all the rules under group for once with given timestamp.
|
||||||
|
func (g *Group) ExecOnce(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, evalTS time.Time) chan error {
|
||||||
|
e := &executor{
|
||||||
|
Rw: rw,
|
||||||
|
Notifiers: nts,
|
||||||
|
notifierHeaders: g.NotifierHeaders,
|
||||||
|
PreviouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||||
|
}
|
||||||
|
if len(g.Rules) < 1 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
resolveDuration := getResolveDuration(g.Interval, *resendDelay, *maxResolveDuration)
|
||||||
|
return e.execConcurrently(ctx, g.Rules, evalTS, g.Concurrency, resolveDuration, g.Limit)
|
||||||
|
}
|
||||||
|
|
||||||
|
// rangeIterator walks the time range [start, end] in consecutive
// sub-ranges that are at most step wide.
type rangeIterator struct {
	// step is the maximum width of a single sub-range.
	step time.Duration
	// start and end bound the whole range to iterate over.
	start, end time.Time

	// iter counts the sub-ranges produced since the last reset.
	iter int
	// s and e are the bounds of the current sub-range;
	// they are meaningful after next() returned true.
	s, e time.Time
}

// reset rewinds the iterator, so the following next() call
// starts again from ri.start.
func (ri *rangeIterator) reset() {
	ri.iter = 0
	ri.s, ri.e = time.Time{}, time.Time{}
}

// next advances the iterator to the following sub-range and stores its
// bounds in ri.s and ri.e. The last sub-range is clamped to ri.end.
//
// It returns false when the whole range has been covered, or when ri.step
// is not positive.
func (ri *rangeIterator) next() bool {
	if ri.step <= 0 {
		// With a non-positive step ri.s never moves past ri.end,
		// so a `for ri.next()` loop would never terminate.
		return false
	}
	ri.s = ri.start.Add(ri.step * time.Duration(ri.iter))
	if !ri.end.After(ri.s) {
		return false
	}
	ri.e = ri.s.Add(ri.step)
	if ri.e.After(ri.end) {
		ri.e = ri.end
	}
	ri.iter++
	return true
}
|
||||||
|
|
||||||
// getResolveDuration returns the duration after which firing alert
|
// getResolveDuration returns the duration after which firing alert
|
||||||
// can be considered as resolved.
|
// can be considered as resolved.
|
||||||
func getResolveDuration(groupInterval, delta, maxDuration time.Duration) time.Duration {
|
func getResolveDuration(groupInterval, delta, maxDuration time.Duration) time.Duration {
|
||||||
|
@ -477,20 +594,22 @@ func (g *Group) adjustReqTimestamp(timestamp time.Time) time.Time {
|
||||||
return timestamp
|
return timestamp
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// executor contains group's notify and rw configs
|
||||||
type executor struct {
|
type executor struct {
|
||||||
notifiers func() []notifier.Notifier
|
Notifiers func() []notifier.Notifier
|
||||||
notifierHeaders map[string]string
|
notifierHeaders map[string]string
|
||||||
|
|
||||||
rw *remotewrite.Client
|
Rw remotewrite.RWClient
|
||||||
|
|
||||||
previouslySentSeriesToRWMu sync.Mutex
|
previouslySentSeriesToRWMu sync.Mutex
|
||||||
// previouslySentSeriesToRW stores series sent to RW on previous iteration
|
// PreviouslySentSeriesToRW stores series sent to RW on previous iteration
|
||||||
// map[ruleID]map[ruleLabels][]prompb.Label
|
// map[ruleID]map[ruleLabels][]prompb.Label
|
||||||
// where `ruleID` is ID of the Rule within a Group
|
// where `ruleID` is ID of the Rule within a Group
|
||||||
// and `ruleLabels` is []prompb.Label marshalled to a string
|
// and `ruleLabels` is []prompb.Label marshalled to a string
|
||||||
previouslySentSeriesToRW map[uint64]map[string][]prompbmarshal.Label
|
PreviouslySentSeriesToRW map[uint64]map[string][]prompbmarshal.Label
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// execConcurrently executes rules concurrently if concurrency>1
|
||||||
func (e *executor) execConcurrently(ctx context.Context, rules []Rule, ts time.Time, concurrency int, resolveDuration time.Duration, limit int) chan error {
|
func (e *executor) execConcurrently(ctx context.Context, rules []Rule, ts time.Time, concurrency int, resolveDuration time.Duration, limit int) chan error {
|
||||||
res := make(chan error, len(rules))
|
res := make(chan error, len(rules))
|
||||||
if concurrency == 1 {
|
if concurrency == 1 {
|
||||||
|
@ -505,14 +624,14 @@ func (e *executor) execConcurrently(ctx context.Context, rules []Rule, ts time.T
|
||||||
sem := make(chan struct{}, concurrency)
|
sem := make(chan struct{}, concurrency)
|
||||||
go func() {
|
go func() {
|
||||||
wg := sync.WaitGroup{}
|
wg := sync.WaitGroup{}
|
||||||
for _, rule := range rules {
|
for _, r := range rules {
|
||||||
sem <- struct{}{}
|
sem <- struct{}{}
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
go func(r Rule) {
|
go func(r Rule) {
|
||||||
res <- e.exec(ctx, r, ts, resolveDuration, limit)
|
res <- e.exec(ctx, r, ts, resolveDuration, limit)
|
||||||
<-sem
|
<-sem
|
||||||
wg.Done()
|
wg.Done()
|
||||||
}(rule)
|
}(r)
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
close(res)
|
close(res)
|
||||||
|
@ -530,10 +649,10 @@ var (
|
||||||
remoteWriteTotal = metrics.NewCounter(`vmalert_remotewrite_total`)
|
remoteWriteTotal = metrics.NewCounter(`vmalert_remotewrite_total`)
|
||||||
)
|
)
|
||||||
|
|
||||||
func (e *executor) exec(ctx context.Context, rule Rule, ts time.Time, resolveDuration time.Duration, limit int) error {
|
func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDuration time.Duration, limit int) error {
|
||||||
execTotal.Inc()
|
execTotal.Inc()
|
||||||
|
|
||||||
tss, err := rule.Exec(ctx, ts, limit)
|
tss, err := r.exec(ctx, ts, limit)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if errors.Is(err, context.Canceled) {
|
if errors.Is(err, context.Canceled) {
|
||||||
// the context can be cancelled on graceful shutdown
|
// the context can be cancelled on graceful shutdown
|
||||||
|
@ -541,17 +660,17 @@ func (e *executor) exec(ctx context.Context, rule Rule, ts time.Time, resolveDur
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
execErrors.Inc()
|
execErrors.Inc()
|
||||||
return fmt.Errorf("rule %q: failed to execute: %w", rule, err)
|
return fmt.Errorf("rule %q: failed to execute: %w", r, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if e.rw != nil {
|
if e.Rw != nil {
|
||||||
pushToRW := func(tss []prompbmarshal.TimeSeries) error {
|
pushToRW := func(tss []prompbmarshal.TimeSeries) error {
|
||||||
var lastErr error
|
var lastErr error
|
||||||
for _, ts := range tss {
|
for _, ts := range tss {
|
||||||
remoteWriteTotal.Inc()
|
remoteWriteTotal.Inc()
|
||||||
if err := e.rw.Push(ts); err != nil {
|
if err := e.Rw.Push(ts); err != nil {
|
||||||
remoteWriteErrors.Inc()
|
remoteWriteErrors.Inc()
|
||||||
lastErr = fmt.Errorf("rule %q: remote write failure: %w", rule, err)
|
lastErr = fmt.Errorf("rule %q: remote write failure: %w", r, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return lastErr
|
return lastErr
|
||||||
|
@ -560,13 +679,13 @@ func (e *executor) exec(ctx context.Context, rule Rule, ts time.Time, resolveDur
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
staleSeries := e.getStaleSeries(rule, tss, ts)
|
staleSeries := e.getStaleSeries(r, tss, ts)
|
||||||
if err := pushToRW(staleSeries); err != nil {
|
if err := pushToRW(staleSeries); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ar, ok := rule.(*AlertingRule)
|
ar, ok := r.(*AlertingRule)
|
||||||
if !ok {
|
if !ok {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -578,11 +697,11 @@ func (e *executor) exec(ctx context.Context, rule Rule, ts time.Time, resolveDur
|
||||||
|
|
||||||
wg := sync.WaitGroup{}
|
wg := sync.WaitGroup{}
|
||||||
errGr := new(utils.ErrGroup)
|
errGr := new(utils.ErrGroup)
|
||||||
for _, nt := range e.notifiers() {
|
for _, nt := range e.Notifiers() {
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
go func(nt notifier.Notifier) {
|
go func(nt notifier.Notifier) {
|
||||||
if err := nt.Send(ctx, alerts, e.notifierHeaders); err != nil {
|
if err := nt.Send(ctx, alerts, e.notifierHeaders); err != nil {
|
||||||
errGr.Add(fmt.Errorf("rule %q: failed to send alerts to addr %q: %w", rule, nt.Addr(), err))
|
errGr.Add(fmt.Errorf("rule %q: failed to send alerts to addr %q: %w", r, nt.Addr(), err))
|
||||||
}
|
}
|
||||||
wg.Done()
|
wg.Done()
|
||||||
}(nt)
|
}(nt)
|
||||||
|
@ -592,7 +711,7 @@ func (e *executor) exec(ctx context.Context, rule Rule, ts time.Time, resolveDur
|
||||||
}
|
}
|
||||||
|
|
||||||
// getStaleSeries checks whether there are stale series from previously sent ones.
|
// getStaleSeries checks whether there are stale series from previously sent ones.
|
||||||
func (e *executor) getStaleSeries(rule Rule, tss []prompbmarshal.TimeSeries, timestamp time.Time) []prompbmarshal.TimeSeries {
|
func (e *executor) getStaleSeries(r Rule, tss []prompbmarshal.TimeSeries, timestamp time.Time) []prompbmarshal.TimeSeries {
|
||||||
ruleLabels := make(map[string][]prompbmarshal.Label, len(tss))
|
ruleLabels := make(map[string][]prompbmarshal.Label, len(tss))
|
||||||
for _, ts := range tss {
|
for _, ts := range tss {
|
||||||
// convert labels to strings so we can compare with previously sent series
|
// convert labels to strings so we can compare with previously sent series
|
||||||
|
@ -600,11 +719,11 @@ func (e *executor) getStaleSeries(rule Rule, tss []prompbmarshal.TimeSeries, tim
|
||||||
ruleLabels[key] = ts.Labels
|
ruleLabels[key] = ts.Labels
|
||||||
}
|
}
|
||||||
|
|
||||||
rID := rule.ID()
|
rID := r.ID()
|
||||||
var staleS []prompbmarshal.TimeSeries
|
var staleS []prompbmarshal.TimeSeries
|
||||||
// check whether there are series which disappeared and need to be marked as stale
|
// check whether there are series which disappeared and need to be marked as stale
|
||||||
e.previouslySentSeriesToRWMu.Lock()
|
e.previouslySentSeriesToRWMu.Lock()
|
||||||
for key, labels := range e.previouslySentSeriesToRW[rID] {
|
for key, labels := range e.PreviouslySentSeriesToRW[rID] {
|
||||||
if _, ok := ruleLabels[key]; ok {
|
if _, ok := ruleLabels[key]; ok {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -613,7 +732,7 @@ func (e *executor) getStaleSeries(rule Rule, tss []prompbmarshal.TimeSeries, tim
|
||||||
staleS = append(staleS, ss)
|
staleS = append(staleS, ss)
|
||||||
}
|
}
|
||||||
// set previous series to current
|
// set previous series to current
|
||||||
e.previouslySentSeriesToRW[rID] = ruleLabels
|
e.PreviouslySentSeriesToRW[rID] = ruleLabels
|
||||||
e.previouslySentSeriesToRWMu.Unlock()
|
e.previouslySentSeriesToRWMu.Unlock()
|
||||||
|
|
||||||
return staleS
|
return staleS
|
||||||
|
@ -631,14 +750,14 @@ func (e *executor) purgeStaleSeries(activeRules []Rule) {
|
||||||
|
|
||||||
for _, rule := range activeRules {
|
for _, rule := range activeRules {
|
||||||
id := rule.ID()
|
id := rule.ID()
|
||||||
prev, ok := e.previouslySentSeriesToRW[id]
|
prev, ok := e.PreviouslySentSeriesToRW[id]
|
||||||
if ok {
|
if ok {
|
||||||
// keep previous series for staleness detection
|
// keep previous series for staleness detection
|
||||||
newPreviouslySentSeriesToRW[id] = prev
|
newPreviouslySentSeriesToRW[id] = prev
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
e.previouslySentSeriesToRW = nil
|
e.PreviouslySentSeriesToRW = nil
|
||||||
e.previouslySentSeriesToRW = newPreviouslySentSeriesToRW
|
e.PreviouslySentSeriesToRW = newPreviouslySentSeriesToRW
|
||||||
|
|
||||||
e.previouslySentSeriesToRWMu.Unlock()
|
e.previouslySentSeriesToRWMu.Unlock()
|
||||||
}
|
}
|
|
@ -1,17 +1,22 @@
|
||||||
package main
|
package rule
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
|
"os"
|
||||||
"reflect"
|
"reflect"
|
||||||
"sort"
|
"sort"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"gopkg.in/yaml.v2"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||||
|
@ -20,7 +25,15 @@ import (
|
||||||
func init() {
|
func init() {
|
||||||
// Disable rand sleep on group start during tests in order to speed up test execution.
|
// Disable rand sleep on group start during tests in order to speed up test execution.
|
||||||
// Rand sleep is needed only in prod code.
|
// Rand sleep is needed only in prod code.
|
||||||
skipRandSleepOnGroupStart = true
|
SkipRandSleepOnGroupStart = true
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMain(m *testing.M) {
|
||||||
|
if err := templates.Load([]string{}, true); err != nil {
|
||||||
|
fmt.Println("failed to load template for test")
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
os.Exit(m.Run())
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestUpdateWith(t *testing.T) {
|
func TestUpdateWith(t *testing.T) {
|
||||||
|
@ -138,7 +151,7 @@ func TestUpdateWith(t *testing.T) {
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
t.Run(tc.name, func(t *testing.T) {
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
g := &Group{Name: "test"}
|
g := &Group{Name: "test"}
|
||||||
qb := &fakeQuerier{}
|
qb := &datasource.FakeQuerier{}
|
||||||
for _, r := range tc.currentRules {
|
for _, r := range tc.currentRules {
|
||||||
r.ID = config.HashRule(r)
|
r.ID = config.HashRule(r)
|
||||||
g.Rules = append(g.Rules, g.newRule(qb, r))
|
g.Rules = append(g.Rules, g.newRule(qb, r))
|
||||||
|
@ -170,7 +183,7 @@ func TestUpdateWith(t *testing.T) {
|
||||||
if got.ID() != want.ID() {
|
if got.ID() != want.ID() {
|
||||||
t.Fatalf("expected to have rule %q; got %q", want, got)
|
t.Fatalf("expected to have rule %q; got %q", want, got)
|
||||||
}
|
}
|
||||||
if err := compareRules(t, got, want); err != nil {
|
if err := CompareRules(t, got, want); err != nil {
|
||||||
t.Fatalf("comparison error: %s", err)
|
t.Fatalf("comparison error: %s", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -179,17 +192,31 @@ func TestUpdateWith(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGroupStart(t *testing.T) {
|
func TestGroupStart(t *testing.T) {
|
||||||
// TODO: make parsing from string instead of file
|
const (
|
||||||
groups, err := config.Parse([]string{"config/testdata/rules/rules1-good.rules"}, notifier.ValidateTemplates, true)
|
rules = `
|
||||||
|
- name: groupTest
|
||||||
|
rules:
|
||||||
|
- alert: VMRows
|
||||||
|
for: 1ms
|
||||||
|
expr: vm_rows > 0
|
||||||
|
labels:
|
||||||
|
label: bar
|
||||||
|
host: "{{ $labels.instance }}"
|
||||||
|
annotations:
|
||||||
|
summary: "{{ $value }}"
|
||||||
|
`
|
||||||
|
)
|
||||||
|
var groups []config.Group
|
||||||
|
err := yaml.Unmarshal([]byte(rules), &groups)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("failed to parse rules: %s", err)
|
t.Fatalf("failed to parse rules: %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
fs := &fakeQuerier{}
|
fs := &datasource.FakeQuerier{}
|
||||||
fn := &fakeNotifier{}
|
fn := &notifier.FakeNotifier{}
|
||||||
|
|
||||||
const evalInterval = time.Millisecond
|
const evalInterval = time.Millisecond
|
||||||
g := newGroup(groups[0], fs, evalInterval, map[string]string{"cluster": "east-1"})
|
g := NewGroup(groups[0], fs, evalInterval, map[string]string{"cluster": "east-1"})
|
||||||
g.Concurrency = 2
|
g.Concurrency = 2
|
||||||
|
|
||||||
const inst1, inst2, job = "foo", "bar", "baz"
|
const inst1, inst2, job = "foo", "bar", "baz"
|
||||||
|
@ -204,7 +231,7 @@ func TestGroupStart(t *testing.T) {
|
||||||
alert1.State = notifier.StateFiring
|
alert1.State = notifier.StateFiring
|
||||||
// add external label
|
// add external label
|
||||||
alert1.Labels["cluster"] = "east-1"
|
alert1.Labels["cluster"] = "east-1"
|
||||||
// add rule labels - see config/testdata/rules1-good.rules
|
// add rule labels
|
||||||
alert1.Labels["label"] = "bar"
|
alert1.Labels["label"] = "bar"
|
||||||
alert1.Labels["host"] = inst1
|
alert1.Labels["host"] = inst1
|
||||||
// add service labels
|
// add service labels
|
||||||
|
@ -219,7 +246,7 @@ func TestGroupStart(t *testing.T) {
|
||||||
alert2.State = notifier.StateFiring
|
alert2.State = notifier.StateFiring
|
||||||
// add external label
|
// add external label
|
||||||
alert2.Labels["cluster"] = "east-1"
|
alert2.Labels["cluster"] = "east-1"
|
||||||
// add rule labels - see config/testdata/rules1-good.rules
|
// add rule labels
|
||||||
alert2.Labels["label"] = "bar"
|
alert2.Labels["label"] = "bar"
|
||||||
alert2.Labels["host"] = inst2
|
alert2.Labels["host"] = inst2
|
||||||
// add service labels
|
// add service labels
|
||||||
|
@ -228,40 +255,40 @@ func TestGroupStart(t *testing.T) {
|
||||||
alert2.ID = hash(alert2.Labels)
|
alert2.ID = hash(alert2.Labels)
|
||||||
|
|
||||||
finished := make(chan struct{})
|
finished := make(chan struct{})
|
||||||
fs.add(m1)
|
fs.Add(m1)
|
||||||
fs.add(m2)
|
fs.Add(m2)
|
||||||
go func() {
|
go func() {
|
||||||
g.start(context.Background(), func() []notifier.Notifier { return []notifier.Notifier{fn} }, nil, fs)
|
g.Start(context.Background(), func() []notifier.Notifier { return []notifier.Notifier{fn} }, nil, fs)
|
||||||
close(finished)
|
close(finished)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// wait for multiple evals
|
// wait for multiple evals
|
||||||
time.Sleep(20 * evalInterval)
|
time.Sleep(20 * evalInterval)
|
||||||
|
|
||||||
gotAlerts := fn.getAlerts()
|
gotAlerts := fn.GetAlerts()
|
||||||
expectedAlerts := []notifier.Alert{*alert1, *alert2}
|
expectedAlerts := []notifier.Alert{*alert1, *alert2}
|
||||||
compareAlerts(t, expectedAlerts, gotAlerts)
|
compareAlerts(t, expectedAlerts, gotAlerts)
|
||||||
|
|
||||||
gotAlertsNum := fn.getCounter()
|
gotAlertsNum := fn.GetCounter()
|
||||||
if gotAlertsNum < len(expectedAlerts)*2 {
|
if gotAlertsNum < len(expectedAlerts)*2 {
|
||||||
t.Fatalf("expected to receive at least %d alerts; got %d instead",
|
t.Fatalf("expected to receive at least %d alerts; got %d instead",
|
||||||
len(expectedAlerts)*2, gotAlertsNum)
|
len(expectedAlerts)*2, gotAlertsNum)
|
||||||
}
|
}
|
||||||
|
|
||||||
// reset previous data
|
// reset previous data
|
||||||
fs.reset()
|
fs.Reset()
|
||||||
// and set only one datapoint for response
|
// and set only one datapoint for response
|
||||||
fs.add(m1)
|
fs.Add(m1)
|
||||||
|
|
||||||
// wait for multiple evals
|
// wait for multiple evals
|
||||||
time.Sleep(20 * evalInterval)
|
time.Sleep(20 * evalInterval)
|
||||||
|
|
||||||
gotAlerts = fn.getAlerts()
|
gotAlerts = fn.GetAlerts()
|
||||||
alert2.State = notifier.StateInactive
|
alert2.State = notifier.StateInactive
|
||||||
expectedAlerts = []notifier.Alert{*alert1, *alert2}
|
expectedAlerts = []notifier.Alert{*alert1, *alert2}
|
||||||
compareAlerts(t, expectedAlerts, gotAlerts)
|
compareAlerts(t, expectedAlerts, gotAlerts)
|
||||||
|
|
||||||
g.close()
|
g.Close()
|
||||||
<-finished
|
<-finished
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -294,15 +321,15 @@ func TestResolveDuration(t *testing.T) {
|
||||||
func TestGetStaleSeries(t *testing.T) {
|
func TestGetStaleSeries(t *testing.T) {
|
||||||
ts := time.Now()
|
ts := time.Now()
|
||||||
e := &executor{
|
e := &executor{
|
||||||
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
PreviouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||||
}
|
}
|
||||||
f := func(rule Rule, labels, expLabels [][]prompbmarshal.Label) {
|
f := func(r Rule, labels, expLabels [][]prompbmarshal.Label) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
var tss []prompbmarshal.TimeSeries
|
var tss []prompbmarshal.TimeSeries
|
||||||
for _, l := range labels {
|
for _, l := range labels {
|
||||||
tss = append(tss, newTimeSeriesPB([]float64{1}, []int64{ts.Unix()}, l))
|
tss = append(tss, newTimeSeriesPB([]float64{1}, []int64{ts.Unix()}, l))
|
||||||
}
|
}
|
||||||
staleS := e.getStaleSeries(rule, tss, ts)
|
staleS := e.getStaleSeries(r, tss, ts)
|
||||||
if staleS == nil && expLabels == nil {
|
if staleS == nil && expLabels == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -387,7 +414,7 @@ func TestPurgeStaleSeries(t *testing.T) {
|
||||||
f := func(curRules, newRules, expStaleRules []Rule) {
|
f := func(curRules, newRules, expStaleRules []Rule) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
e := &executor{
|
e := &executor{
|
||||||
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
PreviouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||||
}
|
}
|
||||||
// seed executor with series for
|
// seed executor with series for
|
||||||
// current rules
|
// current rules
|
||||||
|
@ -397,13 +424,13 @@ func TestPurgeStaleSeries(t *testing.T) {
|
||||||
|
|
||||||
e.purgeStaleSeries(newRules)
|
e.purgeStaleSeries(newRules)
|
||||||
|
|
||||||
if len(e.previouslySentSeriesToRW) != len(expStaleRules) {
|
if len(e.PreviouslySentSeriesToRW) != len(expStaleRules) {
|
||||||
t.Fatalf("expected to get %d stale series, got %d",
|
t.Fatalf("expected to get %d stale series, got %d",
|
||||||
len(expStaleRules), len(e.previouslySentSeriesToRW))
|
len(expStaleRules), len(e.PreviouslySentSeriesToRW))
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, exp := range expStaleRules {
|
for _, exp := range expStaleRules {
|
||||||
if _, ok := e.previouslySentSeriesToRW[exp.ID()]; !ok {
|
if _, ok := e.PreviouslySentSeriesToRW[exp.ID()]; !ok {
|
||||||
t.Fatalf("expected to have rule %d; got nil instead", exp.ID())
|
t.Fatalf("expected to have rule %d; got nil instead", exp.ID())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -438,17 +465,17 @@ func TestPurgeStaleSeries(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestFaultyNotifier(t *testing.T) {
|
func TestFaultyNotifier(t *testing.T) {
|
||||||
fq := &fakeQuerier{}
|
fq := &datasource.FakeQuerier{}
|
||||||
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
|
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
|
||||||
|
|
||||||
r := newTestAlertingRule("instant", 0)
|
r := newTestAlertingRule("instant", 0)
|
||||||
r.q = fq
|
r.q = fq
|
||||||
|
|
||||||
fn := &fakeNotifier{}
|
fn := ¬ifier.FakeNotifier{}
|
||||||
e := &executor{
|
e := &executor{
|
||||||
notifiers: func() []notifier.Notifier {
|
Notifiers: func() []notifier.Notifier {
|
||||||
return []notifier.Notifier{
|
return []notifier.Notifier{
|
||||||
&faultyNotifier{},
|
¬ifier.FaultyNotifier{},
|
||||||
fn,
|
fn,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
@ -464,7 +491,7 @@ func TestFaultyNotifier(t *testing.T) {
|
||||||
tn := time.Now()
|
tn := time.Now()
|
||||||
deadline := tn.Add(delay / 2)
|
deadline := tn.Add(delay / 2)
|
||||||
for {
|
for {
|
||||||
if fn.getCounter() > 0 {
|
if fn.GetCounter() > 0 {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if tn.After(deadline) {
|
if tn.After(deadline) {
|
||||||
|
@ -477,18 +504,18 @@ func TestFaultyNotifier(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestFaultyRW(t *testing.T) {
|
func TestFaultyRW(t *testing.T) {
|
||||||
fq := &fakeQuerier{}
|
fq := &datasource.FakeQuerier{}
|
||||||
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
|
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
|
||||||
|
|
||||||
r := &RecordingRule{
|
r := &RecordingRule{
|
||||||
Name: "test",
|
Name: "test",
|
||||||
state: newRuleState(10),
|
|
||||||
q: fq,
|
q: fq,
|
||||||
|
state: &ruleState{entries: make([]StateEntry, 10)},
|
||||||
}
|
}
|
||||||
|
|
||||||
e := &executor{
|
e := &executor{
|
||||||
rw: &remotewrite.Client{},
|
Rw: &remotewrite.Client{},
|
||||||
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
PreviouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||||
}
|
}
|
||||||
|
|
||||||
err := e.exec(context.Background(), r, time.Now(), 0, 10)
|
err := e.exec(context.Background(), r, time.Now(), 0, 10)
|
||||||
|
@ -498,23 +525,38 @@ func TestFaultyRW(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestCloseWithEvalInterruption(t *testing.T) {
|
func TestCloseWithEvalInterruption(t *testing.T) {
|
||||||
groups, err := config.Parse([]string{"config/testdata/rules/rules1-good.rules"}, notifier.ValidateTemplates, true)
|
const (
|
||||||
|
rules = `
|
||||||
|
- name: groupTest
|
||||||
|
rules:
|
||||||
|
- alert: VMRows
|
||||||
|
for: 1ms
|
||||||
|
expr: vm_rows > 0
|
||||||
|
labels:
|
||||||
|
label: bar
|
||||||
|
host: "{{ $labels.instance }}"
|
||||||
|
annotations:
|
||||||
|
summary: "{{ $value }}"
|
||||||
|
`
|
||||||
|
)
|
||||||
|
var groups []config.Group
|
||||||
|
err := yaml.Unmarshal([]byte(rules), &groups)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("failed to parse rules: %s", err)
|
t.Fatalf("failed to parse rules: %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
const delay = time.Second * 2
|
const delay = time.Second * 2
|
||||||
fq := &fakeQuerierWithDelay{delay: delay}
|
fq := &datasource.FakeQuerierWithDelay{Delay: delay}
|
||||||
|
|
||||||
const evalInterval = time.Millisecond
|
const evalInterval = time.Millisecond
|
||||||
g := newGroup(groups[0], fq, evalInterval, nil)
|
g := NewGroup(groups[0], fq, evalInterval, nil)
|
||||||
|
|
||||||
go g.start(context.Background(), nil, nil, nil)
|
go g.Start(context.Background(), nil, nil, nil)
|
||||||
|
|
||||||
time.Sleep(evalInterval * 20)
|
time.Sleep(evalInterval * 20)
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
g.close()
|
g.Close()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
deadline := time.Tick(delay / 2)
|
deadline := time.Tick(delay / 2)
|
||||||
|
@ -637,3 +679,81 @@ func TestGetPrometheusReqTimestamp(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRangeIterator(t *testing.T) {
|
||||||
|
testCases := []struct {
|
||||||
|
ri rangeIterator
|
||||||
|
result [][2]time.Time
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
ri: rangeIterator{
|
||||||
|
start: parseTime(t, "2021-01-01T12:00:00.000Z"),
|
||||||
|
end: parseTime(t, "2021-01-01T12:30:00.000Z"),
|
||||||
|
step: 5 * time.Minute,
|
||||||
|
},
|
||||||
|
result: [][2]time.Time{
|
||||||
|
{parseTime(t, "2021-01-01T12:00:00.000Z"), parseTime(t, "2021-01-01T12:05:00.000Z")},
|
||||||
|
{parseTime(t, "2021-01-01T12:05:00.000Z"), parseTime(t, "2021-01-01T12:10:00.000Z")},
|
||||||
|
{parseTime(t, "2021-01-01T12:10:00.000Z"), parseTime(t, "2021-01-01T12:15:00.000Z")},
|
||||||
|
{parseTime(t, "2021-01-01T12:15:00.000Z"), parseTime(t, "2021-01-01T12:20:00.000Z")},
|
||||||
|
{parseTime(t, "2021-01-01T12:20:00.000Z"), parseTime(t, "2021-01-01T12:25:00.000Z")},
|
||||||
|
{parseTime(t, "2021-01-01T12:25:00.000Z"), parseTime(t, "2021-01-01T12:30:00.000Z")},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ri: rangeIterator{
|
||||||
|
start: parseTime(t, "2021-01-01T12:00:00.000Z"),
|
||||||
|
end: parseTime(t, "2021-01-01T12:30:00.000Z"),
|
||||||
|
step: 45 * time.Minute,
|
||||||
|
},
|
||||||
|
result: [][2]time.Time{
|
||||||
|
{parseTime(t, "2021-01-01T12:00:00.000Z"), parseTime(t, "2021-01-01T12:30:00.000Z")},
|
||||||
|
{parseTime(t, "2021-01-01T12:30:00.000Z"), parseTime(t, "2021-01-01T12:30:00.000Z")},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ri: rangeIterator{
|
||||||
|
start: parseTime(t, "2021-01-01T12:00:12.000Z"),
|
||||||
|
end: parseTime(t, "2021-01-01T12:00:17.000Z"),
|
||||||
|
step: time.Second,
|
||||||
|
},
|
||||||
|
result: [][2]time.Time{
|
||||||
|
{parseTime(t, "2021-01-01T12:00:12.000Z"), parseTime(t, "2021-01-01T12:00:13.000Z")},
|
||||||
|
{parseTime(t, "2021-01-01T12:00:13.000Z"), parseTime(t, "2021-01-01T12:00:14.000Z")},
|
||||||
|
{parseTime(t, "2021-01-01T12:00:14.000Z"), parseTime(t, "2021-01-01T12:00:15.000Z")},
|
||||||
|
{parseTime(t, "2021-01-01T12:00:15.000Z"), parseTime(t, "2021-01-01T12:00:16.000Z")},
|
||||||
|
{parseTime(t, "2021-01-01T12:00:16.000Z"), parseTime(t, "2021-01-01T12:00:17.000Z")},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, tc := range testCases {
|
||||||
|
t.Run(fmt.Sprintf("case %d", i), func(t *testing.T) {
|
||||||
|
var j int
|
||||||
|
for tc.ri.next() {
|
||||||
|
if len(tc.result) < j+1 {
|
||||||
|
t.Fatalf("unexpected result for iterator on step %d: %v - %v",
|
||||||
|
j, tc.ri.s, tc.ri.e)
|
||||||
|
}
|
||||||
|
s, e := tc.ri.s, tc.ri.e
|
||||||
|
expS, expE := tc.result[j][0], tc.result[j][1]
|
||||||
|
if s != expS {
|
||||||
|
t.Fatalf("expected to get start=%v; got %v", expS, s)
|
||||||
|
}
|
||||||
|
if e != expE {
|
||||||
|
t.Fatalf("expected to get end=%v; got %v", expE, e)
|
||||||
|
}
|
||||||
|
j++
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseTime is a test helper that parses s using the
// "2006-01-02T15:04:05.000Z" layout and aborts the test
// via t.Fatal if s does not match it.
func parseTime(t *testing.T, s string) time.Time {
	t.Helper()
	const layout = "2006-01-02T15:04:05.000Z"
	parsed, err := time.Parse(layout, s)
	if err == nil {
		return parsed
	}
	t.Fatal(err)
	return parsed
}
|
|
@ -1,4 +1,4 @@
|
||||||
package main
|
package rule
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
@ -49,7 +49,8 @@ func (rr *RecordingRule) ID() uint64 {
|
||||||
return rr.RuleID
|
return rr.RuleID
|
||||||
}
|
}
|
||||||
|
|
||||||
func newRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *RecordingRule {
|
// NewRecordingRule creates a new RecordingRule
|
||||||
|
func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *RecordingRule {
|
||||||
rr := &RecordingRule{
|
rr := &RecordingRule{
|
||||||
Type: group.Type,
|
Type: group.Type,
|
||||||
RuleID: cfg.ID,
|
RuleID: cfg.ID,
|
||||||
|
@ -66,17 +67,22 @@ func newRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul
|
||||||
}),
|
}),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
entrySize := *ruleUpdateEntriesLimit
|
||||||
if cfg.UpdateEntriesLimit != nil {
|
if cfg.UpdateEntriesLimit != nil {
|
||||||
rr.state = newRuleState(*cfg.UpdateEntriesLimit)
|
entrySize = *cfg.UpdateEntriesLimit
|
||||||
} else {
|
}
|
||||||
rr.state = newRuleState(*ruleUpdateEntriesLimit)
|
if entrySize < 1 {
|
||||||
|
entrySize = 1
|
||||||
|
}
|
||||||
|
rr.state = &ruleState{
|
||||||
|
entries: make([]StateEntry, entrySize),
|
||||||
}
|
}
|
||||||
|
|
||||||
labels := fmt.Sprintf(`recording=%q, group=%q, id="%d"`, rr.Name, group.Name, rr.ID())
|
labels := fmt.Sprintf(`recording=%q, group=%q, id="%d"`, rr.Name, group.Name, rr.ID())
|
||||||
rr.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_error{%s}`, labels),
|
rr.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_error{%s}`, labels),
|
||||||
func() float64 {
|
func() float64 {
|
||||||
e := rr.state.getLast()
|
e := rr.state.getLast()
|
||||||
if e.err == nil {
|
if e.Err == nil {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
return 1
|
return 1
|
||||||
|
@ -84,21 +90,21 @@ func newRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul
|
||||||
rr.metrics.samples = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_last_evaluation_samples{%s}`, labels),
|
rr.metrics.samples = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_last_evaluation_samples{%s}`, labels),
|
||||||
func() float64 {
|
func() float64 {
|
||||||
e := rr.state.getLast()
|
e := rr.state.getLast()
|
||||||
return float64(e.samples)
|
return float64(e.Samples)
|
||||||
})
|
})
|
||||||
return rr
|
return rr
|
||||||
}
|
}
|
||||||
|
|
||||||
// Close unregisters rule metrics
|
// close unregisters rule metrics
|
||||||
func (rr *RecordingRule) Close() {
|
func (rr *RecordingRule) close() {
|
||||||
rr.metrics.errors.Unregister()
|
rr.metrics.errors.Unregister()
|
||||||
rr.metrics.samples.Unregister()
|
rr.metrics.samples.Unregister()
|
||||||
}
|
}
|
||||||
|
|
||||||
// ExecRange executes recording rule on the given time range similarly to Exec.
|
// execRange executes recording rule on the given time range similarly to Exec.
|
||||||
// It doesn't update internal states of the Rule and meant to be used just
|
// It doesn't update internal states of the Rule and meant to be used just
|
||||||
// to get time series for backfilling.
|
// to get time series for backfilling.
|
||||||
func (rr *RecordingRule) ExecRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error) {
|
func (rr *RecordingRule) execRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error) {
|
||||||
res, err := rr.q.QueryRange(ctx, rr.Expr, start, end)
|
res, err := rr.q.QueryRange(ctx, rr.Expr, start, end)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -117,17 +123,17 @@ func (rr *RecordingRule) ExecRange(ctx context.Context, start, end time.Time) ([
|
||||||
return tss, nil
|
return tss, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Exec executes RecordingRule expression via the given Querier.
|
// exec executes RecordingRule expression via the given Querier.
|
||||||
func (rr *RecordingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error) {
|
func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error) {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
res, req, err := rr.q.Query(ctx, rr.Expr, ts)
|
res, req, err := rr.q.Query(ctx, rr.Expr, ts)
|
||||||
curState := ruleStateEntry{
|
curState := StateEntry{
|
||||||
time: start,
|
Time: start,
|
||||||
at: ts,
|
At: ts,
|
||||||
duration: time.Since(start),
|
Duration: time.Since(start),
|
||||||
samples: len(res.Data),
|
Samples: len(res.Data),
|
||||||
seriesFetched: res.SeriesFetched,
|
SeriesFetched: res.SeriesFetched,
|
||||||
curl: requestToCurl(req),
|
Curl: requestToCurl(req),
|
||||||
}
|
}
|
||||||
|
|
||||||
defer func() {
|
defer func() {
|
||||||
|
@ -135,15 +141,15 @@ func (rr *RecordingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]p
|
||||||
}()
|
}()
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
curState.err = fmt.Errorf("failed to execute query %q: %w", rr.Expr, err)
|
curState.Err = fmt.Errorf("failed to execute query %q: %w", rr.Expr, err)
|
||||||
return nil, curState.err
|
return nil, curState.Err
|
||||||
}
|
}
|
||||||
|
|
||||||
qMetrics := res.Data
|
qMetrics := res.Data
|
||||||
numSeries := len(qMetrics)
|
numSeries := len(qMetrics)
|
||||||
if limit > 0 && numSeries > limit {
|
if limit > 0 && numSeries > limit {
|
||||||
curState.err = fmt.Errorf("exec exceeded limit of %d with %d series", limit, numSeries)
|
curState.Err = fmt.Errorf("exec exceeded limit of %d with %d series", limit, numSeries)
|
||||||
return nil, curState.err
|
return nil, curState.Err
|
||||||
}
|
}
|
||||||
|
|
||||||
duplicates := make(map[string]struct{}, len(qMetrics))
|
duplicates := make(map[string]struct{}, len(qMetrics))
|
||||||
|
@ -152,8 +158,8 @@ func (rr *RecordingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]p
|
||||||
ts := rr.toTimeSeries(r)
|
ts := rr.toTimeSeries(r)
|
||||||
key := stringifyLabels(ts)
|
key := stringifyLabels(ts)
|
||||||
if _, ok := duplicates[key]; ok {
|
if _, ok := duplicates[key]; ok {
|
||||||
curState.err = fmt.Errorf("original metric %v; resulting labels %q: %w", r, key, errDuplicate)
|
curState.Err = fmt.Errorf("original metric %v; resulting labels %q: %w", r, key, errDuplicate)
|
||||||
return nil, curState.err
|
return nil, curState.Err
|
||||||
}
|
}
|
||||||
duplicates[key] = struct{}{}
|
duplicates[key] = struct{}{}
|
||||||
tss = append(tss, ts)
|
tss = append(tss, ts)
|
||||||
|
@ -193,8 +199,8 @@ func (rr *RecordingRule) toTimeSeries(m datasource.Metric) prompbmarshal.TimeSer
|
||||||
return newTimeSeries(m.Values, m.Timestamps, labels)
|
return newTimeSeries(m.Values, m.Timestamps, labels)
|
||||||
}
|
}
|
||||||
|
|
||||||
// UpdateWith copies all significant fields.
|
// updateWith copies all significant fields.
|
||||||
func (rr *RecordingRule) UpdateWith(r Rule) error {
|
func (rr *RecordingRule) updateWith(r Rule) error {
|
||||||
nr, ok := r.(*RecordingRule)
|
nr, ok := r.(*RecordingRule)
|
||||||
if !ok {
|
if !ok {
|
||||||
return fmt.Errorf("BUG: attempt to update recroding rule with wrong type %#v", r)
|
return fmt.Errorf("BUG: attempt to update recroding rule with wrong type %#v", r)
|
||||||
|
@ -204,32 +210,3 @@ func (rr *RecordingRule) UpdateWith(r Rule) error {
|
||||||
rr.q = nr.q
|
rr.q = nr.q
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// ToAPI returns Rule's representation in form
|
|
||||||
// of APIRule
|
|
||||||
func (rr *RecordingRule) ToAPI() APIRule {
|
|
||||||
lastState := rr.state.getLast()
|
|
||||||
r := APIRule{
|
|
||||||
Type: "recording",
|
|
||||||
DatasourceType: rr.Type.String(),
|
|
||||||
Name: rr.Name,
|
|
||||||
Query: rr.Expr,
|
|
||||||
Labels: rr.Labels,
|
|
||||||
LastEvaluation: lastState.time,
|
|
||||||
EvaluationTime: lastState.duration.Seconds(),
|
|
||||||
Health: "ok",
|
|
||||||
LastSamples: lastState.samples,
|
|
||||||
LastSeriesFetched: lastState.seriesFetched,
|
|
||||||
MaxUpdates: rr.state.size(),
|
|
||||||
Updates: rr.state.getAll(),
|
|
||||||
|
|
||||||
// encode as strings to avoid rounding
|
|
||||||
ID: fmt.Sprintf("%d", rr.ID()),
|
|
||||||
GroupID: fmt.Sprintf("%d", rr.GroupID),
|
|
||||||
}
|
|
||||||
if lastState.err != nil {
|
|
||||||
r.LastError = lastState.err.Error()
|
|
||||||
r.Health = "err"
|
|
||||||
}
|
|
||||||
return r
|
|
||||||
}
|
|
|
@ -1,4 +1,4 @@
|
||||||
package main
|
package rule
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
@ -56,10 +56,12 @@ func TestRecordingRule_Exec(t *testing.T) {
|
||||||
Name: "job:foo",
|
Name: "job:foo",
|
||||||
Labels: map[string]string{
|
Labels: map[string]string{
|
||||||
"source": "test",
|
"source": "test",
|
||||||
}},
|
},
|
||||||
|
},
|
||||||
[]datasource.Metric{
|
[]datasource.Metric{
|
||||||
metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "foo"),
|
metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "foo"),
|
||||||
metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar")},
|
metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar"),
|
||||||
|
},
|
||||||
[]prompbmarshal.TimeSeries{
|
[]prompbmarshal.TimeSeries{
|
||||||
newTimeSeries([]float64{2}, []int64{timestamp.UnixNano()}, map[string]string{
|
newTimeSeries([]float64{2}, []int64{timestamp.UnixNano()}, map[string]string{
|
||||||
"__name__": "job:foo",
|
"__name__": "job:foo",
|
||||||
|
@ -76,11 +78,11 @@ func TestRecordingRule_Exec(t *testing.T) {
|
||||||
}
|
}
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
t.Run(tc.rule.Name, func(t *testing.T) {
|
t.Run(tc.rule.Name, func(t *testing.T) {
|
||||||
fq := &fakeQuerier{}
|
fq := &datasource.FakeQuerier{}
|
||||||
fq.add(tc.metrics...)
|
fq.Add(tc.metrics...)
|
||||||
tc.rule.q = fq
|
tc.rule.q = fq
|
||||||
tc.rule.state = newRuleState(10)
|
tc.rule.state = &ruleState{entries: make([]StateEntry, 10)}
|
||||||
tss, err := tc.rule.Exec(context.TODO(), time.Now(), 0)
|
tss, err := tc.rule.exec(context.TODO(), time.Now(), 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("unexpected Exec err: %s", err)
|
t.Fatalf("unexpected Exec err: %s", err)
|
||||||
}
|
}
|
||||||
|
@ -141,7 +143,8 @@ func TestRecordingRule_ExecRange(t *testing.T) {
|
||||||
}},
|
}},
|
||||||
[]datasource.Metric{
|
[]datasource.Metric{
|
||||||
metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "foo"),
|
metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "foo"),
|
||||||
metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar")},
|
metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar"),
|
||||||
|
},
|
||||||
[]prompbmarshal.TimeSeries{
|
[]prompbmarshal.TimeSeries{
|
||||||
newTimeSeries([]float64{2}, []int64{timestamp.UnixNano()}, map[string]string{
|
newTimeSeries([]float64{2}, []int64{timestamp.UnixNano()}, map[string]string{
|
||||||
"__name__": "job:foo",
|
"__name__": "job:foo",
|
||||||
|
@ -158,10 +161,10 @@ func TestRecordingRule_ExecRange(t *testing.T) {
|
||||||
}
|
}
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
t.Run(tc.rule.Name, func(t *testing.T) {
|
t.Run(tc.rule.Name, func(t *testing.T) {
|
||||||
fq := &fakeQuerier{}
|
fq := &datasource.FakeQuerier{}
|
||||||
fq.add(tc.metrics...)
|
fq.Add(tc.metrics...)
|
||||||
tc.rule.q = fq
|
tc.rule.q = fq
|
||||||
tss, err := tc.rule.ExecRange(context.TODO(), time.Now(), time.Now())
|
tss, err := tc.rule.execRange(context.TODO(), time.Now(), time.Now())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("unexpected Exec err: %s", err)
|
t.Fatalf("unexpected Exec err: %s", err)
|
||||||
}
|
}
|
||||||
|
@ -198,15 +201,15 @@ func TestRecordingRuleLimit(t *testing.T) {
|
||||||
metricWithValuesAndLabels(t, []float64{2, 3}, "__name__", "bar", "job", "bar"),
|
metricWithValuesAndLabels(t, []float64{2, 3}, "__name__", "bar", "job", "bar"),
|
||||||
metricWithValuesAndLabels(t, []float64{4, 5, 6}, "__name__", "baz", "job", "baz"),
|
metricWithValuesAndLabels(t, []float64{4, 5, 6}, "__name__", "baz", "job", "baz"),
|
||||||
}
|
}
|
||||||
rule := &RecordingRule{Name: "job:foo", state: newRuleState(10), Labels: map[string]string{
|
rule := &RecordingRule{Name: "job:foo", state: &ruleState{entries: make([]StateEntry, 10)}, Labels: map[string]string{
|
||||||
"source": "test_limit",
|
"source": "test_limit",
|
||||||
}}
|
}}
|
||||||
var err error
|
var err error
|
||||||
for _, testCase := range testCases {
|
for _, testCase := range testCases {
|
||||||
fq := &fakeQuerier{}
|
fq := &datasource.FakeQuerier{}
|
||||||
fq.add(testMetrics...)
|
fq.Add(testMetrics...)
|
||||||
rule.q = fq
|
rule.q = fq
|
||||||
_, err = rule.Exec(context.TODO(), timestamp, testCase.limit)
|
_, err = rule.exec(context.TODO(), timestamp, testCase.limit)
|
||||||
if err != nil && !strings.EqualFold(err.Error(), testCase.err) {
|
if err != nil && !strings.EqualFold(err.Error(), testCase.err) {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
@ -215,18 +218,17 @@ func TestRecordingRuleLimit(t *testing.T) {
|
||||||
|
|
||||||
func TestRecordingRule_ExecNegative(t *testing.T) {
|
func TestRecordingRule_ExecNegative(t *testing.T) {
|
||||||
rr := &RecordingRule{
|
rr := &RecordingRule{
|
||||||
Name: "job:foo",
|
Name: "job:foo",
|
||||||
state: newRuleState(10),
|
|
||||||
Labels: map[string]string{
|
Labels: map[string]string{
|
||||||
"job": "test",
|
"job": "test",
|
||||||
},
|
},
|
||||||
|
state: &ruleState{entries: make([]StateEntry, 10)},
|
||||||
}
|
}
|
||||||
|
fq := &datasource.FakeQuerier{}
|
||||||
fq := &fakeQuerier{}
|
|
||||||
expErr := "connection reset by peer"
|
expErr := "connection reset by peer"
|
||||||
fq.setErr(errors.New(expErr))
|
fq.SetErr(errors.New(expErr))
|
||||||
rr.q = fq
|
rr.q = fq
|
||||||
_, err := rr.Exec(context.TODO(), time.Now(), 0)
|
_, err := rr.exec(context.TODO(), time.Now(), 0)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
t.Fatalf("expected to get err; got nil")
|
t.Fatalf("expected to get err; got nil")
|
||||||
}
|
}
|
||||||
|
@ -234,14 +236,14 @@ func TestRecordingRule_ExecNegative(t *testing.T) {
|
||||||
t.Fatalf("expected to get err %q; got %q insterad", expErr, err)
|
t.Fatalf("expected to get err %q; got %q insterad", expErr, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
fq.reset()
|
fq.Reset()
|
||||||
|
|
||||||
// add metrics which differs only by `job` label
|
// add metrics which differs only by `job` label
|
||||||
// which will be overridden by rule
|
// which will be overridden by rule
|
||||||
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "foo"))
|
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "foo"))
|
||||||
fq.add(metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "bar"))
|
fq.Add(metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "bar"))
|
||||||
|
|
||||||
_, err = rr.Exec(context.TODO(), time.Now(), 0)
|
_, err = rr.exec(context.TODO(), time.Now(), 0)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
t.Fatalf("expected to get err; got nil")
|
t.Fatalf("expected to get err; got nil")
|
||||||
}
|
}
|
174
app/vmalert/rule/rule.go
Normal file
174
app/vmalert/rule/rule.go
Normal file
|
@ -0,0 +1,174 @@
|
||||||
|
package rule
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Rule represents alerting or recording rule
|
||||||
|
// that has unique ID, can be Executed and
|
||||||
|
// updated with other Rule.
|
||||||
|
type Rule interface {
|
||||||
|
// ID returns unique ID that may be used for
|
||||||
|
// identifying this Rule among others.
|
||||||
|
ID() uint64
|
||||||
|
// exec executes the rule with given context at the given timestamp and limit.
|
||||||
|
// returns an err if number of resulting time series exceeds the limit.
|
||||||
|
exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error)
|
||||||
|
// execRange executes the rule on the given time range.
|
||||||
|
execRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error)
|
||||||
|
// updateWith performs modification of current Rule
|
||||||
|
// with fields of the given Rule.
|
||||||
|
updateWith(Rule) error
|
||||||
|
// close performs the shutdown procedures for rule
|
||||||
|
// such as metrics unregister
|
||||||
|
close()
|
||||||
|
}
|
||||||
|
|
||||||
|
// errDuplicate is the sentinel error used when a rule's result contains
// several metrics that end up with the same labelset after rule labels are applied.
var errDuplicate = errors.New("result contains metrics with the same labelset after applying rule labels. See https://docs.victoriametrics.com/vmalert.html#series-with-the-same-labelset for details")
|
||||||
|
|
||||||
|
type ruleState struct {
|
||||||
|
sync.RWMutex
|
||||||
|
entries []StateEntry
|
||||||
|
cur int
|
||||||
|
}
|
||||||
|
|
||||||
|
// StateEntry describes the outcome of a single rule execution.
type StateEntry struct {
	// Time is the moment of time the rule execution was called.
	Time time.Time
	// At is the timestamp the rule execution was called with.
	At time.Time
	// Duration is the duration of the rule execution call.
	Duration time.Duration
	// Err is the error that happened during the execution.
	// It is reset on every successful execution,
	// so it may be used as a health indicator of the rule.
	Err error
	// Samples is the number of samples returned
	// during the evaluation.
	Samples int
	// SeriesFetched is the number of time series fetched
	// during the evaluation.
	// It is supported by VictoriaMetrics only, starting from v1.90.0.
	// If SeriesFetched == nil, then this attribute was missing in
	// the datasource response (unsupported).
	SeriesFetched *int
	// Curl is the curl command reflecting the HTTP request
	// used during the execution.
	Curl string
}
|
||||||
|
|
||||||
|
// GetLastEntry returns latest stateEntry of rule
|
||||||
|
func GetLastEntry(r Rule) StateEntry {
|
||||||
|
if rule, ok := r.(*AlertingRule); ok {
|
||||||
|
return rule.state.getLast()
|
||||||
|
}
|
||||||
|
if rule, ok := r.(*RecordingRule); ok {
|
||||||
|
return rule.state.getLast()
|
||||||
|
}
|
||||||
|
return StateEntry{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetRuleStateSize returns size of rule stateEntry
|
||||||
|
func GetRuleStateSize(r Rule) int {
|
||||||
|
if rule, ok := r.(*AlertingRule); ok {
|
||||||
|
return rule.state.size()
|
||||||
|
}
|
||||||
|
if rule, ok := r.(*RecordingRule); ok {
|
||||||
|
return rule.state.size()
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetAllRuleState returns rule entire stateEntries
|
||||||
|
func GetAllRuleState(r Rule) []StateEntry {
|
||||||
|
if rule, ok := r.(*AlertingRule); ok {
|
||||||
|
return rule.state.getAll()
|
||||||
|
}
|
||||||
|
if rule, ok := r.(*RecordingRule); ok {
|
||||||
|
return rule.state.getAll()
|
||||||
|
}
|
||||||
|
return []StateEntry{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *ruleState) size() int {
|
||||||
|
s.RLock()
|
||||||
|
defer s.RUnlock()
|
||||||
|
return len(s.entries)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *ruleState) getLast() StateEntry {
|
||||||
|
s.RLock()
|
||||||
|
defer s.RUnlock()
|
||||||
|
if len(s.entries) == 0 {
|
||||||
|
return StateEntry{}
|
||||||
|
}
|
||||||
|
return s.entries[s.cur]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *ruleState) getAll() []StateEntry {
|
||||||
|
entries := make([]StateEntry, 0)
|
||||||
|
|
||||||
|
s.RLock()
|
||||||
|
defer s.RUnlock()
|
||||||
|
|
||||||
|
cur := s.cur
|
||||||
|
for {
|
||||||
|
e := s.entries[cur]
|
||||||
|
if !e.Time.IsZero() || !e.At.IsZero() {
|
||||||
|
entries = append(entries, e)
|
||||||
|
}
|
||||||
|
cur--
|
||||||
|
if cur < 0 {
|
||||||
|
cur = cap(s.entries) - 1
|
||||||
|
}
|
||||||
|
if cur == s.cur {
|
||||||
|
return entries
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *ruleState) add(e StateEntry) {
|
||||||
|
s.Lock()
|
||||||
|
defer s.Unlock()
|
||||||
|
|
||||||
|
s.cur++
|
||||||
|
if s.cur > cap(s.entries)-1 {
|
||||||
|
s.cur = 0
|
||||||
|
}
|
||||||
|
s.entries[s.cur] = e
|
||||||
|
}
|
||||||
|
|
||||||
|
func replayRule(r Rule, start, end time.Time, rw remotewrite.RWClient, replayRuleRetryAttempts int) (int, error) {
|
||||||
|
var err error
|
||||||
|
var tss []prompbmarshal.TimeSeries
|
||||||
|
for i := 0; i < replayRuleRetryAttempts; i++ {
|
||||||
|
tss, err = r.execRange(context.Background(), start, end)
|
||||||
|
if err == nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
logger.Errorf("attempt %d to execute rule %q failed: %s", i+1, r, err)
|
||||||
|
time.Sleep(time.Second)
|
||||||
|
}
|
||||||
|
if err != nil { // means all attempts failed
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
if len(tss) < 1 {
|
||||||
|
return 0, nil
|
||||||
|
}
|
||||||
|
var n int
|
||||||
|
for _, ts := range tss {
|
||||||
|
if err := rw.Push(ts); err != nil {
|
||||||
|
return n, fmt.Errorf("remote write failure: %s", err)
|
||||||
|
}
|
||||||
|
n += len(ts.Samples)
|
||||||
|
}
|
||||||
|
return n, nil
|
||||||
|
}
|
81
app/vmalert/rule/rule_test.go
Normal file
81
app/vmalert/rule/rule_test.go
Normal file
|
@ -0,0 +1,81 @@
|
||||||
|
package rule
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestRule_state(t *testing.T) {
|
||||||
|
stateEntriesN := 20
|
||||||
|
r := &AlertingRule{state: &ruleState{entries: make([]StateEntry, stateEntriesN)}}
|
||||||
|
e := r.state.getLast()
|
||||||
|
if !e.At.IsZero() {
|
||||||
|
t.Fatalf("expected entry to be zero")
|
||||||
|
}
|
||||||
|
|
||||||
|
now := time.Now()
|
||||||
|
r.state.add(StateEntry{At: now})
|
||||||
|
|
||||||
|
e = r.state.getLast()
|
||||||
|
if e.At != now {
|
||||||
|
t.Fatalf("expected entry at %v to be equal to %v",
|
||||||
|
e.At, now)
|
||||||
|
}
|
||||||
|
|
||||||
|
time.Sleep(time.Millisecond)
|
||||||
|
now2 := time.Now()
|
||||||
|
r.state.add(StateEntry{At: now2})
|
||||||
|
|
||||||
|
e = r.state.getLast()
|
||||||
|
if e.At != now2 {
|
||||||
|
t.Fatalf("expected entry at %v to be equal to %v",
|
||||||
|
e.At, now2)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(r.state.getAll()) != 2 {
|
||||||
|
t.Fatalf("expected for state to have 2 entries only; got %d",
|
||||||
|
len(r.state.getAll()),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
var last time.Time
|
||||||
|
for i := 0; i < stateEntriesN*2; i++ {
|
||||||
|
last = time.Now()
|
||||||
|
r.state.add(StateEntry{At: last})
|
||||||
|
}
|
||||||
|
|
||||||
|
e = r.state.getLast()
|
||||||
|
if e.At != last {
|
||||||
|
t.Fatalf("expected entry at %v to be equal to %v",
|
||||||
|
e.At, last)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(r.state.getAll()) != stateEntriesN {
|
||||||
|
t.Fatalf("expected for state to have %d entries only; got %d",
|
||||||
|
stateEntriesN, len(r.state.getAll()),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestRule_stateConcurrent supposed to test concurrent
|
||||||
|
// execution of state updates.
|
||||||
|
// Should be executed with -race flag
|
||||||
|
func TestRule_stateConcurrent(_ *testing.T) {
|
||||||
|
r := &AlertingRule{state: &ruleState{entries: make([]StateEntry, 20)}}
|
||||||
|
const workers = 50
|
||||||
|
const iterations = 100
|
||||||
|
wg := sync.WaitGroup{}
|
||||||
|
wg.Add(workers)
|
||||||
|
for i := 0; i < workers; i++ {
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
for i := 0; i < iterations; i++ {
|
||||||
|
r.state.add(StateEntry{At: time.Now()})
|
||||||
|
r.state.getAll()
|
||||||
|
r.state.getLast()
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
wg.Wait()
|
||||||
|
}
|
|
@ -1,239 +1,18 @@
|
||||||
package main
|
package rule
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
|
||||||
"reflect"
|
"reflect"
|
||||||
"sort"
|
"sort"
|
||||||
"sync"
|
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||||
)
|
)
|
||||||
|
|
||||||
type fakeQuerier struct {
|
// CompareRules is a test helper func for other tests
|
||||||
sync.Mutex
|
func CompareRules(t *testing.T, a, b Rule) error {
|
||||||
metrics []datasource.Metric
|
|
||||||
err error
|
|
||||||
}
|
|
||||||
|
|
||||||
func (fq *fakeQuerier) setErr(err error) {
|
|
||||||
fq.Lock()
|
|
||||||
fq.err = err
|
|
||||||
fq.Unlock()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (fq *fakeQuerier) reset() {
|
|
||||||
fq.Lock()
|
|
||||||
fq.err = nil
|
|
||||||
fq.metrics = fq.metrics[:0]
|
|
||||||
fq.Unlock()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (fq *fakeQuerier) add(metrics ...datasource.Metric) {
|
|
||||||
fq.Lock()
|
|
||||||
fq.metrics = append(fq.metrics, metrics...)
|
|
||||||
fq.Unlock()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (fq *fakeQuerier) BuildWithParams(_ datasource.QuerierParams) datasource.Querier {
|
|
||||||
return fq
|
|
||||||
}
|
|
||||||
|
|
||||||
func (fq *fakeQuerier) QueryRange(ctx context.Context, q string, _, _ time.Time) (datasource.Result, error) {
|
|
||||||
req, _, err := fq.Query(ctx, q, time.Now())
|
|
||||||
return req, err
|
|
||||||
}
|
|
||||||
|
|
||||||
func (fq *fakeQuerier) Query(_ context.Context, _ string, _ time.Time) (datasource.Result, *http.Request, error) {
|
|
||||||
fq.Lock()
|
|
||||||
defer fq.Unlock()
|
|
||||||
if fq.err != nil {
|
|
||||||
return datasource.Result{}, nil, fq.err
|
|
||||||
}
|
|
||||||
cp := make([]datasource.Metric, len(fq.metrics))
|
|
||||||
copy(cp, fq.metrics)
|
|
||||||
req, _ := http.NewRequest(http.MethodPost, "foo.com", nil)
|
|
||||||
return datasource.Result{Data: cp}, req, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
type fakeQuerierWithRegistry struct {
|
|
||||||
sync.Mutex
|
|
||||||
registry map[string][]datasource.Metric
|
|
||||||
}
|
|
||||||
|
|
||||||
func (fqr *fakeQuerierWithRegistry) set(key string, metrics ...datasource.Metric) {
|
|
||||||
fqr.Lock()
|
|
||||||
if fqr.registry == nil {
|
|
||||||
fqr.registry = make(map[string][]datasource.Metric)
|
|
||||||
}
|
|
||||||
fqr.registry[key] = metrics
|
|
||||||
fqr.Unlock()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (fqr *fakeQuerierWithRegistry) reset() {
|
|
||||||
fqr.Lock()
|
|
||||||
fqr.registry = nil
|
|
||||||
fqr.Unlock()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (fqr *fakeQuerierWithRegistry) BuildWithParams(_ datasource.QuerierParams) datasource.Querier {
|
|
||||||
return fqr
|
|
||||||
}
|
|
||||||
|
|
||||||
func (fqr *fakeQuerierWithRegistry) QueryRange(ctx context.Context, q string, _, _ time.Time) (datasource.Result, error) {
|
|
||||||
req, _, err := fqr.Query(ctx, q, time.Now())
|
|
||||||
return req, err
|
|
||||||
}
|
|
||||||
|
|
||||||
func (fqr *fakeQuerierWithRegistry) Query(_ context.Context, expr string, _ time.Time) (datasource.Result, *http.Request, error) {
|
|
||||||
fqr.Lock()
|
|
||||||
defer fqr.Unlock()
|
|
||||||
|
|
||||||
req, _ := http.NewRequest(http.MethodPost, "foo.com", nil)
|
|
||||||
metrics, ok := fqr.registry[expr]
|
|
||||||
if !ok {
|
|
||||||
return datasource.Result{}, req, nil
|
|
||||||
}
|
|
||||||
cp := make([]datasource.Metric, len(metrics))
|
|
||||||
copy(cp, metrics)
|
|
||||||
return datasource.Result{Data: cp}, req, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
type fakeQuerierWithDelay struct {
|
|
||||||
fakeQuerier
|
|
||||||
delay time.Duration
|
|
||||||
}
|
|
||||||
|
|
||||||
func (fqd *fakeQuerierWithDelay) Query(ctx context.Context, expr string, ts time.Time) (datasource.Result, *http.Request, error) {
|
|
||||||
timer := time.NewTimer(fqd.delay)
|
|
||||||
select {
|
|
||||||
case <-ctx.Done():
|
|
||||||
case <-timer.C:
|
|
||||||
}
|
|
||||||
return fqd.fakeQuerier.Query(ctx, expr, ts)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (fqd *fakeQuerierWithDelay) BuildWithParams(_ datasource.QuerierParams) datasource.Querier {
|
|
||||||
return fqd
|
|
||||||
}
|
|
||||||
|
|
||||||
type fakeNotifier struct {
|
|
||||||
sync.Mutex
|
|
||||||
alerts []notifier.Alert
|
|
||||||
// records number of received alerts in total
|
|
||||||
counter int
|
|
||||||
}
|
|
||||||
|
|
||||||
// Close does nothing.
func (*fakeNotifier) Close() {}
|
|
||||||
// Addr always returns an empty string.
func (*fakeNotifier) Addr() string {
	return ""
}
|
|
||||||
func (fn *fakeNotifier) Send(_ context.Context, alerts []notifier.Alert, _ map[string]string) error {
|
|
||||||
fn.Lock()
|
|
||||||
defer fn.Unlock()
|
|
||||||
fn.counter += len(alerts)
|
|
||||||
fn.alerts = alerts
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (fn *fakeNotifier) getCounter() int {
|
|
||||||
fn.Lock()
|
|
||||||
defer fn.Unlock()
|
|
||||||
return fn.counter
|
|
||||||
}
|
|
||||||
|
|
||||||
func (fn *fakeNotifier) getAlerts() []notifier.Alert {
|
|
||||||
fn.Lock()
|
|
||||||
defer fn.Unlock()
|
|
||||||
return fn.alerts
|
|
||||||
}
|
|
||||||
|
|
||||||
type faultyNotifier struct {
|
|
||||||
fakeNotifier
|
|
||||||
}
|
|
||||||
|
|
||||||
func (fn *faultyNotifier) Send(ctx context.Context, _ []notifier.Alert, _ map[string]string) error {
|
|
||||||
d, ok := ctx.Deadline()
|
|
||||||
if ok {
|
|
||||||
time.Sleep(time.Until(d))
|
|
||||||
}
|
|
||||||
return fmt.Errorf("send failed")
|
|
||||||
}
|
|
||||||
|
|
||||||
func metricWithValueAndLabels(t *testing.T, value float64, labels ...string) datasource.Metric {
|
|
||||||
return metricWithValuesAndLabels(t, []float64{value}, labels...)
|
|
||||||
}
|
|
||||||
|
|
||||||
func metricWithValuesAndLabels(t *testing.T, values []float64, labels ...string) datasource.Metric {
|
|
||||||
t.Helper()
|
|
||||||
m := metricWithLabels(t, labels...)
|
|
||||||
m.Values = values
|
|
||||||
for i := range values {
|
|
||||||
m.Timestamps = append(m.Timestamps, int64(i))
|
|
||||||
}
|
|
||||||
return m
|
|
||||||
}
|
|
||||||
|
|
||||||
func metricWithLabels(t *testing.T, labels ...string) datasource.Metric {
|
|
||||||
t.Helper()
|
|
||||||
if len(labels) == 0 || len(labels)%2 != 0 {
|
|
||||||
t.Fatalf("expected to get even number of labels")
|
|
||||||
}
|
|
||||||
m := datasource.Metric{Values: []float64{1}, Timestamps: []int64{1}}
|
|
||||||
for i := 0; i < len(labels); i += 2 {
|
|
||||||
m.Labels = append(m.Labels, datasource.Label{
|
|
||||||
Name: labels[i],
|
|
||||||
Value: labels[i+1],
|
|
||||||
})
|
|
||||||
}
|
|
||||||
return m
|
|
||||||
}
|
|
||||||
|
|
||||||
func toPromLabels(t *testing.T, labels ...string) []prompbmarshal.Label {
|
|
||||||
t.Helper()
|
|
||||||
if len(labels) == 0 || len(labels)%2 != 0 {
|
|
||||||
t.Fatalf("expected to get even number of labels")
|
|
||||||
}
|
|
||||||
var ls []prompbmarshal.Label
|
|
||||||
for i := 0; i < len(labels); i += 2 {
|
|
||||||
ls = append(ls, prompbmarshal.Label{
|
|
||||||
Name: labels[i],
|
|
||||||
Value: labels[i+1],
|
|
||||||
})
|
|
||||||
}
|
|
||||||
return ls
|
|
||||||
}
|
|
||||||
|
|
||||||
func compareGroups(t *testing.T, a, b *Group) {
|
|
||||||
t.Helper()
|
|
||||||
if a.Name != b.Name {
|
|
||||||
t.Fatalf("expected group name %q; got %q", a.Name, b.Name)
|
|
||||||
}
|
|
||||||
if a.File != b.File {
|
|
||||||
t.Fatalf("expected group %q file name %q; got %q", a.Name, a.File, b.File)
|
|
||||||
}
|
|
||||||
if a.Interval != b.Interval {
|
|
||||||
t.Fatalf("expected group %q interval %v; got %v", a.Name, a.Interval, b.Interval)
|
|
||||||
}
|
|
||||||
if len(a.Rules) != len(b.Rules) {
|
|
||||||
t.Fatalf("expected group %s to have %d rules; got: %d",
|
|
||||||
a.Name, len(a.Rules), len(b.Rules))
|
|
||||||
}
|
|
||||||
for i, r := range a.Rules {
|
|
||||||
got, want := r, b.Rules[i]
|
|
||||||
if a.ID() != b.ID() {
|
|
||||||
t.Fatalf("expected to have rule %q; got %q", want.ID(), got.ID())
|
|
||||||
}
|
|
||||||
if err := compareRules(t, want, got); err != nil {
|
|
||||||
t.Fatalf("comparison error: %s", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func compareRules(t *testing.T, a, b Rule) error {
|
|
||||||
t.Helper()
|
t.Helper()
|
||||||
switch v := a.(type) {
|
switch v := a.(type) {
|
||||||
case *AlertingRule:
|
case *AlertingRule:
|
||||||
|
@ -287,6 +66,50 @@ func compareAlertingRules(t *testing.T, a, b *AlertingRule) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func metricWithValueAndLabels(t *testing.T, value float64, labels ...string) datasource.Metric {
|
||||||
|
return metricWithValuesAndLabels(t, []float64{value}, labels...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func metricWithValuesAndLabels(t *testing.T, values []float64, labels ...string) datasource.Metric {
|
||||||
|
t.Helper()
|
||||||
|
m := metricWithLabels(t, labels...)
|
||||||
|
m.Values = values
|
||||||
|
for i := range values {
|
||||||
|
m.Timestamps = append(m.Timestamps, int64(i))
|
||||||
|
}
|
||||||
|
return m
|
||||||
|
}
|
||||||
|
|
||||||
|
func metricWithLabels(t *testing.T, labels ...string) datasource.Metric {
|
||||||
|
t.Helper()
|
||||||
|
if len(labels) == 0 || len(labels)%2 != 0 {
|
||||||
|
t.Fatalf("expected to get even number of labels")
|
||||||
|
}
|
||||||
|
m := datasource.Metric{Values: []float64{1}, Timestamps: []int64{1}}
|
||||||
|
for i := 0; i < len(labels); i += 2 {
|
||||||
|
m.Labels = append(m.Labels, datasource.Label{
|
||||||
|
Name: labels[i],
|
||||||
|
Value: labels[i+1],
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return m
|
||||||
|
}
|
||||||
|
|
||||||
|
func toPromLabels(t *testing.T, labels ...string) []prompbmarshal.Label {
|
||||||
|
t.Helper()
|
||||||
|
if len(labels) == 0 || len(labels)%2 != 0 {
|
||||||
|
t.Fatalf("expected to get even number of labels")
|
||||||
|
}
|
||||||
|
var ls []prompbmarshal.Label
|
||||||
|
for i := 0; i < len(labels); i += 2 {
|
||||||
|
ls = append(ls, prompbmarshal.Label{
|
||||||
|
Name: labels[i],
|
||||||
|
Value: labels[i+1],
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return ls
|
||||||
|
}
|
||||||
|
|
||||||
func compareTimeSeries(t *testing.T, a, b []prompbmarshal.TimeSeries) error {
|
func compareTimeSeries(t *testing.T, a, b []prompbmarshal.TimeSeries) error {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
if len(a) != len(b) {
|
if len(a) != len(b) {
|
|
@ -1,4 +1,4 @@
|
||||||
package main
|
package rule
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
|
@ -1,4 +1,4 @@
|
||||||
package main
|
package rule
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"net/http"
|
"net/http"
|
|
@ -1,100 +0,0 @@
|
||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"sync"
|
|
||||||
"testing"
|
|
||||||
"time"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestRule_stateDisabled(t *testing.T) {
|
|
||||||
state := newRuleState(-1)
|
|
||||||
e := state.getLast()
|
|
||||||
if !e.at.IsZero() {
|
|
||||||
t.Fatalf("expected entry to be zero")
|
|
||||||
}
|
|
||||||
|
|
||||||
state.add(ruleStateEntry{at: time.Now()})
|
|
||||||
state.add(ruleStateEntry{at: time.Now()})
|
|
||||||
state.add(ruleStateEntry{at: time.Now()})
|
|
||||||
|
|
||||||
if len(state.getAll()) != 1 {
|
|
||||||
// state should store at least one update at any circumstances
|
|
||||||
t.Fatalf("expected for state to have %d entries; got %d",
|
|
||||||
1, len(state.getAll()),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
func TestRule_state(t *testing.T) {
|
|
||||||
stateEntriesN := 20
|
|
||||||
state := newRuleState(stateEntriesN)
|
|
||||||
e := state.getLast()
|
|
||||||
if !e.at.IsZero() {
|
|
||||||
t.Fatalf("expected entry to be zero")
|
|
||||||
}
|
|
||||||
|
|
||||||
now := time.Now()
|
|
||||||
state.add(ruleStateEntry{at: now})
|
|
||||||
|
|
||||||
e = state.getLast()
|
|
||||||
if e.at != now {
|
|
||||||
t.Fatalf("expected entry at %v to be equal to %v",
|
|
||||||
e.at, now)
|
|
||||||
}
|
|
||||||
|
|
||||||
time.Sleep(time.Millisecond)
|
|
||||||
now2 := time.Now()
|
|
||||||
state.add(ruleStateEntry{at: now2})
|
|
||||||
|
|
||||||
e = state.getLast()
|
|
||||||
if e.at != now2 {
|
|
||||||
t.Fatalf("expected entry at %v to be equal to %v",
|
|
||||||
e.at, now2)
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(state.getAll()) != 2 {
|
|
||||||
t.Fatalf("expected for state to have 2 entries only; got %d",
|
|
||||||
len(state.getAll()),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
var last time.Time
|
|
||||||
for i := 0; i < stateEntriesN*2; i++ {
|
|
||||||
last = time.Now()
|
|
||||||
state.add(ruleStateEntry{at: last})
|
|
||||||
}
|
|
||||||
|
|
||||||
e = state.getLast()
|
|
||||||
if e.at != last {
|
|
||||||
t.Fatalf("expected entry at %v to be equal to %v",
|
|
||||||
e.at, last)
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(state.getAll()) != stateEntriesN {
|
|
||||||
t.Fatalf("expected for state to have %d entries only; got %d",
|
|
||||||
stateEntriesN, len(state.getAll()),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TestRule_stateConcurrent supposed to test concurrent
|
|
||||||
// execution of state updates.
|
|
||||||
// Should be executed with -race flag
|
|
||||||
func TestRule_stateConcurrent(_ *testing.T) {
|
|
||||||
state := newRuleState(20)
|
|
||||||
|
|
||||||
const workers = 50
|
|
||||||
const iterations = 100
|
|
||||||
wg := sync.WaitGroup{}
|
|
||||||
wg.Add(workers)
|
|
||||||
for i := 0; i < workers; i++ {
|
|
||||||
go func() {
|
|
||||||
defer wg.Done()
|
|
||||||
for i := 0; i < iterations; i++ {
|
|
||||||
state.add(ruleStateEntry{at: time.Now()})
|
|
||||||
state.getAll()
|
|
||||||
state.getLast()
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
}
|
|
||||||
wg.Wait()
|
|
||||||
}
|
|
|
@ -10,6 +10,7 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/tpl"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/tpl"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||||
|
@ -143,38 +144,32 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
func (rh *requestHandler) getRule(r *http.Request) (apiRule, error) {
|
||||||
paramGroupID = "group_id"
|
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 64)
|
||||||
paramAlertID = "alert_id"
|
|
||||||
paramRuleID = "rule_id"
|
|
||||||
)
|
|
||||||
|
|
||||||
func (rh *requestHandler) getRule(r *http.Request) (APIRule, error) {
|
|
||||||
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 0)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return APIRule{}, fmt.Errorf("failed to read %q param: %s", paramGroupID, err)
|
return apiRule{}, fmt.Errorf("failed to read %q param: %s", paramGroupID, err)
|
||||||
}
|
}
|
||||||
ruleID, err := strconv.ParseUint(r.FormValue(paramRuleID), 10, 0)
|
ruleID, err := strconv.ParseUint(r.FormValue(paramRuleID), 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return APIRule{}, fmt.Errorf("failed to read %q param: %s", paramRuleID, err)
|
return apiRule{}, fmt.Errorf("failed to read %q param: %s", paramRuleID, err)
|
||||||
}
|
}
|
||||||
rule, err := rh.m.RuleAPI(groupID, ruleID)
|
obj, err := rh.m.ruleAPI(groupID, ruleID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return APIRule{}, errResponse(err, http.StatusNotFound)
|
return apiRule{}, errResponse(err, http.StatusNotFound)
|
||||||
}
|
}
|
||||||
return rule, nil
|
return obj, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rh *requestHandler) getAlert(r *http.Request) (*APIAlert, error) {
|
func (rh *requestHandler) getAlert(r *http.Request) (*apiAlert, error) {
|
||||||
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 0)
|
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to read %q param: %s", paramGroupID, err)
|
return nil, fmt.Errorf("failed to read %q param: %s", paramGroupID, err)
|
||||||
}
|
}
|
||||||
alertID, err := strconv.ParseUint(r.FormValue(paramAlertID), 10, 0)
|
alertID, err := strconv.ParseUint(r.FormValue(paramAlertID), 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to read %q param: %s", paramAlertID, err)
|
return nil, fmt.Errorf("failed to read %q param: %s", paramAlertID, err)
|
||||||
}
|
}
|
||||||
a, err := rh.m.AlertAPI(groupID, alertID)
|
a, err := rh.m.alertAPI(groupID, alertID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errResponse(err, http.StatusNotFound)
|
return nil, errResponse(err, http.StatusNotFound)
|
||||||
}
|
}
|
||||||
|
@ -184,17 +179,17 @@ func (rh *requestHandler) getAlert(r *http.Request) (*APIAlert, error) {
|
||||||
type listGroupsResponse struct {
|
type listGroupsResponse struct {
|
||||||
Status string `json:"status"`
|
Status string `json:"status"`
|
||||||
Data struct {
|
Data struct {
|
||||||
Groups []APIGroup `json:"groups"`
|
Groups []apiGroup `json:"groups"`
|
||||||
} `json:"data"`
|
} `json:"data"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rh *requestHandler) groups() []APIGroup {
|
func (rh *requestHandler) groups() []apiGroup {
|
||||||
rh.m.groupsMu.RLock()
|
rh.m.groupsMu.RLock()
|
||||||
defer rh.m.groupsMu.RUnlock()
|
defer rh.m.groupsMu.RUnlock()
|
||||||
|
|
||||||
groups := make([]APIGroup, 0)
|
groups := make([]apiGroup, 0)
|
||||||
for _, g := range rh.m.groups {
|
for _, g := range rh.m.groups {
|
||||||
groups = append(groups, g.toAPI())
|
groups = append(groups, groupToAPI(g))
|
||||||
}
|
}
|
||||||
|
|
||||||
// sort list of alerts for deterministic output
|
// sort list of alerts for deterministic output
|
||||||
|
@ -221,35 +216,35 @@ func (rh *requestHandler) listGroups() ([]byte, error) {
|
||||||
type listAlertsResponse struct {
|
type listAlertsResponse struct {
|
||||||
Status string `json:"status"`
|
Status string `json:"status"`
|
||||||
Data struct {
|
Data struct {
|
||||||
Alerts []*APIAlert `json:"alerts"`
|
Alerts []*apiAlert `json:"alerts"`
|
||||||
} `json:"data"`
|
} `json:"data"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rh *requestHandler) groupAlerts() []GroupAlerts {
|
func (rh *requestHandler) groupAlerts() []groupAlerts {
|
||||||
rh.m.groupsMu.RLock()
|
rh.m.groupsMu.RLock()
|
||||||
defer rh.m.groupsMu.RUnlock()
|
defer rh.m.groupsMu.RUnlock()
|
||||||
|
|
||||||
var groupAlerts []GroupAlerts
|
var gAlerts []groupAlerts
|
||||||
for _, g := range rh.m.groups {
|
for _, g := range rh.m.groups {
|
||||||
var alerts []*APIAlert
|
var alerts []*apiAlert
|
||||||
for _, r := range g.Rules {
|
for _, r := range g.Rules {
|
||||||
a, ok := r.(*AlertingRule)
|
a, ok := r.(*rule.AlertingRule)
|
||||||
if !ok {
|
if !ok {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
alerts = append(alerts, a.AlertsToAPI()...)
|
alerts = append(alerts, ruleToAPIAlert(a)...)
|
||||||
}
|
}
|
||||||
if len(alerts) > 0 {
|
if len(alerts) > 0 {
|
||||||
groupAlerts = append(groupAlerts, GroupAlerts{
|
gAlerts = append(gAlerts, groupAlerts{
|
||||||
Group: g.toAPI(),
|
Group: groupToAPI(g),
|
||||||
Alerts: alerts,
|
Alerts: alerts,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
sort.Slice(groupAlerts, func(i, j int) bool {
|
sort.Slice(gAlerts, func(i, j int) bool {
|
||||||
return groupAlerts[i].Group.Name < groupAlerts[j].Group.Name
|
return gAlerts[i].Group.Name < gAlerts[j].Group.Name
|
||||||
})
|
})
|
||||||
return groupAlerts
|
return gAlerts
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rh *requestHandler) listAlerts() ([]byte, error) {
|
func (rh *requestHandler) listAlerts() ([]byte, error) {
|
||||||
|
@ -257,14 +252,14 @@ func (rh *requestHandler) listAlerts() ([]byte, error) {
|
||||||
defer rh.m.groupsMu.RUnlock()
|
defer rh.m.groupsMu.RUnlock()
|
||||||
|
|
||||||
lr := listAlertsResponse{Status: "success"}
|
lr := listAlertsResponse{Status: "success"}
|
||||||
lr.Data.Alerts = make([]*APIAlert, 0)
|
lr.Data.Alerts = make([]*apiAlert, 0)
|
||||||
for _, g := range rh.m.groups {
|
for _, g := range rh.m.groups {
|
||||||
for _, r := range g.Rules {
|
for _, r := range g.Rules {
|
||||||
a, ok := r.(*AlertingRule)
|
a, ok := r.(*rule.AlertingRule)
|
||||||
if !ok {
|
if !ok {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
lr.Data.Alerts = append(lr.Data.Alerts, a.AlertsToAPI()...)
|
lr.Data.Alerts = append(lr.Data.Alerts, ruleToAPIAlert(a)...)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,7 @@ btn-primary
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% endfunc %}
|
{% endfunc %}
|
||||||
|
|
||||||
{% func ListGroups(r *http.Request, originGroups []APIGroup) %}
|
{% func ListGroups(r *http.Request, originGroups []apiGroup) %}
|
||||||
{%code prefix := utils.Prefix(r.URL.Path) %}
|
{%code prefix := utils.Prefix(r.URL.Path) %}
|
||||||
{%= tpl.Header(r, navItems, "Groups", getLastConfigError()) %}
|
{%= tpl.Header(r, navItems, "Groups", getLastConfigError()) %}
|
||||||
{%code
|
{%code
|
||||||
|
@ -46,9 +46,9 @@ btn-primary
|
||||||
rOk := make(map[string]int)
|
rOk := make(map[string]int)
|
||||||
rNotOk := make(map[string]int)
|
rNotOk := make(map[string]int)
|
||||||
rNoMatch := make(map[string]int)
|
rNoMatch := make(map[string]int)
|
||||||
var groups []APIGroup
|
var groups []apiGroup
|
||||||
for _, g := range originGroups {
|
for _, g := range originGroups {
|
||||||
var rules []APIRule
|
var rules []apiRule
|
||||||
for _, r := range g.Rules {
|
for _, r := range g.Rules {
|
||||||
if r.LastError != "" {
|
if r.LastError != "" {
|
||||||
rNotOk[g.ID]++
|
rNotOk[g.ID]++
|
||||||
|
@ -166,7 +166,7 @@ btn-primary
|
||||||
{% endfunc %}
|
{% endfunc %}
|
||||||
|
|
||||||
|
|
||||||
{% func ListAlerts(r *http.Request, groupAlerts []GroupAlerts) %}
|
{% func ListAlerts(r *http.Request, groupAlerts []groupAlerts) %}
|
||||||
{%code prefix := utils.Prefix(r.URL.Path) %}
|
{%code prefix := utils.Prefix(r.URL.Path) %}
|
||||||
{%= tpl.Header(r, navItems, "Alerts", getLastConfigError()) %}
|
{%= tpl.Header(r, navItems, "Alerts", getLastConfigError()) %}
|
||||||
{% if len(groupAlerts) > 0 %}
|
{% if len(groupAlerts) > 0 %}
|
||||||
|
@ -183,7 +183,7 @@ btn-primary
|
||||||
</div>
|
</div>
|
||||||
{%code
|
{%code
|
||||||
var keys []string
|
var keys []string
|
||||||
alertsByRule := make(map[string][]*APIAlert)
|
alertsByRule := make(map[string][]*apiAlert)
|
||||||
for _, alert := range ga.Alerts {
|
for _, alert := range ga.Alerts {
|
||||||
if len(alertsByRule[alert.RuleID]) < 1 {
|
if len(alertsByRule[alert.RuleID]) < 1 {
|
||||||
keys = append(keys, alert.RuleID)
|
keys = append(keys, alert.RuleID)
|
||||||
|
@ -310,7 +310,7 @@ btn-primary
|
||||||
|
|
||||||
{% endfunc %}
|
{% endfunc %}
|
||||||
|
|
||||||
{% func Alert(r *http.Request, alert *APIAlert) %}
|
{% func Alert(r *http.Request, alert *apiAlert) %}
|
||||||
{%code prefix := utils.Prefix(r.URL.Path) %}
|
{%code prefix := utils.Prefix(r.URL.Path) %}
|
||||||
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
|
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
|
||||||
{%code
|
{%code
|
||||||
|
@ -397,7 +397,7 @@ btn-primary
|
||||||
{% endfunc %}
|
{% endfunc %}
|
||||||
|
|
||||||
|
|
||||||
{% func RuleDetails(r *http.Request, rule APIRule) %}
|
{% func RuleDetails(r *http.Request, rule apiRule) %}
|
||||||
{%code prefix := utils.Prefix(r.URL.Path) %}
|
{%code prefix := utils.Prefix(r.URL.Path) %}
|
||||||
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
|
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
|
||||||
{%code
|
{%code
|
||||||
|
@ -416,9 +416,9 @@ btn-primary
|
||||||
var seriesFetchedEnabled bool
|
var seriesFetchedEnabled bool
|
||||||
var seriesFetchedWarning bool
|
var seriesFetchedWarning bool
|
||||||
for _, u := range rule.Updates {
|
for _, u := range rule.Updates {
|
||||||
if u.seriesFetched != nil {
|
if u.SeriesFetched != nil {
|
||||||
seriesFetchedEnabled = true
|
seriesFetchedEnabled = true
|
||||||
if *u.seriesFetched == 0 && u.samples == 0{
|
if *u.SeriesFetched == 0 && u.Samples == 0{
|
||||||
seriesFetchedWarning = true
|
seriesFetchedWarning = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -537,23 +537,23 @@ btn-primary
|
||||||
<tbody>
|
<tbody>
|
||||||
|
|
||||||
{% for _, u := range rule.Updates %}
|
{% for _, u := range rule.Updates %}
|
||||||
<tr{% if u.err != nil %} class="alert-danger"{% endif %}>
|
<tr{% if u.Err != nil %} class="alert-danger"{% endif %}>
|
||||||
<td>
|
<td>
|
||||||
<span class="badge bg-primary rounded-pill me-3" title="Updated at">{%s u.time.Format(time.RFC3339) %}</span>
|
<span class="badge bg-primary rounded-pill me-3" title="Updated at">{%s u.Time.Format(time.RFC3339) %}</span>
|
||||||
</td>
|
</td>
|
||||||
<td class="text-center">{%d u.samples %}</td>
|
<td class="text-center">{%d u.Samples %}</td>
|
||||||
{% if seriesFetchedEnabled %}<td class="text-center">{% if u.seriesFetched != nil %}{%d *u.seriesFetched %}{% endif %}</td>{% endif %}
|
{% if seriesFetchedEnabled %}<td class="text-center">{% if u.SeriesFetched != nil %}{%d *u.SeriesFetched %}{% endif %}</td>{% endif %}
|
||||||
<td class="text-center">{%f.3 u.duration.Seconds() %}s</td>
|
<td class="text-center">{%f.3 u.Duration.Seconds() %}s</td>
|
||||||
<td class="text-center">{%s u.at.Format(time.RFC3339) %}</td>
|
<td class="text-center">{%s u.At.Format(time.RFC3339) %}</td>
|
||||||
<td>
|
<td>
|
||||||
<textarea class="curl-area" rows="1" onclick="this.focus();this.select()">{%s u.curl %}</textarea>
|
<textarea class="curl-area" rows="1" onclick="this.focus();this.select()">{%s u.Curl %}</textarea>
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
</li>
|
</li>
|
||||||
{% if u.err != nil %}
|
{% if u.Err != nil %}
|
||||||
<tr{% if u.err != nil %} class="alert-danger"{% endif %}>
|
<tr{% if u.Err != nil %} class="alert-danger"{% endif %}>
|
||||||
<td colspan="{% if seriesFetchedEnabled %}6{%else%}5{%endif%}">
|
<td colspan="{% if seriesFetchedEnabled %}6{%else%}5{%endif%}">
|
||||||
<span class="alert-danger">{%v u.err %}</span>
|
<span class="alert-danger">{%v u.Err %}</span>
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
@ -582,7 +582,7 @@ btn-primary
|
||||||
<span class="badge bg-warning text-dark" title="This firing state is kept because of `keep_firing_for`">stabilizing</span>
|
<span class="badge bg-warning text-dark" title="This firing state is kept because of `keep_firing_for`">stabilizing</span>
|
||||||
{% endfunc %}
|
{% endfunc %}
|
||||||
|
|
||||||
{% func seriesFetchedWarn(r APIRule) %}
|
{% func seriesFetchedWarn(r apiRule) %}
|
||||||
{% if isNoMatch(r) %}
|
{% if isNoMatch(r) %}
|
||||||
<svg xmlns="http://www.w3.org/2000/svg"
|
<svg xmlns="http://www.w3.org/2000/svg"
|
||||||
data-bs-toggle="tooltip"
|
data-bs-toggle="tooltip"
|
||||||
|
@ -596,7 +596,7 @@ btn-primary
|
||||||
{% endfunc %}
|
{% endfunc %}
|
||||||
|
|
||||||
{%code
|
{%code
|
||||||
func isNoMatch (r APIRule) bool {
|
func isNoMatch (r apiRule) bool {
|
||||||
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
|
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
|
||||||
}
|
}
|
||||||
%}
|
%}
|
||||||
|
|
|
@ -196,7 +196,7 @@ func buttonActive(filter, expValue string) string {
|
||||||
}
|
}
|
||||||
|
|
||||||
//line app/vmalert/web.qtpl:41
|
//line app/vmalert/web.qtpl:41
|
||||||
func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, originGroups []APIGroup) {
|
func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, originGroups []apiGroup) {
|
||||||
//line app/vmalert/web.qtpl:41
|
//line app/vmalert/web.qtpl:41
|
||||||
qw422016.N().S(`
|
qw422016.N().S(`
|
||||||
`)
|
`)
|
||||||
|
@ -216,9 +216,9 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, originGroups [
|
||||||
rOk := make(map[string]int)
|
rOk := make(map[string]int)
|
||||||
rNotOk := make(map[string]int)
|
rNotOk := make(map[string]int)
|
||||||
rNoMatch := make(map[string]int)
|
rNoMatch := make(map[string]int)
|
||||||
var groups []APIGroup
|
var groups []apiGroup
|
||||||
for _, g := range originGroups {
|
for _, g := range originGroups {
|
||||||
var rules []APIRule
|
var rules []apiRule
|
||||||
for _, r := range g.Rules {
|
for _, r := range g.Rules {
|
||||||
if r.LastError != "" {
|
if r.LastError != "" {
|
||||||
rNotOk[g.ID]++
|
rNotOk[g.ID]++
|
||||||
|
@ -610,7 +610,7 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, originGroups [
|
||||||
}
|
}
|
||||||
|
|
||||||
//line app/vmalert/web.qtpl:166
|
//line app/vmalert/web.qtpl:166
|
||||||
func WriteListGroups(qq422016 qtio422016.Writer, r *http.Request, originGroups []APIGroup) {
|
func WriteListGroups(qq422016 qtio422016.Writer, r *http.Request, originGroups []apiGroup) {
|
||||||
//line app/vmalert/web.qtpl:166
|
//line app/vmalert/web.qtpl:166
|
||||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||||
//line app/vmalert/web.qtpl:166
|
//line app/vmalert/web.qtpl:166
|
||||||
|
@ -621,7 +621,7 @@ func WriteListGroups(qq422016 qtio422016.Writer, r *http.Request, originGroups [
|
||||||
}
|
}
|
||||||
|
|
||||||
//line app/vmalert/web.qtpl:166
|
//line app/vmalert/web.qtpl:166
|
||||||
func ListGroups(r *http.Request, originGroups []APIGroup) string {
|
func ListGroups(r *http.Request, originGroups []apiGroup) string {
|
||||||
//line app/vmalert/web.qtpl:166
|
//line app/vmalert/web.qtpl:166
|
||||||
qb422016 := qt422016.AcquireByteBuffer()
|
qb422016 := qt422016.AcquireByteBuffer()
|
||||||
//line app/vmalert/web.qtpl:166
|
//line app/vmalert/web.qtpl:166
|
||||||
|
@ -636,7 +636,7 @@ func ListGroups(r *http.Request, originGroups []APIGroup) string {
|
||||||
}
|
}
|
||||||
|
|
||||||
//line app/vmalert/web.qtpl:169
|
//line app/vmalert/web.qtpl:169
|
||||||
func StreamListAlerts(qw422016 *qt422016.Writer, r *http.Request, groupAlerts []GroupAlerts) {
|
func StreamListAlerts(qw422016 *qt422016.Writer, r *http.Request, groupAlerts []groupAlerts) {
|
||||||
//line app/vmalert/web.qtpl:169
|
//line app/vmalert/web.qtpl:169
|
||||||
qw422016.N().S(`
|
qw422016.N().S(`
|
||||||
`)
|
`)
|
||||||
|
@ -712,7 +712,7 @@ func StreamListAlerts(qw422016 *qt422016.Writer, r *http.Request, groupAlerts []
|
||||||
`)
|
`)
|
||||||
//line app/vmalert/web.qtpl:185
|
//line app/vmalert/web.qtpl:185
|
||||||
var keys []string
|
var keys []string
|
||||||
alertsByRule := make(map[string][]*APIAlert)
|
alertsByRule := make(map[string][]*apiAlert)
|
||||||
for _, alert := range ga.Alerts {
|
for _, alert := range ga.Alerts {
|
||||||
if len(alertsByRule[alert.RuleID]) < 1 {
|
if len(alertsByRule[alert.RuleID]) < 1 {
|
||||||
keys = append(keys, alert.RuleID)
|
keys = append(keys, alert.RuleID)
|
||||||
|
@ -891,7 +891,7 @@ func StreamListAlerts(qw422016 *qt422016.Writer, r *http.Request, groupAlerts []
|
||||||
}
|
}
|
||||||
|
|
||||||
//line app/vmalert/web.qtpl:255
|
//line app/vmalert/web.qtpl:255
|
||||||
func WriteListAlerts(qq422016 qtio422016.Writer, r *http.Request, groupAlerts []GroupAlerts) {
|
func WriteListAlerts(qq422016 qtio422016.Writer, r *http.Request, groupAlerts []groupAlerts) {
|
||||||
//line app/vmalert/web.qtpl:255
|
//line app/vmalert/web.qtpl:255
|
||||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||||
//line app/vmalert/web.qtpl:255
|
//line app/vmalert/web.qtpl:255
|
||||||
|
@ -902,7 +902,7 @@ func WriteListAlerts(qq422016 qtio422016.Writer, r *http.Request, groupAlerts []
|
||||||
}
|
}
|
||||||
|
|
||||||
//line app/vmalert/web.qtpl:255
|
//line app/vmalert/web.qtpl:255
|
||||||
func ListAlerts(r *http.Request, groupAlerts []GroupAlerts) string {
|
func ListAlerts(r *http.Request, groupAlerts []groupAlerts) string {
|
||||||
//line app/vmalert/web.qtpl:255
|
//line app/vmalert/web.qtpl:255
|
||||||
qb422016 := qt422016.AcquireByteBuffer()
|
qb422016 := qt422016.AcquireByteBuffer()
|
||||||
//line app/vmalert/web.qtpl:255
|
//line app/vmalert/web.qtpl:255
|
||||||
|
@ -1091,7 +1091,7 @@ func ListTargets(r *http.Request, targets map[notifier.TargetType][]notifier.Tar
|
||||||
}
|
}
|
||||||
|
|
||||||
//line app/vmalert/web.qtpl:313
|
//line app/vmalert/web.qtpl:313
|
||||||
func StreamAlert(qw422016 *qt422016.Writer, r *http.Request, alert *APIAlert) {
|
func StreamAlert(qw422016 *qt422016.Writer, r *http.Request, alert *apiAlert) {
|
||||||
//line app/vmalert/web.qtpl:313
|
//line app/vmalert/web.qtpl:313
|
||||||
qw422016.N().S(`
|
qw422016.N().S(`
|
||||||
`)
|
`)
|
||||||
|
@ -1274,7 +1274,7 @@ func StreamAlert(qw422016 *qt422016.Writer, r *http.Request, alert *APIAlert) {
|
||||||
}
|
}
|
||||||
|
|
||||||
//line app/vmalert/web.qtpl:397
|
//line app/vmalert/web.qtpl:397
|
||||||
func WriteAlert(qq422016 qtio422016.Writer, r *http.Request, alert *APIAlert) {
|
func WriteAlert(qq422016 qtio422016.Writer, r *http.Request, alert *apiAlert) {
|
||||||
//line app/vmalert/web.qtpl:397
|
//line app/vmalert/web.qtpl:397
|
||||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||||
//line app/vmalert/web.qtpl:397
|
//line app/vmalert/web.qtpl:397
|
||||||
|
@ -1285,7 +1285,7 @@ func WriteAlert(qq422016 qtio422016.Writer, r *http.Request, alert *APIAlert) {
|
||||||
}
|
}
|
||||||
|
|
||||||
//line app/vmalert/web.qtpl:397
|
//line app/vmalert/web.qtpl:397
|
||||||
func Alert(r *http.Request, alert *APIAlert) string {
|
func Alert(r *http.Request, alert *apiAlert) string {
|
||||||
//line app/vmalert/web.qtpl:397
|
//line app/vmalert/web.qtpl:397
|
||||||
qb422016 := qt422016.AcquireByteBuffer()
|
qb422016 := qt422016.AcquireByteBuffer()
|
||||||
//line app/vmalert/web.qtpl:397
|
//line app/vmalert/web.qtpl:397
|
||||||
|
@ -1300,7 +1300,7 @@ func Alert(r *http.Request, alert *APIAlert) string {
|
||||||
}
|
}
|
||||||
|
|
||||||
//line app/vmalert/web.qtpl:400
|
//line app/vmalert/web.qtpl:400
|
||||||
func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule APIRule) {
|
func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule apiRule) {
|
||||||
//line app/vmalert/web.qtpl:400
|
//line app/vmalert/web.qtpl:400
|
||||||
qw422016.N().S(`
|
qw422016.N().S(`
|
||||||
`)
|
`)
|
||||||
|
@ -1331,9 +1331,9 @@ func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule APIRule)
|
||||||
var seriesFetchedEnabled bool
|
var seriesFetchedEnabled bool
|
||||||
var seriesFetchedWarning bool
|
var seriesFetchedWarning bool
|
||||||
for _, u := range rule.Updates {
|
for _, u := range rule.Updates {
|
||||||
if u.seriesFetched != nil {
|
if u.SeriesFetched != nil {
|
||||||
seriesFetchedEnabled = true
|
seriesFetchedEnabled = true
|
||||||
if *u.seriesFetched == 0 && u.samples == 0 {
|
if *u.SeriesFetched == 0 && u.Samples == 0 {
|
||||||
seriesFetchedWarning = true
|
seriesFetchedWarning = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1587,7 +1587,7 @@ func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule APIRule)
|
||||||
qw422016.N().S(`
|
qw422016.N().S(`
|
||||||
<tr`)
|
<tr`)
|
||||||
//line app/vmalert/web.qtpl:540
|
//line app/vmalert/web.qtpl:540
|
||||||
if u.err != nil {
|
if u.Err != nil {
|
||||||
//line app/vmalert/web.qtpl:540
|
//line app/vmalert/web.qtpl:540
|
||||||
qw422016.N().S(` class="alert-danger"`)
|
qw422016.N().S(` class="alert-danger"`)
|
||||||
//line app/vmalert/web.qtpl:540
|
//line app/vmalert/web.qtpl:540
|
||||||
|
@ -1597,13 +1597,13 @@ func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule APIRule)
|
||||||
<td>
|
<td>
|
||||||
<span class="badge bg-primary rounded-pill me-3" title="Updated at">`)
|
<span class="badge bg-primary rounded-pill me-3" title="Updated at">`)
|
||||||
//line app/vmalert/web.qtpl:542
|
//line app/vmalert/web.qtpl:542
|
||||||
qw422016.E().S(u.time.Format(time.RFC3339))
|
qw422016.E().S(u.Time.Format(time.RFC3339))
|
||||||
//line app/vmalert/web.qtpl:542
|
//line app/vmalert/web.qtpl:542
|
||||||
qw422016.N().S(`</span>
|
qw422016.N().S(`</span>
|
||||||
</td>
|
</td>
|
||||||
<td class="text-center">`)
|
<td class="text-center">`)
|
||||||
//line app/vmalert/web.qtpl:544
|
//line app/vmalert/web.qtpl:544
|
||||||
qw422016.N().D(u.samples)
|
qw422016.N().D(u.Samples)
|
||||||
//line app/vmalert/web.qtpl:544
|
//line app/vmalert/web.qtpl:544
|
||||||
qw422016.N().S(`</td>
|
qw422016.N().S(`</td>
|
||||||
`)
|
`)
|
||||||
|
@ -1612,9 +1612,9 @@ func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule APIRule)
|
||||||
//line app/vmalert/web.qtpl:545
|
//line app/vmalert/web.qtpl:545
|
||||||
qw422016.N().S(`<td class="text-center">`)
|
qw422016.N().S(`<td class="text-center">`)
|
||||||
//line app/vmalert/web.qtpl:545
|
//line app/vmalert/web.qtpl:545
|
||||||
if u.seriesFetched != nil {
|
if u.SeriesFetched != nil {
|
||||||
//line app/vmalert/web.qtpl:545
|
//line app/vmalert/web.qtpl:545
|
||||||
qw422016.N().D(*u.seriesFetched)
|
qw422016.N().D(*u.SeriesFetched)
|
||||||
//line app/vmalert/web.qtpl:545
|
//line app/vmalert/web.qtpl:545
|
||||||
}
|
}
|
||||||
//line app/vmalert/web.qtpl:545
|
//line app/vmalert/web.qtpl:545
|
||||||
|
@ -1625,18 +1625,18 @@ func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule APIRule)
|
||||||
qw422016.N().S(`
|
qw422016.N().S(`
|
||||||
<td class="text-center">`)
|
<td class="text-center">`)
|
||||||
//line app/vmalert/web.qtpl:546
|
//line app/vmalert/web.qtpl:546
|
||||||
qw422016.N().FPrec(u.duration.Seconds(), 3)
|
qw422016.N().FPrec(u.Duration.Seconds(), 3)
|
||||||
//line app/vmalert/web.qtpl:546
|
//line app/vmalert/web.qtpl:546
|
||||||
qw422016.N().S(`s</td>
|
qw422016.N().S(`s</td>
|
||||||
<td class="text-center">`)
|
<td class="text-center">`)
|
||||||
//line app/vmalert/web.qtpl:547
|
//line app/vmalert/web.qtpl:547
|
||||||
qw422016.E().S(u.at.Format(time.RFC3339))
|
qw422016.E().S(u.At.Format(time.RFC3339))
|
||||||
//line app/vmalert/web.qtpl:547
|
//line app/vmalert/web.qtpl:547
|
||||||
qw422016.N().S(`</td>
|
qw422016.N().S(`</td>
|
||||||
<td>
|
<td>
|
||||||
<textarea class="curl-area" rows="1" onclick="this.focus();this.select()">`)
|
<textarea class="curl-area" rows="1" onclick="this.focus();this.select()">`)
|
||||||
//line app/vmalert/web.qtpl:549
|
//line app/vmalert/web.qtpl:549
|
||||||
qw422016.E().S(u.curl)
|
qw422016.E().S(u.Curl)
|
||||||
//line app/vmalert/web.qtpl:549
|
//line app/vmalert/web.qtpl:549
|
||||||
qw422016.N().S(`</textarea>
|
qw422016.N().S(`</textarea>
|
||||||
</td>
|
</td>
|
||||||
|
@ -1644,12 +1644,12 @@ func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule APIRule)
|
||||||
</li>
|
</li>
|
||||||
`)
|
`)
|
||||||
//line app/vmalert/web.qtpl:553
|
//line app/vmalert/web.qtpl:553
|
||||||
if u.err != nil {
|
if u.Err != nil {
|
||||||
//line app/vmalert/web.qtpl:553
|
//line app/vmalert/web.qtpl:553
|
||||||
qw422016.N().S(`
|
qw422016.N().S(`
|
||||||
<tr`)
|
<tr`)
|
||||||
//line app/vmalert/web.qtpl:554
|
//line app/vmalert/web.qtpl:554
|
||||||
if u.err != nil {
|
if u.Err != nil {
|
||||||
//line app/vmalert/web.qtpl:554
|
//line app/vmalert/web.qtpl:554
|
||||||
qw422016.N().S(` class="alert-danger"`)
|
qw422016.N().S(` class="alert-danger"`)
|
||||||
//line app/vmalert/web.qtpl:554
|
//line app/vmalert/web.qtpl:554
|
||||||
|
@ -1671,7 +1671,7 @@ func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule APIRule)
|
||||||
qw422016.N().S(`">
|
qw422016.N().S(`">
|
||||||
<span class="alert-danger">`)
|
<span class="alert-danger">`)
|
||||||
//line app/vmalert/web.qtpl:556
|
//line app/vmalert/web.qtpl:556
|
||||||
qw422016.E().V(u.err)
|
qw422016.E().V(u.Err)
|
||||||
//line app/vmalert/web.qtpl:556
|
//line app/vmalert/web.qtpl:556
|
||||||
qw422016.N().S(`</span>
|
qw422016.N().S(`</span>
|
||||||
</td>
|
</td>
|
||||||
|
@ -1697,7 +1697,7 @@ func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule APIRule)
|
||||||
}
|
}
|
||||||
|
|
||||||
//line app/vmalert/web.qtpl:563
|
//line app/vmalert/web.qtpl:563
|
||||||
func WriteRuleDetails(qq422016 qtio422016.Writer, r *http.Request, rule APIRule) {
|
func WriteRuleDetails(qq422016 qtio422016.Writer, r *http.Request, rule apiRule) {
|
||||||
//line app/vmalert/web.qtpl:563
|
//line app/vmalert/web.qtpl:563
|
||||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||||
//line app/vmalert/web.qtpl:563
|
//line app/vmalert/web.qtpl:563
|
||||||
|
@ -1708,7 +1708,7 @@ func WriteRuleDetails(qq422016 qtio422016.Writer, r *http.Request, rule APIRule)
|
||||||
}
|
}
|
||||||
|
|
||||||
//line app/vmalert/web.qtpl:563
|
//line app/vmalert/web.qtpl:563
|
||||||
func RuleDetails(r *http.Request, rule APIRule) string {
|
func RuleDetails(r *http.Request, rule apiRule) string {
|
||||||
//line app/vmalert/web.qtpl:563
|
//line app/vmalert/web.qtpl:563
|
||||||
qb422016 := qt422016.AcquireByteBuffer()
|
qb422016 := qt422016.AcquireByteBuffer()
|
||||||
//line app/vmalert/web.qtpl:563
|
//line app/vmalert/web.qtpl:563
|
||||||
|
@ -1853,7 +1853,7 @@ func badgeStabilizing() string {
|
||||||
}
|
}
|
||||||
|
|
||||||
//line app/vmalert/web.qtpl:585
|
//line app/vmalert/web.qtpl:585
|
||||||
func streamseriesFetchedWarn(qw422016 *qt422016.Writer, r APIRule) {
|
func streamseriesFetchedWarn(qw422016 *qt422016.Writer, r apiRule) {
|
||||||
//line app/vmalert/web.qtpl:585
|
//line app/vmalert/web.qtpl:585
|
||||||
qw422016.N().S(`
|
qw422016.N().S(`
|
||||||
`)
|
`)
|
||||||
|
@ -1879,7 +1879,7 @@ func streamseriesFetchedWarn(qw422016 *qt422016.Writer, r APIRule) {
|
||||||
}
|
}
|
||||||
|
|
||||||
//line app/vmalert/web.qtpl:596
|
//line app/vmalert/web.qtpl:596
|
||||||
func writeseriesFetchedWarn(qq422016 qtio422016.Writer, r APIRule) {
|
func writeseriesFetchedWarn(qq422016 qtio422016.Writer, r apiRule) {
|
||||||
//line app/vmalert/web.qtpl:596
|
//line app/vmalert/web.qtpl:596
|
||||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||||
//line app/vmalert/web.qtpl:596
|
//line app/vmalert/web.qtpl:596
|
||||||
|
@ -1890,7 +1890,7 @@ func writeseriesFetchedWarn(qq422016 qtio422016.Writer, r APIRule) {
|
||||||
}
|
}
|
||||||
|
|
||||||
//line app/vmalert/web.qtpl:596
|
//line app/vmalert/web.qtpl:596
|
||||||
func seriesFetchedWarn(r APIRule) string {
|
func seriesFetchedWarn(r apiRule) string {
|
||||||
//line app/vmalert/web.qtpl:596
|
//line app/vmalert/web.qtpl:596
|
||||||
qb422016 := qt422016.AcquireByteBuffer()
|
qb422016 := qt422016.AcquireByteBuffer()
|
||||||
//line app/vmalert/web.qtpl:596
|
//line app/vmalert/web.qtpl:596
|
||||||
|
@ -1905,6 +1905,6 @@ func seriesFetchedWarn(r APIRule) string {
|
||||||
}
|
}
|
||||||
|
|
||||||
//line app/vmalert/web.qtpl:599
|
//line app/vmalert/web.qtpl:599
|
||||||
func isNoMatch(r APIRule) bool {
|
func isNoMatch(r apiRule) bool {
|
||||||
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
|
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
@ -9,32 +10,29 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestHandler(t *testing.T) {
|
func TestHandler(t *testing.T) {
|
||||||
ar := &AlertingRule{
|
fq := &datasource.FakeQuerier{}
|
||||||
Name: "alert",
|
fq.Add(datasource.Metric{
|
||||||
alerts: map[uint64]*notifier.Alert{
|
Values: []float64{1}, Timestamps: []int64{0},
|
||||||
0: {State: notifier.StateFiring},
|
|
||||||
},
|
|
||||||
state: newRuleState(10),
|
|
||||||
}
|
|
||||||
ar.state.add(ruleStateEntry{
|
|
||||||
time: time.Now(),
|
|
||||||
at: time.Now(),
|
|
||||||
samples: 10,
|
|
||||||
})
|
})
|
||||||
rr := &RecordingRule{
|
g := &rule.Group{
|
||||||
Name: "record",
|
Name: "group",
|
||||||
state: newRuleState(10),
|
Concurrency: 1,
|
||||||
}
|
}
|
||||||
g := &Group{
|
ar := rule.NewAlertingRule(fq, g, config.Rule{ID: 0, Alert: "alert"})
|
||||||
Name: "group",
|
rr := rule.NewRecordingRule(fq, g, config.Rule{ID: 1, Record: "record"})
|
||||||
Rules: []Rule{ar, rr},
|
g.Rules = []rule.Rule{ar, rr}
|
||||||
}
|
g.ExecOnce(context.Background(), func() []notifier.Notifier { return nil }, nil, time.Time{})
|
||||||
m := &manager{groups: make(map[uint64]*Group)}
|
|
||||||
m.groups[0] = g
|
m := &manager{groups: map[uint64]*rule.Group{
|
||||||
|
g.ID(): g,
|
||||||
|
}}
|
||||||
rh := &requestHandler{m: m}
|
rh := &requestHandler{m: m}
|
||||||
|
|
||||||
getResp := func(url string, to interface{}, code int) {
|
getResp := func(url string, to interface{}, code int) {
|
||||||
|
@ -70,13 +68,13 @@ func TestHandler(t *testing.T) {
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("/vmalert/rule", func(t *testing.T) {
|
t.Run("/vmalert/rule", func(t *testing.T) {
|
||||||
a := ar.ToAPI()
|
a := ruleToAPI(ar)
|
||||||
getResp(ts.URL+"/vmalert/"+a.WebLink(), nil, 200)
|
getResp(ts.URL+"/vmalert/"+a.WebLink(), nil, 200)
|
||||||
r := rr.ToAPI()
|
r := ruleToAPI(rr)
|
||||||
getResp(ts.URL+"/vmalert/"+r.WebLink(), nil, 200)
|
getResp(ts.URL+"/vmalert/"+r.WebLink(), nil, 200)
|
||||||
})
|
})
|
||||||
t.Run("/vmalert/alert", func(t *testing.T) {
|
t.Run("/vmalert/alert", func(t *testing.T) {
|
||||||
alerts := ar.AlertsToAPI()
|
alerts := ruleToAPIAlert(ar)
|
||||||
for _, a := range alerts {
|
for _, a := range alerts {
|
||||||
getResp(ts.URL+"/vmalert/"+a.WebLink(), nil, 200)
|
getResp(ts.URL+"/vmalert/"+a.WebLink(), nil, 200)
|
||||||
}
|
}
|
||||||
|
@ -103,14 +101,14 @@ func TestHandler(t *testing.T) {
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
t.Run("/api/v1/alert?alertID&groupID", func(t *testing.T) {
|
t.Run("/api/v1/alert?alertID&groupID", func(t *testing.T) {
|
||||||
expAlert := ar.newAlertAPI(*ar.alerts[0])
|
expAlert := newAlertAPI(ar, ar.GetAlerts()[0])
|
||||||
alert := &APIAlert{}
|
alert := &apiAlert{}
|
||||||
getResp(ts.URL+"/"+expAlert.APILink(), alert, 200)
|
getResp(ts.URL+"/"+expAlert.APILink(), alert, 200)
|
||||||
if !reflect.DeepEqual(alert, expAlert) {
|
if !reflect.DeepEqual(alert, expAlert) {
|
||||||
t.Errorf("expected %v is equal to %v", alert, expAlert)
|
t.Errorf("expected %v is equal to %v", alert, expAlert)
|
||||||
}
|
}
|
||||||
|
|
||||||
alert = &APIAlert{}
|
alert = &apiAlert{}
|
||||||
getResp(ts.URL+"/vmalert/"+expAlert.APILink(), alert, 200)
|
getResp(ts.URL+"/vmalert/"+expAlert.APILink(), alert, 200)
|
||||||
if !reflect.DeepEqual(alert, expAlert) {
|
if !reflect.DeepEqual(alert, expAlert) {
|
||||||
t.Errorf("expected %v is equal to %v", alert, expAlert)
|
t.Errorf("expected %v is equal to %v", alert, expAlert)
|
||||||
|
@ -148,7 +146,7 @@ func TestHandler(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestEmptyResponse(t *testing.T) {
|
func TestEmptyResponse(t *testing.T) {
|
||||||
rhWithNoGroups := &requestHandler{m: &manager{groups: make(map[uint64]*Group)}}
|
rhWithNoGroups := &requestHandler{m: &manager{groups: make(map[uint64]*rule.Group)}}
|
||||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { rhWithNoGroups.handler(w, r) }))
|
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { rhWithNoGroups.handler(w, r) }))
|
||||||
defer ts.Close()
|
defer ts.Close()
|
||||||
|
|
||||||
|
@ -201,7 +199,7 @@ func TestEmptyResponse(t *testing.T) {
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
rhWithEmptyGroup := &requestHandler{m: &manager{groups: map[uint64]*Group{0: {Name: "test"}}}}
|
rhWithEmptyGroup := &requestHandler{m: &manager{groups: map[uint64]*rule.Group{0: {Name: "test"}}}}
|
||||||
ts.Config.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { rhWithEmptyGroup.handler(w, r) })
|
ts.Config.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { rhWithEmptyGroup.handler(w, r) })
|
||||||
|
|
||||||
t.Run("empty group /api/v1/rules", func(t *testing.T) {
|
t.Run("empty group /api/v1/rules", func(t *testing.T) {
|
||||||
|
|
|
@ -2,13 +2,28 @@ package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"net/url"
|
||||||
|
"sort"
|
||||||
|
"strconv"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
|
||||||
)
|
)
|
||||||
|
|
||||||
// APIAlert represents a notifier.AlertingRule state
|
const (
|
||||||
|
// paramGroupID is the group id key in URL parameters
|
||||||
|
paramGroupID = "group_id"
|
||||||
|
// paramAlertID is the alert id key in URL parameters
|
||||||
|
paramAlertID = "alert_id"
|
||||||
|
// paramRuleID is the rule id key in URL parameters
|
||||||
|
paramRuleID = "rule_id"
|
||||||
|
)
|
||||||
|
|
||||||
|
// apiAlert represents a notifier.AlertingRule state
|
||||||
// for WEB view
|
// for WEB view
|
||||||
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
|
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
|
||||||
type APIAlert struct {
|
type apiAlert struct {
|
||||||
State string `json:"state"`
|
State string `json:"state"`
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
Value string `json:"value"`
|
Value string `json:"value"`
|
||||||
|
@ -38,24 +53,24 @@ type APIAlert struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
// WebLink returns a link to the alert which can be used in UI.
|
// WebLink returns a link to the alert which can be used in UI.
|
||||||
func (aa *APIAlert) WebLink() string {
|
func (aa *apiAlert) WebLink() string {
|
||||||
return fmt.Sprintf("alert?%s=%s&%s=%s",
|
return fmt.Sprintf("alert?%s=%s&%s=%s",
|
||||||
paramGroupID, aa.GroupID, paramAlertID, aa.ID)
|
paramGroupID, aa.GroupID, paramAlertID, aa.ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
// APILink returns a link to the alert's JSON representation.
|
// APILink returns a link to the alert's JSON representation.
|
||||||
func (aa *APIAlert) APILink() string {
|
func (aa *apiAlert) APILink() string {
|
||||||
return fmt.Sprintf("api/v1/alert?%s=%s&%s=%s",
|
return fmt.Sprintf("api/v1/alert?%s=%s&%s=%s",
|
||||||
paramGroupID, aa.GroupID, paramAlertID, aa.ID)
|
paramGroupID, aa.GroupID, paramAlertID, aa.ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
// APIGroup represents Group for WEB view
|
// apiGroup represents Group for web view
|
||||||
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
|
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
|
||||||
type APIGroup struct {
|
type apiGroup struct {
|
||||||
// Name is the group name as present in the config
|
// Name is the group name as present in the config
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
// Rules contains both recording and alerting rules
|
// Rules contains both recording and alerting rules
|
||||||
Rules []APIRule `json:"rules"`
|
Rules []apiRule `json:"rules"`
|
||||||
// Interval is the Group's evaluation interval in float seconds as present in the file.
|
// Interval is the Group's evaluation interval in float seconds as present in the file.
|
||||||
Interval float64 `json:"interval"`
|
Interval float64 `json:"interval"`
|
||||||
// LastEvaluation is the timestamp of the last time the Group was executed
|
// LastEvaluation is the timestamp of the last time the Group was executed
|
||||||
|
@ -81,15 +96,15 @@ type APIGroup struct {
|
||||||
Labels map[string]string `json:"labels,omitempty"`
|
Labels map[string]string `json:"labels,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// GroupAlerts represents a group of alerts for WEB view
|
// groupAlerts represents a group of alerts for WEB view
|
||||||
type GroupAlerts struct {
|
type groupAlerts struct {
|
||||||
Group APIGroup
|
Group apiGroup
|
||||||
Alerts []*APIAlert
|
Alerts []*apiAlert
|
||||||
}
|
}
|
||||||
|
|
||||||
// APIRule represents a Rule for WEB view
|
// apiRule represents a Rule for web view
|
||||||
// see https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
|
// see https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
|
||||||
type APIRule struct {
|
type apiRule struct {
|
||||||
// State must be one of these under following scenarios
|
// State must be one of these under following scenarios
|
||||||
// "pending": at least 1 alert in the rule in pending state and no other alert in firing ruleState.
|
// "pending": at least 1 alert in the rule in pending state and no other alert in firing ruleState.
|
||||||
// "firing": at least 1 alert in the rule in firing state.
|
// "firing": at least 1 alert in the rule in firing state.
|
||||||
|
@ -111,7 +126,7 @@ type APIRule struct {
|
||||||
// LastEvaluation is the timestamp of the last time the rule was executed
|
// LastEvaluation is the timestamp of the last time the rule was executed
|
||||||
LastEvaluation time.Time `json:"lastEvaluation"`
|
LastEvaluation time.Time `json:"lastEvaluation"`
|
||||||
// Alerts is the list of all the alerts in this rule that are currently pending or firing
|
// Alerts is the list of all the alerts in this rule that are currently pending or firing
|
||||||
Alerts []*APIAlert `json:"alerts,omitempty"`
|
Alerts []*apiAlert `json:"alerts,omitempty"`
|
||||||
// Health is the health of rule evaluation.
|
// Health is the health of rule evaluation.
|
||||||
// It MUST be one of "ok", "err", "unknown"
|
// It MUST be one of "ok", "err", "unknown"
|
||||||
Health string `json:"health"`
|
Health string `json:"health"`
|
||||||
|
@ -138,11 +153,206 @@ type APIRule struct {
|
||||||
// MaxUpdates is the max number of recorded ruleStateEntry objects
|
// MaxUpdates is the max number of recorded ruleStateEntry objects
|
||||||
MaxUpdates int `json:"max_updates_entries"`
|
MaxUpdates int `json:"max_updates_entries"`
|
||||||
// Updates contains the ordered list of recorded ruleStateEntry objects
|
// Updates contains the ordered list of recorded ruleStateEntry objects
|
||||||
Updates []ruleStateEntry `json:"-"`
|
Updates []rule.StateEntry `json:"-"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// WebLink returns a link to the alert which can be used in UI.
|
// WebLink returns a link to the alert which can be used in UI.
|
||||||
func (ar APIRule) WebLink() string {
|
func (ar apiRule) WebLink() string {
|
||||||
return fmt.Sprintf("rule?%s=%s&%s=%s",
|
return fmt.Sprintf("rule?%s=%s&%s=%s",
|
||||||
paramGroupID, ar.GroupID, paramRuleID, ar.ID)
|
paramGroupID, ar.GroupID, paramRuleID, ar.ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ruleToAPI(r interface{}) apiRule {
|
||||||
|
if ar, ok := r.(*rule.AlertingRule); ok {
|
||||||
|
return alertingToAPI(ar)
|
||||||
|
}
|
||||||
|
if rr, ok := r.(*rule.RecordingRule); ok {
|
||||||
|
return recordingToAPI(rr)
|
||||||
|
}
|
||||||
|
return apiRule{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func recordingToAPI(rr *rule.RecordingRule) apiRule {
|
||||||
|
lastState := rule.GetLastEntry(rr)
|
||||||
|
r := apiRule{
|
||||||
|
Type: "recording",
|
||||||
|
DatasourceType: rr.Type.String(),
|
||||||
|
Name: rr.Name,
|
||||||
|
Query: rr.Expr,
|
||||||
|
Labels: rr.Labels,
|
||||||
|
LastEvaluation: lastState.Time,
|
||||||
|
EvaluationTime: lastState.Duration.Seconds(),
|
||||||
|
Health: "ok",
|
||||||
|
LastSamples: lastState.Samples,
|
||||||
|
LastSeriesFetched: lastState.SeriesFetched,
|
||||||
|
MaxUpdates: rule.GetRuleStateSize(rr),
|
||||||
|
Updates: rule.GetAllRuleState(rr),
|
||||||
|
|
||||||
|
// encode as strings to avoid rounding
|
||||||
|
ID: fmt.Sprintf("%d", rr.ID()),
|
||||||
|
GroupID: fmt.Sprintf("%d", rr.GroupID),
|
||||||
|
}
|
||||||
|
if lastState.Err != nil {
|
||||||
|
r.LastError = lastState.Err.Error()
|
||||||
|
r.Health = "err"
|
||||||
|
}
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
// alertingToAPI returns Rule representation in form of apiRule
|
||||||
|
func alertingToAPI(ar *rule.AlertingRule) apiRule {
|
||||||
|
lastState := rule.GetLastEntry(ar)
|
||||||
|
r := apiRule{
|
||||||
|
Type: "alerting",
|
||||||
|
DatasourceType: ar.Type.String(),
|
||||||
|
Name: ar.Name,
|
||||||
|
Query: ar.Expr,
|
||||||
|
Duration: ar.For.Seconds(),
|
||||||
|
KeepFiringFor: ar.KeepFiringFor.Seconds(),
|
||||||
|
Labels: ar.Labels,
|
||||||
|
Annotations: ar.Annotations,
|
||||||
|
LastEvaluation: lastState.Time,
|
||||||
|
EvaluationTime: lastState.Duration.Seconds(),
|
||||||
|
Health: "ok",
|
||||||
|
State: "inactive",
|
||||||
|
Alerts: ruleToAPIAlert(ar),
|
||||||
|
LastSamples: lastState.Samples,
|
||||||
|
LastSeriesFetched: lastState.SeriesFetched,
|
||||||
|
MaxUpdates: rule.GetRuleStateSize(ar),
|
||||||
|
Updates: rule.GetAllRuleState(ar),
|
||||||
|
Debug: ar.Debug,
|
||||||
|
|
||||||
|
// encode as strings to avoid rounding in JSON
|
||||||
|
ID: fmt.Sprintf("%d", ar.ID()),
|
||||||
|
GroupID: fmt.Sprintf("%d", ar.GroupID),
|
||||||
|
}
|
||||||
|
if lastState.Err != nil {
|
||||||
|
r.LastError = lastState.Err.Error()
|
||||||
|
r.Health = "err"
|
||||||
|
}
|
||||||
|
// satisfy apiRule.State logic
|
||||||
|
if len(r.Alerts) > 0 {
|
||||||
|
r.State = notifier.StatePending.String()
|
||||||
|
stateFiring := notifier.StateFiring.String()
|
||||||
|
for _, a := range r.Alerts {
|
||||||
|
if a.State == stateFiring {
|
||||||
|
r.State = stateFiring
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
// ruleToAPIAlert generates list of apiAlert objects from existing alerts
|
||||||
|
func ruleToAPIAlert(ar *rule.AlertingRule) []*apiAlert {
|
||||||
|
var alerts []*apiAlert
|
||||||
|
for _, a := range ar.GetAlerts() {
|
||||||
|
if a.State == notifier.StateInactive {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
alerts = append(alerts, newAlertAPI(ar, a))
|
||||||
|
}
|
||||||
|
return alerts
|
||||||
|
}
|
||||||
|
|
||||||
|
// alertToAPI generates apiAlert object from alert by its id(hash)
|
||||||
|
func alertToAPI(ar *rule.AlertingRule, id uint64) *apiAlert {
|
||||||
|
a := ar.GetAlert(id)
|
||||||
|
if a == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return newAlertAPI(ar, a)
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewAlertAPI creates apiAlert for notifier.Alert
|
||||||
|
func newAlertAPI(ar *rule.AlertingRule, a *notifier.Alert) *apiAlert {
|
||||||
|
aa := &apiAlert{
|
||||||
|
// encode as strings to avoid rounding
|
||||||
|
ID: fmt.Sprintf("%d", a.ID),
|
||||||
|
GroupID: fmt.Sprintf("%d", a.GroupID),
|
||||||
|
RuleID: fmt.Sprintf("%d", ar.RuleID),
|
||||||
|
|
||||||
|
Name: a.Name,
|
||||||
|
Expression: ar.Expr,
|
||||||
|
Labels: a.Labels,
|
||||||
|
Annotations: a.Annotations,
|
||||||
|
State: a.State.String(),
|
||||||
|
ActiveAt: a.ActiveAt,
|
||||||
|
Restored: a.Restored,
|
||||||
|
Value: strconv.FormatFloat(a.Value, 'f', -1, 32),
|
||||||
|
}
|
||||||
|
if alertURLGeneratorFn != nil {
|
||||||
|
aa.SourceLink = alertURLGeneratorFn(*a)
|
||||||
|
}
|
||||||
|
if a.State == notifier.StateFiring && !a.KeepFiringSince.IsZero() {
|
||||||
|
aa.Stabilizing = true
|
||||||
|
}
|
||||||
|
return aa
|
||||||
|
}
|
||||||
|
|
||||||
|
func groupToAPI(g *rule.Group) apiGroup {
|
||||||
|
g = g.DeepCopy()
|
||||||
|
ag := apiGroup{
|
||||||
|
// encode as string to avoid rounding
|
||||||
|
ID: fmt.Sprintf("%d", g.ID()),
|
||||||
|
|
||||||
|
Name: g.Name,
|
||||||
|
Type: g.Type.String(),
|
||||||
|
File: g.File,
|
||||||
|
Interval: g.Interval.Seconds(),
|
||||||
|
LastEvaluation: g.LastEvaluation,
|
||||||
|
Concurrency: g.Concurrency,
|
||||||
|
Params: urlValuesToStrings(g.Params),
|
||||||
|
Headers: headersToStrings(g.Headers),
|
||||||
|
NotifierHeaders: headersToStrings(g.NotifierHeaders),
|
||||||
|
|
||||||
|
Labels: g.Labels,
|
||||||
|
}
|
||||||
|
ag.Rules = make([]apiRule, 0)
|
||||||
|
for _, r := range g.Rules {
|
||||||
|
ag.Rules = append(ag.Rules, ruleToAPI(r))
|
||||||
|
}
|
||||||
|
return ag
|
||||||
|
}
|
||||||
|
|
||||||
|
// urlValuesToStrings flattens values into a list of "key=value" strings.
//
// Keys are emitted in sorted order; values of the same key keep their
// original order. The key and value are joined as-is, without URL escaping.
// It returns nil when values holds no key/value pairs at all.
func urlValuesToStrings(values url.Values) []string {
	// Count the pairs up front so the result can be allocated exactly once.
	total := 0
	for _, vs := range values {
		total += len(vs)
	}
	if total == 0 {
		return nil
	}

	// Map iteration order is random; sort keys for deterministic output.
	keys := make([]string, 0, len(values))
	for k := range values {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	res := make([]string, 0, total)
	for _, k := range keys {
		for _, v := range values[k] {
			res = append(res, k+"="+v)
		}
	}
	return res
}
|
||||||
|
|
||||||
|
// headersToStrings converts headers into a list of "key: value" strings
// ordered by key. It returns nil for an empty or nil map.
func headersToStrings(headers map[string]string) []string {
	if len(headers) == 0 {
		return nil
	}

	// Map iteration order is random; sort keys for deterministic output.
	keys := make([]string, 0, len(headers))
	for k := range headers {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	// Exactly one entry per key, so the result length is known in advance.
	res := make([]string, len(keys))
	for i, k := range keys {
		res[i] = k + ": " + headers[k]
	}
	return res
}
|
||||||
|
|
23
app/vmalert/web_types_test.go
Normal file
23
app/vmalert/web_types_test.go
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestUrlValuesToStrings(t *testing.T) {
|
||||||
|
mapQueryParams := map[string][]string{
|
||||||
|
"param1": {"param1"},
|
||||||
|
"param2": {"anotherparam"},
|
||||||
|
}
|
||||||
|
expectedRes := []string{"param1=param1", "param2=anotherparam"}
|
||||||
|
res := urlValuesToStrings(mapQueryParams)
|
||||||
|
|
||||||
|
if len(res) != len(expectedRes) {
|
||||||
|
t.Errorf("Expected length %d, but got %d", len(expectedRes), len(res))
|
||||||
|
}
|
||||||
|
for ind, val := range expectedRes {
|
||||||
|
if val != res[ind] {
|
||||||
|
t.Errorf("Expected %v; but got %v", val, res[ind])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -33,7 +33,7 @@ var (
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
saCfgReloaderStopCh = make(chan struct{})
|
saCfgReloaderStopCh chan struct{}
|
||||||
saCfgReloaderWG sync.WaitGroup
|
saCfgReloaderWG sync.WaitGroup
|
||||||
|
|
||||||
saCfgReloads = metrics.NewCounter(`vminsert_streamagg_config_reloads_total`)
|
saCfgReloads = metrics.NewCounter(`vminsert_streamagg_config_reloads_total`)
|
||||||
|
@ -62,6 +62,8 @@ func CheckStreamAggrConfig() error {
|
||||||
//
|
//
|
||||||
// MustStopStreamAggr must be called when stream aggr is no longer needed.
|
// MustStopStreamAggr must be called when stream aggr is no longer needed.
|
||||||
func InitStreamAggr() {
|
func InitStreamAggr() {
|
||||||
|
saCfgReloaderStopCh = make(chan struct{})
|
||||||
|
|
||||||
if *streamAggrConfig == "" {
|
if *streamAggrConfig == "" {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
---
|
---
|
||||||
sort: 20
|
sort: 29
|
||||||
weight: 20
|
weight: 29
|
||||||
title: Articles
|
title: Articles
|
||||||
menu:
|
menu:
|
||||||
docs:
|
docs:
|
||||||
parent: "victoriametrics"
|
parent: 'victoriametrics'
|
||||||
weight: 20
|
weight: 29
|
||||||
aliases:
|
aliases:
|
||||||
- /Articles.html
|
- /Articles.html
|
||||||
---
|
---
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
---
|
---
|
||||||
sort: 23
|
sort: 32
|
||||||
weight: 23
|
weight: 32
|
||||||
title: VictoriaMetrics best practices
|
title: VictoriaMetrics best practices
|
||||||
menu:
|
menu:
|
||||||
docs:
|
docs:
|
||||||
parent: "victoriametrics"
|
parent: 'victoriametrics'
|
||||||
weight: 23
|
weight: 32
|
||||||
aliases:
|
aliases:
|
||||||
- /BestPractices.html
|
- /BestPractices.html
|
||||||
---
|
---
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
---
|
---
|
||||||
sort: 16
|
sort: 25
|
||||||
weight: 16
|
weight: 25
|
||||||
title: CHANGELOG
|
title: CHANGELOG
|
||||||
menu:
|
menu:
|
||||||
docs:
|
docs:
|
||||||
parent: "victoriametrics"
|
parent: 'victoriametrics'
|
||||||
weight: 16
|
weight: 25
|
||||||
aliases:
|
aliases:
|
||||||
- /CHANGELOG.html
|
- /CHANGELOG.html
|
||||||
---
|
---
|
||||||
|
@ -44,8 +44,10 @@ The sandbox cluster installation is running under the constant load generated by
|
||||||
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): improve repeated VMUI page load times by enabling caching of static js and css at web browser side according to [these recommendations](https://developer.chrome.com/docs/lighthouse/performance/uses-long-cache-ttl/).
|
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): improve repeated VMUI page load times by enabling caching of static js and css at web browser side according to [these recommendations](https://developer.chrome.com/docs/lighthouse/performance/uses-long-cache-ttl/).
|
||||||
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): show information about lines with bigger values at the top of the legend under the graph in order to simplify graph analysis.
|
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): show information about lines with bigger values at the top of the legend under the graph in order to simplify graph analysis.
|
||||||
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): reduce vertical space usage, so more information is visible on the screen without scrolling.
|
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): reduce vertical space usage, so more information is visible on the screen without scrolling.
|
||||||
|
* FEATURE: [vmalert-tool](https://docs.victoriametrics.com/#vmalert-tool): add `unittest` command to run unit tests for alerting and recording rules. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4789) for details.
|
||||||
|
|
||||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): strip sensitive information such as auth headers or passwords from datasource, remote-read, remote-write or notifier URLs in log messages or UI. This behavior is by default and is controlled via `-datasource.showURL`, `-remoteRead.showURL`, `remoteWrite.showURL` or `-notifier.showURL` cmd-line flags. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5044).
|
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): strip sensitive information such as auth headers or passwords from datasource, remote-read, remote-write or notifier URLs in log messages or UI. This behavior is by default and is controlled via `-datasource.showURL`, `-remoteRead.showURL`, `remoteWrite.showURL` or `-notifier.showURL` cmd-line flags. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5044).
|
||||||
|
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): fix vmalert web UI when running on machines with 32-bit architectures.
|
||||||
* BUGFIX: [vmselect](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): improve performance and memory usage during query processing on machines with big number of CPU cores. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5087) for details.
|
* BUGFIX: [vmselect](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): improve performance and memory usage during query processing on machines with big number of CPU cores. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5087) for details.
|
||||||
* BUGFIX: dashboards: fix vminsert/vmstorage/vmselect metrics filtering when dashboard is used to display data from many sub-clusters with unique job names. Before, only one specific job could have been accounted for component-specific panels, instead of all available jobs for the component.
|
* BUGFIX: dashboards: fix vminsert/vmstorage/vmselect metrics filtering when dashboard is used to display data from many sub-clusters with unique job names. Before, only one specific job could have been accounted for component-specific panels, instead of all available jobs for the component.
|
||||||
|
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
---
|
---
|
||||||
sort: 19
|
sort: 28
|
||||||
weight: 19
|
weight: 28
|
||||||
title: CHANGELOG for the year 2020
|
title: CHANGELOG for the year 2020
|
||||||
menu:
|
menu:
|
||||||
docs:
|
docs:
|
||||||
parent: "victoriametrics"
|
parent: 'victoriametrics'
|
||||||
weight: 19
|
weight: 28
|
||||||
aliases:
|
aliases:
|
||||||
- /CHANGELOG.html
|
- /CHANGELOG.html
|
||||||
---
|
---
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
---
|
---
|
||||||
sort: 18
|
sort: 27
|
||||||
weight: 18
|
weight: 27
|
||||||
title: CHANGELOG for the year 2021
|
title: CHANGELOG for the year 2021
|
||||||
menu:
|
menu:
|
||||||
docs:
|
docs:
|
||||||
parent: "victoriametrics"
|
parent: 'victoriametrics'
|
||||||
weight: 18
|
weight: 27
|
||||||
aliases:
|
aliases:
|
||||||
- /CHANGELOG.html
|
- /CHANGELOG.html
|
||||||
---
|
---
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
---
|
---
|
||||||
sort: 17
|
sort: 26
|
||||||
weight: 17
|
weight: 26
|
||||||
title: CHANGELOG for the year 2022
|
title: CHANGELOG for the year 2022
|
||||||
menu:
|
menu:
|
||||||
docs:
|
docs:
|
||||||
parent: "victoriametrics"
|
parent: 'victoriametrics'
|
||||||
weight: 17
|
weight: 26
|
||||||
aliases:
|
aliases:
|
||||||
- /CHANGELOG.html
|
- /CHANGELOG.html
|
||||||
---
|
---
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
---
|
---
|
||||||
sort: 12
|
sort: 21
|
||||||
weight: 12
|
weight: 21
|
||||||
title: Case studies and talks
|
title: Case studies and talks
|
||||||
menu:
|
menu:
|
||||||
docs:
|
docs:
|
||||||
parent: "victoriametrics"
|
parent: 'victoriametrics'
|
||||||
weight: 12
|
weight: 21
|
||||||
aliases:
|
aliases:
|
||||||
- /CaseStudies.html
|
- /CaseStudies.html
|
||||||
---
|
---
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
---
|
---
|
||||||
sort: 15
|
sort: 24
|
||||||
weight: 15
|
weight: 24
|
||||||
title: FAQ
|
title: FAQ
|
||||||
menu:
|
menu:
|
||||||
docs:
|
docs:
|
||||||
parent: "victoriametrics"
|
parent: 'victoriametrics'
|
||||||
weight: 15
|
weight: 24
|
||||||
aliases:
|
aliases:
|
||||||
- /FAQ.html
|
- /FAQ.html
|
||||||
---
|
---
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
---
|
---
|
||||||
sort: 14
|
sort: 23
|
||||||
weight: 14
|
weight: 23
|
||||||
title: MetricsQL
|
title: MetricsQL
|
||||||
menu:
|
menu:
|
||||||
docs:
|
docs:
|
||||||
parent: "victoriametrics"
|
parent: 'victoriametrics'
|
||||||
weight: 14
|
weight: 23
|
||||||
aliases:
|
aliases:
|
||||||
- /ExtendedPromQL.html
|
- /ExtendedPromQL.html
|
||||||
- /MetricsQL.html
|
- /MetricsQL.html
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
---
|
---
|
||||||
sort: 22
|
sort: 31
|
||||||
weight: 22
|
weight: 31
|
||||||
title: VictoriaMetrics Cluster Per Tenant Statistic
|
title: VictoriaMetrics Cluster Per Tenant Statistic
|
||||||
menu:
|
menu:
|
||||||
docs:
|
docs:
|
||||||
parent: "victoriametrics"
|
parent: 'victoriametrics'
|
||||||
weight: 22
|
weight: 31
|
||||||
aliases:
|
aliases:
|
||||||
- /PerTenantStatistic.html
|
- /PerTenantStatistic.html
|
||||||
---
|
---
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
---
|
---
|
||||||
sort: 13
|
sort: 22
|
||||||
weight: 13
|
weight: 22
|
||||||
title: Quick start
|
title: Quick start
|
||||||
menu:
|
menu:
|
||||||
docs:
|
docs:
|
||||||
parent: "victoriametrics"
|
parent: 'victoriametrics'
|
||||||
weight: 13
|
weight: 22
|
||||||
aliases:
|
aliases:
|
||||||
- /Quick-Start.html
|
- /Quick-Start.html
|
||||||
---
|
---
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
---
|
---
|
||||||
sort: 21
|
sort: 30
|
||||||
weight: 21
|
weight: 30
|
||||||
title: Release process guidance
|
title: Release process guidance
|
||||||
menu:
|
menu:
|
||||||
docs:
|
docs:
|
||||||
parent: "victoriametrics"
|
parent: 'victoriametrics'
|
||||||
weight: 21
|
weight: 30
|
||||||
aliases:
|
aliases:
|
||||||
- /Release-Guide.html
|
- /Release-Guide.html
|
||||||
---
|
---
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
---
|
---
|
||||||
sort: 26
|
sort: 35
|
||||||
weight: 26
|
weight: 35
|
||||||
title: Troubleshooting
|
title: Troubleshooting
|
||||||
menu:
|
menu:
|
||||||
docs:
|
docs:
|
||||||
parent: "victoriametrics"
|
parent: 'victoriametrics'
|
||||||
weight: 26
|
weight: 35
|
||||||
aliases:
|
aliases:
|
||||||
- /Troubleshooting.html
|
- /Troubleshooting.html
|
||||||
---
|
---
|
||||||
|
|
|
@ -4,7 +4,7 @@ weight: 99
|
||||||
title: VictoriaMetrics Enterprise
|
title: VictoriaMetrics Enterprise
|
||||||
menu:
|
menu:
|
||||||
docs:
|
docs:
|
||||||
parent: "victoriametrics"
|
parent: 'victoriametrics'
|
||||||
weight: 99
|
weight: 99
|
||||||
aliases:
|
aliases:
|
||||||
- /enterprise.html
|
- /enterprise.html
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
---
|
---
|
||||||
sort: 25
|
sort: 34
|
||||||
weight: 25
|
weight: 34
|
||||||
title: Key concepts
|
title: Key concepts
|
||||||
menu:
|
menu:
|
||||||
docs:
|
docs:
|
||||||
parent: "victoriametrics"
|
parent: 'victoriametrics'
|
||||||
weight: 25
|
weight: 34
|
||||||
aliases:
|
aliases:
|
||||||
- /keyConcepts.html
|
- /keyConcepts.html
|
||||||
---
|
---
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
---
|
---
|
||||||
sort: 28
|
sort: 37
|
||||||
weight: 28
|
weight: 37
|
||||||
title: Relabeling cookbook
|
title: Relabeling cookbook
|
||||||
menu:
|
menu:
|
||||||
docs:
|
docs:
|
||||||
parent: "victoriametrics"
|
parent: 'victoriametrics'
|
||||||
weight: 25
|
weight: 37
|
||||||
aliases:
|
aliases:
|
||||||
- /relabeling.html
|
- /relabeling.html
|
||||||
---
|
---
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
---
|
---
|
||||||
sort: 27
|
sort: 36
|
||||||
weight: 27
|
weight: 36
|
||||||
title: Prometheus service discovery
|
title: Prometheus service discovery
|
||||||
menu:
|
menu:
|
||||||
docs:
|
docs:
|
||||||
parent: "victoriametrics"
|
parent: 'victoriametrics'
|
||||||
weight: 27
|
weight: 36
|
||||||
aliases:
|
aliases:
|
||||||
- /sd_configs.html
|
- /sd_configs.html
|
||||||
---
|
---
|
||||||
|
|
|
@ -4,7 +4,7 @@ weight: 98
|
||||||
title: Streaming aggregation
|
title: Streaming aggregation
|
||||||
menu:
|
menu:
|
||||||
docs:
|
docs:
|
||||||
parent: "victoriametrics"
|
parent: 'victoriametrics'
|
||||||
weight: 98
|
weight: 98
|
||||||
aliases:
|
aliases:
|
||||||
- /stream-aggregation.html
|
- /stream-aggregation.html
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
---
|
---
|
||||||
sort: 24
|
sort: 33
|
||||||
weight: 24
|
weight: 33
|
||||||
title: VictoriaMetrics API examples
|
title: VictoriaMetrics API examples
|
||||||
menu:
|
menu:
|
||||||
docs:
|
docs:
|
||||||
parent: "victoriametrics"
|
parent: 'victoriametrics'
|
||||||
weight: 24
|
weight: 33
|
||||||
---
|
---
|
||||||
|
|
||||||
# VictoriaMetrics API examples
|
# VictoriaMetrics API examples
|
||||||
|
|
253
docs/vmalert-tool.md
Normal file
253
docs/vmalert-tool.md
Normal file
|
@ -0,0 +1,253 @@
|
||||||
|
---
|
||||||
|
sort: 12
|
||||||
|
weight: 12
|
||||||
|
menu:
|
||||||
|
docs:
|
||||||
|
parent: 'victoriametrics'
|
||||||
|
weight: 12
|
||||||
|
title: vmalert-tool
|
||||||
|
---
|
||||||
|
|
||||||
|
# vmalert-tool
|
||||||
|
|
||||||
|
VMAlert command-line tool
|
||||||
|
|
||||||
|
## Unit testing for rules
|
||||||
|
|
||||||
|
You can use `vmalert-tool` to run unit tests for alerting and recording rules.
|
||||||
|
It will perform the following actions:
|
||||||
|
* sets up an isolated VictoriaMetrics instance;
|
||||||
|
* simulates the periodic ingestion of time series;
|
||||||
|
* queries the ingested data for recording and alerting rules evaluation like [vmalert](https://docs.victoriametrics.com/vmalert.html);
|
||||||
|
* checks whether the firing alerts or resulting recording rules match the expected results.
|
||||||
|
|
||||||
|
See how to run vmalert-tool for unit tests below:
|
||||||
|
```
|
||||||
|
# Run vmalert-tool with one or multiple test files via --files cmd-line flag
|
||||||
|
./vmalert-tool unittest --files test1.yaml --files test2.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
vmalert-tool unittest is compatible with [Prometheus config format for tests](https://prometheus.io/docs/prometheus/latest/configuration/unit_testing_rules/#test-file-format)
|
||||||
|
except `promql_expr_test` field. Use `metricsql_expr_test` field name instead. The name is different because vmalert-tool
|
||||||
|
validates and executes [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html) expressions,
|
||||||
|
which aren't always backward compatible with [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/).
|
||||||
|
|
||||||
|
### Test file format
|
||||||
|
|
||||||
|
The configuration format for files specified in `--files` cmd-line flag is the following:
|
||||||
|
```
|
||||||
|
# Path to the files or http url containing [rule groups](https://docs.victoriametrics.com/vmalert.html#groups) configuration.
|
||||||
|
# Enterprise version of vmalert-tool supports S3 and GCS paths to rules.
|
||||||
|
rule_files:
|
||||||
|
[ - <string> ]
|
||||||
|
|
||||||
|
# The evaluation interval for rules specified in `rule_files`
|
||||||
|
[ evaluation_interval: <duration> | default = 1m ]
|
||||||
|
|
||||||
|
# Groups listed below will be evaluated in the given order.
|
||||||
|
# Not all groups need to be listed; groups that are not listed will be evaluated in the order they are defined in rule_files.
|
||||||
|
group_eval_order:
|
||||||
|
[ - <string> ]
|
||||||
|
|
||||||
|
# The list of unit test files to be checked during evaluation.
|
||||||
|
tests:
|
||||||
|
[ - <test_group> ]
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `<test_group>`
|
||||||
|
|
||||||
|
```
|
||||||
|
# Interval between samples for input series
|
||||||
|
interval: <duration>
|
||||||
|
# Time series to persist into the database according to configured <interval> before running tests.
|
||||||
|
input_series:
|
||||||
|
[ - <series> ]
|
||||||
|
|
||||||
|
# Name of the test group, optional
|
||||||
|
[ name: <string> ]
|
||||||
|
|
||||||
|
# Unit tests for alerting rules
|
||||||
|
alert_rule_test:
|
||||||
|
[ - <alert_test_case> ]
|
||||||
|
|
||||||
|
# Unit tests for MetricsQL expressions.
|
||||||
|
metricsql_expr_test:
|
||||||
|
[ - <metricsql_expr_test> ]
|
||||||
|
|
||||||
|
# External labels accessible for templating.
|
||||||
|
external_labels:
|
||||||
|
[ <labelname>: <string> ... ]
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `<series>`
|
||||||
|
|
||||||
|
```
|
||||||
|
# series in the following format '<metric name>{<label name>=<label value>, ...}'
|
||||||
|
# Examples:
|
||||||
|
# series_name{label1="value1", label2="value2"}
|
||||||
|
# go_goroutines{job="prometheus", instance="localhost:9090"}
|
||||||
|
series: <string>
|
||||||
|
|
||||||
|
# values support several special equations:
|
||||||
|
# 'a+bxc' becomes 'a a+b a+(2*b) a+(3*b) … a+(c*b)'
|
||||||
|
# Read this as series starts at a, then c further samples incrementing by b.
|
||||||
|
# 'a-bxc' becomes 'a a-b a-(2*b) a-(3*b) … a-(c*b)'
|
||||||
|
# Read this as series starts at a, then c further samples decrementing by b (or incrementing by negative b).
|
||||||
|
# '_' represents a missing sample from scrape
|
||||||
|
# 'stale' indicates a stale sample
|
||||||
|
# Examples:
|
||||||
|
# 1. '-2+4x3' becomes '-2 2 6 10' - series starts at -2, then 3 further samples incrementing by 4.
|
||||||
|
# 2. ' 1-2x4' becomes '1 -1 -3 -5 -7' - series starts at 1, then 4 further samples decrementing by 2.
|
||||||
|
# 3. ' 1x4' becomes '1 1 1 1 1' - shorthand for '1+0x4', series starts at 1, then 4 further samples incrementing by 0.
|
||||||
|
# 4. ' 1 _x3 stale' becomes '1 _ _ _ stale' - the missing sample cannot increment, so 3 missing samples are produced by the '_x3' expression.
|
||||||
|
values: <string>
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `<alert_test_case>`
|
||||||
|
|
||||||
|
vmalert by default adds `alertgroup` and `alertname` to the generated alerts and time series.
|
||||||
|
So you will need to specify both `groupname` and `alertname` under a single `<alert_test_case>`,
|
||||||
|
but there is no need to add them under `exp_alerts`.
|
||||||
|
You can also pass `--disableAlertgroupLabel` to skip `alertgroup` check.
|
||||||
|
|
||||||
|
```
|
||||||
|
# The time elapsed from time=0s when this alerting rule should be checked.
|
||||||
|
# Means this rule should be firing at this point, or shouldn't be firing if 'exp_alerts' is empty.
|
||||||
|
eval_time: <duration>
|
||||||
|
|
||||||
|
# Name of the group to be tested.
|
||||||
|
groupname: <string>
|
||||||
|
|
||||||
|
# Name of the alert to be tested.
|
||||||
|
alertname: <string>
|
||||||
|
|
||||||
|
# List of the expected alerts that are firing under the given alertname at
|
||||||
|
# the given evaluation time. If you want to test if an alerting rule should
|
||||||
|
# not be firing, then you can mention only the fields above and leave 'exp_alerts' empty.
|
||||||
|
exp_alerts:
|
||||||
|
[ - <alert> ]
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `<alert>`
|
||||||
|
|
||||||
|
```
|
||||||
|
# These are the expanded labels and annotations of the expected alert.
|
||||||
|
# Note: labels also include the labels of the sample associated with the alert
|
||||||
|
exp_labels:
|
||||||
|
[ <labelname>: <string> ]
|
||||||
|
exp_annotations:
|
||||||
|
[ <labelname>: <string> ]
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `<metricsql_expr_test>`
|
||||||
|
|
||||||
|
```
|
||||||
|
# Expression to evaluate
|
||||||
|
expr: <string>
|
||||||
|
|
||||||
|
# The time elapsed from time=0s when this expression should be evaluated.
|
||||||
|
eval_time: <duration>
|
||||||
|
|
||||||
|
# Expected samples at the given evaluation time.
|
||||||
|
exp_samples:
|
||||||
|
[ - <sample> ]
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `<sample>`
|
||||||
|
|
||||||
|
```
|
||||||
|
# Labels of the sample in usual series notation '<metric name>{<label name>=<label value>, ...}'
|
||||||
|
# Examples:
|
||||||
|
# series_name{label1="value1", label2="value2"}
|
||||||
|
# go_goroutines{job="prometheus", instance="localhost:9090"}
|
||||||
|
labels: <string>
|
||||||
|
|
||||||
|
# The expected value of the MetricsQL expression.
|
||||||
|
value: <number>
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example
|
||||||
|
|
||||||
|
This is an example input file for unit testing which will pass.
|
||||||
|
`test.yaml` is the test file which follows the syntax above and `rules.yaml` contains the alerting and recording rules.
|
||||||
|
|
||||||
|
With `rules.yaml` in the same directory, run `./vmalert-tool unittest --files=./unittest/testdata/test.yaml`.
|
||||||
|
|
||||||
|
#### `test.yaml`
|
||||||
|
|
||||||
|
```
|
||||||
|
rule_files:
|
||||||
|
- rules.yaml
|
||||||
|
|
||||||
|
evaluation_interval: 1m
|
||||||
|
|
||||||
|
tests:
|
||||||
|
- interval: 1m
|
||||||
|
input_series:
|
||||||
|
- series: 'up{job="prometheus", instance="localhost:9090"}'
|
||||||
|
values: "0+0x1440"
|
||||||
|
|
||||||
|
metricsql_expr_test:
|
||||||
|
- expr: suquery_interval_test
|
||||||
|
eval_time: 4m
|
||||||
|
exp_samples:
|
||||||
|
- labels: '{__name__="suquery_interval_test", datacenter="dc-123", instance="localhost:9090", job="prometheus"}'
|
||||||
|
value: 1
|
||||||
|
|
||||||
|
alert_rule_test:
|
||||||
|
- eval_time: 2h
|
||||||
|
groupname: group1
|
||||||
|
alertname: InstanceDown
|
||||||
|
exp_alerts:
|
||||||
|
- exp_labels:
|
||||||
|
job: prometheus
|
||||||
|
severity: page
|
||||||
|
instance: localhost:9090
|
||||||
|
datacenter: dc-123
|
||||||
|
exp_annotations:
|
||||||
|
summary: "Instance localhost:9090 down"
|
||||||
|
description: "localhost:9090 of job prometheus has been down for more than 5 minutes."
|
||||||
|
|
||||||
|
- eval_time: 0
|
||||||
|
groupname: group1
|
||||||
|
alertname: AlwaysFiring
|
||||||
|
exp_alerts:
|
||||||
|
- exp_labels:
|
||||||
|
datacenter: dc-123
|
||||||
|
|
||||||
|
- eval_time: 0
|
||||||
|
groupname: group1
|
||||||
|
alertname: InstanceDown
|
||||||
|
exp_alerts: []
|
||||||
|
|
||||||
|
external_labels:
|
||||||
|
datacenter: dc-123
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `rules.yaml`
|
||||||
|
|
||||||
|
```
|
||||||
|
# This is the rules file.
|
||||||
|
|
||||||
|
groups:
|
||||||
|
- name: group1
|
||||||
|
rules:
|
||||||
|
- alert: InstanceDown
|
||||||
|
expr: up == 0
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: page
|
||||||
|
annotations:
|
||||||
|
summary: "Instance {{ $labels.instance }} down"
|
||||||
|
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
|
||||||
|
- alert: AlwaysFiring
|
||||||
|
expr: 1
|
||||||
|
|
||||||
|
- name: group2
|
||||||
|
rules:
|
||||||
|
- record: job:test:count_over_time1m
|
||||||
|
expr: sum without(instance) (count_over_time(test[1m]))
|
||||||
|
- record: suquery_interval_test
|
||||||
|
expr: count_over_time(up[5m:])
|
||||||
|
```
|
|
@ -765,6 +765,11 @@ See full description for these flags in `./vmalert -help`.
|
||||||
* `limit` group's param has no effect during replay (might be changed in future);
|
* `limit` group's param has no effect during replay (might be changed in future);
|
||||||
* `keep_firing_for` alerting rule param has no effect during replay (might be changed in future).
|
* `keep_firing_for` alerting rule param has no effect during replay (might be changed in future).
|
||||||
|
|
||||||
|
## Unit Testing for Rules
|
||||||
|
|
||||||
|
You can use `vmalert-tool` to test your alerting and recording rules like [promtool does](https://prometheus.io/docs/prometheus/latest/configuration/unit_testing_rules/).
|
||||||
|
See more details [here](https://docs.victoriametrics.com/vmalert-tool.html#Unit-testing-for-rules).
|
||||||
|
|
||||||
## Monitoring
|
## Monitoring
|
||||||
|
|
||||||
`vmalert` exports various metrics in Prometheus exposition format at `http://vmalert-host:8880/metrics` page.
|
`vmalert` exports various metrics in Prometheus exposition format at `http://vmalert-host:8880/metrics` page.
|
||||||
|
|
|
@ -4,7 +4,7 @@ weight: 11
|
||||||
title: vmanomaly
|
title: vmanomaly
|
||||||
menu:
|
menu:
|
||||||
docs:
|
docs:
|
||||||
parent: "victoriametrics"
|
parent: 'victoriametrics'
|
||||||
weight: 11
|
weight: 11
|
||||||
aliases:
|
aliases:
|
||||||
- /vmanomaly.html
|
- /vmanomaly.html
|
||||||
|
|
|
@ -53,3 +53,11 @@ func ParseDuration(s string) (time.Duration, error) {
|
||||||
}
|
}
|
||||||
return time.Duration(ms) * time.Millisecond, nil
|
return time.Duration(ms) * time.Millisecond, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ParseTime returns time for pd.
|
||||||
|
func (pd *Duration) ParseTime() time.Time {
|
||||||
|
if pd == nil {
|
||||||
|
return time.Time{}
|
||||||
|
}
|
||||||
|
return time.UnixMilli(pd.Duration().Milliseconds())
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue