Merge branch 'master' into add-IgnoreFirstSampleInterval-streamaggr

Ivan Yurochko 2024-11-21 15:12:56 +01:00 committed by GitHub
commit e080bdeb6e
393 changed files with 24240 additions and 5274 deletions

View file

@ -60,8 +60,8 @@ body:
For VictoriaMetrics health-state issues please provide full-length screenshots
of Grafana dashboards if possible:
* [Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229/)
* [Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176/)
* [Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229)
* [Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176)
See how to set up monitoring here:
* [monitoring for single-node VictoriaMetrics](https://docs.victoriametrics.com/#monitoring)

View file

@ -88,6 +88,35 @@ jobs:
run: make ${{ matrix.scenario}}
- name: Publish coverage
uses: codecov/codecov-action@v4
uses: codecov/codecov-action@v5
with:
file: ./coverage.txt
integration-test:
name: integration-test
needs: [lint, test]
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@v4
- name: Setup Go
id: go
uses: actions/setup-go@v5
with:
cache: false
go-version: stable
- name: Cache Go artifacts
uses: actions/cache@v4
with:
path: |
~/.cache/go-build
~/go/bin
~/go/pkg/mod
key: go-artifacts-${{ runner.os }}-${{ matrix.scenario }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
restore-keys: go-artifacts-${{ runner.os }}-${{ matrix.scenario }}-
- name: Run integration tests
run: make integration-test

View file

@ -501,10 +501,12 @@ pprof-cpu:
fmt:
gofmt -l -w -s ./lib
gofmt -l -w -s ./app
gofmt -l -w -s ./apptest
vet:
go vet ./lib/...
go vet ./app/...
go vet ./apptest/...
check-all: fmt vet golangci-lint govulncheck
@ -525,6 +527,9 @@ test-full:
test-full-386:
DISABLE_FSYNC_FOR_TESTING=1 GOARCH=386 go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
integration-test: victoria-metrics vmagent vmalert vmauth
go test ./apptest/... -skip="^TestCluster.*"
benchmark:
go test -bench=. ./lib/...
go test -bench=. ./app/...
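As an illustration of what the new integration-test target runs: any Go test under ./apptest whose name does not match ^TestCluster.* is executed against the freshly built binaries. A minimal hypothetical test file (the test name and body are illustrative only, not the repo's actual helpers):

package apptest

import "testing"

// TestSingleNodeStartup is an illustrative placeholder: tests whose names
// match ^TestCluster.* are excluded by the -skip filter in the Makefile target,
// while all other tests in ./apptest run against the freshly built binaries.
func TestSingleNodeStartup(t *testing.T) {
	t.Log("runs after the victoria-metrics, vmagent, vmalert and vmauth targets are built")
}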

View file

@ -23,7 +23,7 @@ Here are some resources and information about VictoriaMetrics:
- Available: [Binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest), [Docker images](https://hub.docker.com/r/victoriametrics/victoria-metrics/), [Source code](https://github.com/VictoriaMetrics/VictoriaMetrics)
- Deployment types: [Single-node version](https://docs.victoriametrics.com/), [Cluster version](https://docs.victoriametrics.com/cluster-victoriametrics/), and [Enterprise version](https://docs.victoriametrics.com/enterprise/)
- Changelog: [CHANGELOG](https://docs.victoriametrics.com/changelog/), and [How to upgrade](https://docs.victoriametrics.com/#how-to-upgrade-victoriametrics)
- Community: [Slack](https://slack.victoriametrics.com/), [Twitter](https://twitter.com/VictoriaMetrics), [LinkedIn](https://www.linkedin.com/company/victoriametrics/), [YouTube](https://www.youtube.com/@VictoriaMetrics)
- Community: [Slack](https://slack.victoriametrics.com/), [X (Twitter)](https://x.com/VictoriaMetrics), [LinkedIn](https://www.linkedin.com/company/victoriametrics/), [YouTube](https://www.youtube.com/@VictoriaMetrics)
Yes, we open-source both the single-node VictoriaMetrics and the cluster version.
@ -84,7 +84,7 @@ Some good benchmarks VictoriaMetrics achieved:
Feel free to ask any questions about VictoriaMetrics:
* [Slack Inviter](https://slack.victoriametrics.com/) and [Slack channel](https://victoriametrics.slack.com/)
* [Twitter](https://twitter.com/VictoriaMetrics/)
* [X (Twitter)](https://x.com/VictoriaMetrics/)
* [LinkedIn](https://www.linkedin.com/company/victoriametrics/)
* [Reddit](https://www.reddit.com/r/VictoriaMetrics/)
* [Telegram-en](https://t.me/VictoriaMetrics_en)
@ -122,4 +122,4 @@ The provided [ZIP file](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/
* Do not modify the spacing, alignment, or positioning of design elements.
* You may resize the logo as needed, but ensure all proportions remain intact.
Thank you for your cooperation!
Thank you for your cooperation!

View file

@ -2,10 +2,10 @@
"name": "subquery-aggregation",
"issue": "https://github.com/VictoriaMetrics/VictoriaMetrics/issues/184",
"data": [
"forms_daily_count;item=x 1 {TIME_S-1m}",
"forms_daily_count;item=x 2 {TIME_S-2m}",
"forms_daily_count;item=y 3 {TIME_S-1m}",
"forms_daily_count;item=y 4 {TIME_S-2m}"],
"forms_daily_count;item=x 1 {TIME_S-59s}",
"forms_daily_count;item=x 2 {TIME_S-1m59s}",
"forms_daily_count;item=y 3 {TIME_S-59s}",
"forms_daily_count;item=y 4 {TIME_S-1m59s}"],
"query": ["/api/v1/query?query=min%20by%20(item)%20(min_over_time(forms_daily_count[10m:1m]))&time={TIME_S-1m}&latency_offset=1ms"],
"result_query": {
"status":"success",

View file

@ -0,0 +1,185 @@
package datadog
import (
"bytes"
"fmt"
"io"
"net/http"
"strconv"
"time"
"github.com/VictoriaMetrics/metrics"
"github.com/valyala/fastjson"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/insertutils"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
)
var parserPool fastjson.ParserPool
// RequestHandler processes Datadog insert requests
func RequestHandler(path string, w http.ResponseWriter, r *http.Request) bool {
switch path {
case "/api/v1/validate":
fmt.Fprintf(w, `{}`)
return true
case "/api/v2/logs":
return datadogLogsIngestion(w, r)
default:
return false
}
}
func datadogLogsIngestion(w http.ResponseWriter, r *http.Request) bool {
w.Header().Add("Content-Type", "application/json")
startTime := time.Now()
v2LogsRequestsTotal.Inc()
reader := r.Body
var ts int64
if tsValue := r.Header.Get("dd-message-timestamp"); tsValue != "" && tsValue != "0" {
var err error
ts, err = strconv.ParseInt(tsValue, 10, 64)
if err != nil {
httpserver.Errorf(w, r, "could not parse dd-message-timestamp header value: %s", err)
return true
}
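// the dd-message-timestamp header value is in milliseconds; convert it to nanoseconds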
ts *= 1e6
} else {
ts = startTime.UnixNano()
}
if r.Header.Get("Content-Encoding") == "gzip" {
zr, err := common.GetGzipReader(reader)
if err != nil {
httpserver.Errorf(w, r, "cannot read gzipped logs request: %s", err)
return true
}
defer common.PutGzipReader(zr)
reader = zr
}
wcr := writeconcurrencylimiter.GetReader(reader)
data, err := io.ReadAll(wcr)
writeconcurrencylimiter.PutReader(wcr)
if err != nil {
httpserver.Errorf(w, r, "cannot read request body: %s", err)
return true
}
cp, err := insertutils.GetCommonParams(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return true
}
if err := vlstorage.CanWriteData(); err != nil {
httpserver.Errorf(w, r, "%s", err)
return true
}
lmp := cp.NewLogMessageProcessor()
n, err := readLogsRequest(ts, data, lmp.AddRow)
lmp.MustClose()
if n > 0 {
rowsIngestedTotal.Add(n)
}
if err != nil {
logger.Warnf("cannot decode log message in /api/v2/logs request: %s, stream fields: %s", err, cp.StreamFields)
return true
}
// update v2LogsRequestDuration only for successfully parsed requests.
// There is no need to update v2LogsRequestDuration for request errors,
// since their timings are usually much smaller than the timings of successfully parsed requests.
v2LogsRequestDuration.UpdateDuration(startTime)
fmt.Fprintf(w, `{}`)
return true
}
var (
v2LogsRequestsTotal = metrics.NewCounter(`vl_http_requests_total{path="/insert/datadog/api/v2/logs"}`)
rowsIngestedTotal = metrics.NewCounter(`vl_rows_ingested_total{type="datadog"}`)
v2LogsRequestDuration = metrics.NewHistogram(`vl_http_request_duration_seconds{path="/insert/datadog/api/v2/logs"}`)
)
// readLogsRequest parses data according to DataDog logs format
// https://docs.datadoghq.com/api/latest/logs/#send-logs
func readLogsRequest(ts int64, data []byte, processLogMessage func(int64, []logstorage.Field)) (int, error) {
p := parserPool.Get()
defer parserPool.Put(p)
v, err := p.ParseBytes(data)
if err != nil {
return 0, fmt.Errorf("cannot parse JSON request body: %w", err)
}
records, err := v.Array()
if err != nil {
return 0, fmt.Errorf("cannot extract array from parsed JSON: %w", err)
}
var fields []logstorage.Field
for m, r := range records {
o, err := r.Object()
if err != nil {
return m + 1, fmt.Errorf("could not extract log record: %w", err)
}
o.Visit(func(k []byte, v *fastjson.Value) {
if err != nil {
return
}
val, e := v.StringBytes()
if e != nil {
err = fmt.Errorf("unexpected label value type for %q:%q; want string", k, v)
return
}
switch string(k) {
case "message":
fields = append(fields, logstorage.Field{
Name: "_msg",
Value: bytesutil.ToUnsafeString(val),
})
case "ddtags":
// https://docs.datadoghq.com/getting_started/tagging/
var pair []byte
idx := 0
for idx >= 0 {
idx = bytes.IndexByte(val, ',')
if idx < 0 {
pair = val
} else {
pair = val[:idx]
val = val[idx+1:]
}
if len(pair) > 0 {
n := bytes.IndexByte(pair, ':')
if n < 0 {
// No tag value; store the tag name with a placeholder value.
fields = append(fields, logstorage.Field{
Name: bytesutil.ToUnsafeString(pair),
Value: "no_label_value",
})
continue
}
fields = append(fields, logstorage.Field{
Name: bytesutil.ToUnsafeString(pair[:n]),
Value: bytesutil.ToUnsafeString(pair[n+1:]),
})
}
}
default:
fields = append(fields, logstorage.Field{
Name: bytesutil.ToUnsafeString(k),
Value: bytesutil.ToUnsafeString(val),
})
}
})
processLogMessage(ts, fields)
fields = fields[:0]
}
return len(records), nil
}
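For reference, a minimal client sketch for this endpoint (assumptions: a locally running VictoriaLogs instance on localhost:9428, the default HTTP port; the payload follows the JSON array format parsed by readLogsRequest above):

package main

import (
	"bytes"
	"fmt"
	"net/http"
)

func main() {
	// One log record in the array format accepted by readLogsRequest.
	payload := []byte(`[{"ddsource":"nginx","ddtags":"env:prod,team:web","hostname":"host-1","message":"hello world","service":"web"}]`)
	// The path matches the "/api/v2/logs" case in RequestHandler; host and port
	// are assumptions for a local VictoriaLogs deployment.
	resp, err := http.Post("http://localhost:9428/insert/datadog/api/v2/logs", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status) // the handler responds with `{}` on success
}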

View file

@ -0,0 +1,117 @@
package datadog
import (
"fmt"
"strings"
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
)
func TestReadLogsRequestFailure(t *testing.T) {
f := func(data string) {
t.Helper()
ts := time.Now().UnixNano()
processLogMessage := func(timestamp int64, fields []logstorage.Field) {
t.Fatalf("unexpected call to processLogMessage with timestamp=%d, fields=%s", timestamp, fields)
}
rows, err := readLogsRequest(ts, []byte(data), processLogMessage)
if err == nil {
t.Fatalf("expecting non-empty error")
}
if rows != 0 {
t.Fatalf("unexpected non-zero rows=%d", rows)
}
}
f("foobar")
f(`{}`)
f(`["create":{}]`)
f(`{"create":{}}
foobar`)
}
func TestReadLogsRequestSuccess(t *testing.T) {
f := func(data string, rowsExpected int, resultExpected string) {
t.Helper()
ts := time.Now().UnixNano()
var result string
processLogMessage := func(_ int64, fields []logstorage.Field) {
a := make([]string, len(fields))
for i, f := range fields {
a[i] = fmt.Sprintf("%q:%q", f.Name, f.Value)
}
if len(result) > 0 {
result = result + "\n"
}
s := "{" + strings.Join(a, ",") + "}"
result += s
}
// Read the request without compression
rows, err := readLogsRequest(ts, []byte(data), processLogMessage)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
if rows != rowsExpected {
t.Fatalf("unexpected rows read; got %d; want %d", rows, rowsExpected)
}
if result != resultExpected {
t.Fatalf("unexpected result;\ngot\n%s\nwant\n%s", result, resultExpected)
}
}
// Verify non-empty data
data := `[
{
"ddsource":"nginx",
"ddtags":"tag1:value1,tag2:value2",
"hostname":"127.0.0.1",
"message":"bar",
"service":"test"
}, {
"ddsource":"nginx",
"ddtags":"tag1:value1,tag2:value2",
"hostname":"127.0.0.1",
"message":"foobar",
"service":"test"
}, {
"ddsource":"nginx",
"ddtags":"tag1:value1,tag2:value2",
"hostname":"127.0.0.1",
"message":"baz",
"service":"test"
}, {
"ddsource":"nginx",
"ddtags":"tag1:value1,tag2:value2",
"hostname":"127.0.0.1",
"message":"xyz",
"service":"test"
}, {
"ddsource": "nginx",
"ddtags":"tag1:value1,tag2:value2,",
"hostname":"127.0.0.1",
"message":"xyz",
"service":"test"
}, {
"ddsource":"nginx",
"ddtags":",tag1:value1,tag2:value2",
"hostname":"127.0.0.1",
"message":"xyz",
"service":"test"
}
]`
rowsExpected := 6
resultExpected := `{"ddsource":"nginx","tag1":"value1","tag2":"value2","hostname":"127.0.0.1","_msg":"bar","service":"test"}
{"ddsource":"nginx","tag1":"value1","tag2":"value2","hostname":"127.0.0.1","_msg":"foobar","service":"test"}
{"ddsource":"nginx","tag1":"value1","tag2":"value2","hostname":"127.0.0.1","_msg":"baz","service":"test"}
{"ddsource":"nginx","tag1":"value1","tag2":"value2","hostname":"127.0.0.1","_msg":"xyz","service":"test"}
{"ddsource":"nginx","tag1":"value1","tag2":"value2","hostname":"127.0.0.1","_msg":"xyz","service":"test"}
{"ddsource":"nginx","tag1":"value1","tag2":"value2","hostname":"127.0.0.1","_msg":"xyz","service":"test"}`
f(data, rowsExpected, resultExpected)
}

View file

@ -103,7 +103,7 @@ func RequestHandler(path string, w http.ResponseWriter, r *http.Request) bool {
}
lmp := cp.NewLogMessageProcessor()
isGzip := r.Header.Get("Content-Encoding") == "gzip"
n, err := readBulkRequest(r.Body, isGzip, cp.TimeField, cp.MsgField, lmp)
n, err := readBulkRequest(r.Body, isGzip, cp.TimeField, cp.MsgFields, lmp)
lmp.MustClose()
if err != nil {
logger.Warnf("cannot decode log message #%d in /_bulk request: %s, stream fields: %s", n, err, cp.StreamFields)
@ -133,7 +133,7 @@ var (
bulkRequestDuration = metrics.NewHistogram(`vl_http_request_duration_seconds{path="/insert/elasticsearch/_bulk"}`)
)
func readBulkRequest(r io.Reader, isGzip bool, timeField, msgField string, lmp insertutils.LogMessageProcessor) (int, error) {
func readBulkRequest(r io.Reader, isGzip bool, timeField string, msgFields []string, lmp insertutils.LogMessageProcessor) (int, error) {
// See https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html
if isGzip {
@ -158,7 +158,7 @@ func readBulkRequest(r io.Reader, isGzip bool, timeField, msgField string, lmp i
n := 0
nCheckpoint := 0
for {
ok, err := readBulkLine(sc, timeField, msgField, lmp)
ok, err := readBulkLine(sc, timeField, msgFields, lmp)
wcr.DecConcurrency()
if err != nil || !ok {
rowsIngestedTotal.Add(n - nCheckpoint)
@ -174,7 +174,7 @@ func readBulkRequest(r io.Reader, isGzip bool, timeField, msgField string, lmp i
var lineBufferPool bytesutil.ByteBufferPool
func readBulkLine(sc *bufio.Scanner, timeField, msgField string, lmp insertutils.LogMessageProcessor) (bool, error) {
func readBulkLine(sc *bufio.Scanner, timeField string, msgFields []string, lmp insertutils.LogMessageProcessor) (bool, error) {
var line []byte
// Read the command, must be "create" or "index"
@ -219,7 +219,7 @@ func readBulkLine(sc *bufio.Scanner, timeField, msgField string, lmp insertutils
if ts == 0 {
ts = time.Now().UnixNano()
}
logstorage.RenameField(p.Fields, msgField, "_msg")
logstorage.RenameField(p.Fields, msgFields, "_msg")
lmp.AddRow(ts, p.Fields)
logstorage.PutJSONParser(p)

View file

@ -15,7 +15,7 @@ func TestReadBulkRequest_Failure(t *testing.T) {
tlp := &insertutils.TestLogMessageProcessor{}
r := bytes.NewBufferString(data)
rows, err := readBulkRequest(r, false, "_time", "_msg", tlp)
rows, err := readBulkRequest(r, false, "_time", []string{"_msg"}, tlp)
if err == nil {
t.Fatalf("expecting non-empty error")
}
@ -36,11 +36,12 @@ func TestReadBulkRequest_Success(t *testing.T) {
f := func(data, timeField, msgField string, rowsExpected int, timestampsExpected []int64, resultExpected string) {
t.Helper()
msgFields := []string{"non_existing_foo", msgField, "non_existing_bar"}
tlp := &insertutils.TestLogMessageProcessor{}
// Read the request without compression
r := bytes.NewBufferString(data)
rows, err := readBulkRequest(r, false, timeField, msgField, tlp)
rows, err := readBulkRequest(r, false, timeField, msgFields, tlp)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
@ -55,7 +56,7 @@ func TestReadBulkRequest_Success(t *testing.T) {
tlp = &insertutils.TestLogMessageProcessor{}
compressedData := compressData(data)
r = bytes.NewBufferString(compressedData)
rows, err = readBulkRequest(r, true, timeField, msgField, tlp)
rows, err = readBulkRequest(r, true, timeField, msgFields, tlp)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}

View file

@ -32,7 +32,7 @@ func benchmarkReadBulkRequest(b *testing.B, isGzip bool) {
dataBytes := bytesutil.ToUnsafeBytes(data)
timeField := "@timestamp"
msgField := "message"
msgFields := []string{"message"}
blp := &insertutils.BenchmarkLogMessageProcessor{}
b.ReportAllocs()
@ -41,7 +41,7 @@ func benchmarkReadBulkRequest(b *testing.B, isGzip bool) {
r := &bytes.Reader{}
for pb.Next() {
r.Reset(dataBytes)
_, err := readBulkRequest(r, isGzip, timeField, msgField, blp)
_, err := readBulkRequest(r, isGzip, timeField, msgFields, blp)
if err != nil {
panic(fmt.Errorf("unexpected error: %w", err))
}

View file

@ -1,7 +1,10 @@
package insertutils
import (
"flag"
"fmt"
"net/http"
"strconv"
"strings"
"sync"
"time"
@ -16,15 +19,21 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
)
var (
defaultMsgValue = flag.String("defaultMsgValue", "missing _msg field; see https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field",
"Default value for _msg field if the ingested log entry doesn't contain it; see https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field")
)
// CommonParams contains common HTTP parameters used by log ingestion APIs.
//
// See https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters
type CommonParams struct {
TenantID logstorage.TenantID
TimeField string
MsgField string
MsgFields []string
StreamFields []string
IgnoreFields []string
ExtraFields []logstorage.Field
Debug bool
DebugRequestURI string
@ -39,44 +48,25 @@ func GetCommonParams(r *http.Request) (*CommonParams, error) {
return nil, err
}
// Extract time field name from _time_field query arg or header
timeField := "_time"
if tf := r.FormValue("_time_field"); tf != "" {
timeField = tf
} else if tf = r.Header.Get("VL-Time-Field"); tf != "" {
if tf := httputils.GetRequestValue(r, "_time_field", "VL-Time-Field"); tf != "" {
timeField = tf
}
// Extract message field name from _msg_field query arg or header
msgField := ""
if msgf := r.FormValue("_msg_field"); msgf != "" {
msgField = msgf
} else if msgf = r.Header.Get("VL-Msg-Field"); msgf != "" {
msgField = msgf
msgFields := httputils.GetArray(r, "_msg_field", "VL-Msg-Field")
streamFields := httputils.GetArray(r, "_stream_fields", "VL-Stream-Fields")
ignoreFields := httputils.GetArray(r, "ignore_fields", "VL-Ignore-Fields")
extraFields, err := getExtraFields(r)
if err != nil {
return nil, err
}
streamFields := httputils.GetArray(r, "_stream_fields")
if len(streamFields) == 0 {
if sf := r.Header.Get("VL-Stream-Fields"); len(sf) > 0 {
streamFields = strings.Split(sf, ",")
}
}
ignoreFields := httputils.GetArray(r, "ignore_fields")
if len(ignoreFields) == 0 {
if f := r.Header.Get("VL-Ignore-Fields"); len(f) > 0 {
ignoreFields = strings.Split(f, ",")
}
}
debug := httputils.GetBool(r, "debug")
if !debug {
if dh := r.Header.Get("VL-Debug"); len(dh) > 0 {
hv := strings.ToLower(dh)
switch hv {
case "", "0", "f", "false", "no":
default:
debug = true
}
debug := false
if dv := httputils.GetRequestValue(r, "debug", "VL-Debug"); dv != "" {
debug, err = strconv.ParseBool(dv)
if err != nil {
return nil, fmt.Errorf("cannot parse debug=%q: %w", dv, err)
}
}
debugRequestURI := ""
@ -89,9 +79,10 @@ func GetCommonParams(r *http.Request) (*CommonParams, error) {
cp := &CommonParams{
TenantID: tenantID,
TimeField: timeField,
MsgField: msgField,
MsgFields: msgFields,
StreamFields: streamFields,
IgnoreFields: ignoreFields,
ExtraFields: extraFields,
Debug: debug,
DebugRequestURI: debugRequestURI,
DebugRemoteAddr: debugRemoteAddr,
@ -100,18 +91,45 @@ func GetCommonParams(r *http.Request) (*CommonParams, error) {
return cp, nil
}
func getExtraFields(r *http.Request) ([]logstorage.Field, error) {
efs := httputils.GetArray(r, "extra_fields", "VL-Extra-Fields")
if len(efs) == 0 {
return nil, nil
}
extraFields := make([]logstorage.Field, len(efs))
for i, ef := range efs {
n := strings.Index(ef, "=")
if n <= 0 || n == len(ef)-1 {
return nil, fmt.Errorf(`invalid extra_field format: %q; must be in the form "field=value"`, ef)
}
extraFields[i] = logstorage.Field{
Name: ef[:n],
Value: ef[n+1:],
}
}
return extraFields, nil
}
// GetCommonParamsForSyslog returns common params needed for parsing syslog messages and storing them to the given tenantID.
func GetCommonParamsForSyslog(tenantID logstorage.TenantID) *CommonParams {
func GetCommonParamsForSyslog(tenantID logstorage.TenantID, streamFields, ignoreFields []string, extraFields []logstorage.Field) *CommonParams {
// See https://docs.victoriametrics.com/victorialogs/logsql/#unpack_syslog-pipe
cp := &CommonParams{
TenantID: tenantID,
TimeField: "timestamp",
MsgField: "message",
StreamFields: []string{
if streamFields == nil {
streamFields = []string{
"hostname",
"app_name",
"proc_id",
}
}
cp := &CommonParams{
TenantID: tenantID,
TimeField: "timestamp",
MsgFields: []string{
"message",
},
StreamFields: streamFields,
IgnoreFields: ignoreFields,
ExtraFields: extraFields,
}
return cp
@ -176,22 +194,6 @@ func (lmp *logMessageProcessor) AddRow(timestamp int64, fields []logstorage.Fiel
return
}
// _msg field must be non-empty according to VictoriaLogs data model.
// See https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field
msgExist := false
for i := range fields {
if fields[i].Name == "_msg" {
msgExist = len(fields[i].Value) > 0
break
}
}
if !msgExist {
rf := logstorage.RowFormatter(fields)
logger.Warnf("dropping log line without _msg field; %s", rf)
rowsDroppedTotalMsgNotValid.Inc()
return
}
lmp.lr.MustAdd(lmp.cp.TenantID, timestamp, fields)
if lmp.cp.Debug {
s := lmp.lr.GetRowString(0)
@ -226,7 +228,7 @@ func (lmp *logMessageProcessor) MustClose() {
//
// MustClose() must be called on the returned LogMessageProcessor when it is no longer needed.
func (cp *CommonParams) NewLogMessageProcessor() LogMessageProcessor {
lr := logstorage.GetLogRows(cp.StreamFields, cp.IgnoreFields)
lr := logstorage.GetLogRows(cp.StreamFields, cp.IgnoreFields, cp.ExtraFields, *defaultMsgValue)
lmp := &logMessageProcessor{
cp: cp,
lr: lr,
@ -241,5 +243,4 @@ func (cp *CommonParams) NewLogMessageProcessor() LogMessageProcessor {
var (
rowsDroppedTotalDebug = metrics.NewCounter(`vl_rows_dropped_total{reason="debug"}`)
rowsDroppedTotalTooManyFields = metrics.NewCounter(`vl_rows_dropped_total{reason="too_many_fields"}`)
rowsDroppedTotalMsgNotValid = metrics.NewCounter(`vl_rows_dropped_total{reason="msg_not_exist"}`)
)
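As a usage sketch for the getExtraFields helper above: extra_fields values arrive as repeated query args (or a VL-Extra-Fields header), each in "field=value" form. A hedged example of building such an ingestion URL (the jsonline endpoint, host, and port are assumptions for a local VictoriaLogs setup):

package main

import (
	"fmt"
	"net/url"
)

func main() {
	q := url.Values{}
	q.Add("_msg_field", "message")
	// Each extra_fields value must contain a single "field=value" pair,
	// otherwise getExtraFields returns an error.
	q.Add("extra_fields", "env=prod")
	q.Add("extra_fields", "dc=eu-west-1")
	// Hypothetical local ingestion endpoint; adjust to your deployment.
	fmt.Println("http://localhost:9428/insert/jsonline?" + q.Encode())
}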

View file

@ -0,0 +1,256 @@
package journald
import (
"bytes"
"encoding/binary"
"flag"
"fmt"
"io"
"net/http"
"regexp"
"slices"
"strconv"
"strings"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/insertutils"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
const (
journaldEntryMaxNameLen = 64
)
var (
bodyBufferPool bytesutil.ByteBufferPool
allowedJournaldEntryNameChars = regexp.MustCompile(`^[A-Z_][A-Z0-9_]+`)
)
var (
journaldStreamFields = flagutil.NewArrayString("journald.streamFields", "Journal fields to be used as stream fields. "+
"See the list of allowed fields at https://www.freedesktop.org/software/systemd/man/latest/systemd.journal-fields.html.")
journaldIgnoreFields = flagutil.NewArrayString("journald.ignoreFields", "Journal fields to ignore. "+
"See the list of allowed fields at https://www.freedesktop.org/software/systemd/man/latest/systemd.journal-fields.html.")
journaldTimeField = flag.String("journald.timeField", "__REALTIME_TIMESTAMP", "Journal field to be used as time field. "+
"See the list of allowed fields at https://www.freedesktop.org/software/systemd/man/latest/systemd.journal-fields.html.")
journaldTenantID = flag.String("journald.tenantID", "0:0", "TenantID for logs ingested via the Journald endpoint.")
journaldIncludeEntryMetadata = flag.Bool("journald.includeEntryMetadata", false, "Include journal entry fields which start with double underscores.")
)
func getCommonParams(r *http.Request) (*insertutils.CommonParams, error) {
cp, err := insertutils.GetCommonParams(r)
if err != nil {
return nil, err
}
if cp.TenantID.AccountID == 0 && cp.TenantID.ProjectID == 0 {
tenantID, err := logstorage.ParseTenantID(*journaldTenantID)
if err != nil {
return nil, fmt.Errorf("cannot parse -journald.tenantID=%q for journald: %w", *journaldTenantID, err)
}
cp.TenantID = tenantID
}
if cp.TimeField != "" {
cp.TimeField = *journaldTimeField
}
if len(cp.StreamFields) == 0 {
cp.StreamFields = *journaldStreamFields
}
if len(cp.IgnoreFields) == 0 {
cp.IgnoreFields = *journaldIgnoreFields
}
cp.MsgFields = []string{"MESSAGE"}
return cp, nil
}
// RequestHandler processes Journald Export insert requests
func RequestHandler(path string, w http.ResponseWriter, r *http.Request) bool {
switch path {
case "/upload":
if r.Header.Get("Content-Type") != "application/vnd.fdo.journal" {
httpserver.Errorf(w, r, "only application/vnd.fdo.journal encoding is supported for Journald")
return true
}
handleJournald(r, w)
return true
default:
return false
}
}
// handleJournald parses Journal binary entries
func handleJournald(r *http.Request, w http.ResponseWriter) {
startTime := time.Now()
requestsJournaldTotal.Inc()
if err := vlstorage.CanWriteData(); err != nil {
httpserver.Errorf(w, r, "%s", err)
return
}
reader := r.Body
var err error
wcr := writeconcurrencylimiter.GetReader(reader)
data, err := io.ReadAll(wcr)
if err != nil {
httpserver.Errorf(w, r, "cannot read request body: %s", err)
return
}
writeconcurrencylimiter.PutReader(wcr)
bb := bodyBufferPool.Get()
defer bodyBufferPool.Put(bb)
if r.Header.Get("Content-Encoding") == "zstd" {
bb.B, err = zstd.Decompress(bb.B[:0], data)
if err != nil {
httpserver.Errorf(w, r, "cannot decompress zstd-encoded request with length %d: %s", len(data), err)
return
}
data = bb.B
}
cp, err := getCommonParams(r)
if err != nil {
httpserver.Errorf(w, r, "cannot parse common params from request: %s", err)
return
}
lmp := cp.NewLogMessageProcessor()
n, err := parseJournaldRequest(data, lmp, cp)
lmp.MustClose()
if err != nil {
errorsTotal.Inc()
httpserver.Errorf(w, r, "cannot parse Journald protobuf request: %s", err)
return
}
rowsIngestedJournaldTotal.Add(n)
// update requestJournaldDuration only for successfully parsed requests.
// There is no need to update requestJournaldDuration for request errors,
// since their timings are usually much smaller than the timings of successfully parsed requests.
requestJournaldDuration.UpdateDuration(startTime)
}
var (
rowsIngestedJournaldTotal = metrics.NewCounter(`vl_rows_ingested_total{type="journald", format="journald"}`)
requestsJournaldTotal = metrics.NewCounter(`vl_http_requests_total{path="/insert/journald/upload",format="journald"}`)
errorsTotal = metrics.NewCounter(`vl_http_errors_total{path="/insert/journald/upload",format="journald"}`)
requestJournaldDuration = metrics.NewHistogram(`vl_http_request_duration_seconds{path="/insert/journald/upload",format="journald"}`)
)
// See https://systemd.io/JOURNAL_EXPORT_FORMATS/#journal-export-format
func parseJournaldRequest(data []byte, lmp insertutils.LogMessageProcessor, cp *insertutils.CommonParams) (rowsIngested int, err error) {
var fields []logstorage.Field
var ts int64
var size uint64
var name, value string
var line []byte
currentTimestamp := time.Now().UnixNano()
for len(data) > 0 {
idx := bytes.IndexByte(data, '\n')
switch {
case idx > 0:
// process fields
line = data[:idx]
data = data[idx+1:]
case idx == 0:
// next message or end of file
// a double newline separates adjacent messages
if len(fields) > 0 {
if ts == 0 {
ts = currentTimestamp
}
lmp.AddRow(ts, fields)
rowsIngested++
fields = fields[:0]
}
// skip newline separator
data = data[1:]
continue
case idx < 0:
return rowsIngested, fmt.Errorf("missing new line separator, unread data left=%d", len(data))
}
idx = bytes.IndexByte(line, '=')
// the line could be either a key=value pair
// or just a key,
// with the binary value following in the buffer
if idx > 0 {
name = bytesutil.ToUnsafeString(line[:idx])
value = bytesutil.ToUnsafeString(line[idx+1:])
} else {
name = bytesutil.ToUnsafeString(line)
if len(data) == 0 {
return rowsIngested, fmt.Errorf("unexpected zero data for binary field value of key=%s", name)
}
// the size of the binary data is encoded as a little-endian uint64 at the beginning
idx, err := binary.Decode(data, binary.LittleEndian, &size)
if err != nil {
return rowsIngested, fmt.Errorf("failed to extract binary field %q value size: %w", name, err)
}
// skip the binary data size prefix
data = data[idx:]
if size == 0 {
return rowsIngested, fmt.Errorf("unexpected zero size decoded for binary field %q", name)
}
}
if int(size) > len(data) {
return rowsIngested, fmt.Errorf("binary data size=%d cannot exceed size of the data at buffer=%d", size, len(data))
}
value = bytesutil.ToUnsafeString(data[:size])
data = data[int(size):]
// the binary data must be followed by a newline separator before the next field or message
if len(data) == 0 {
return rowsIngested, fmt.Errorf("unexpected empty buffer after binary field=%s read", name)
}
lastB := data[0]
if lastB != '\n' {
return rowsIngested, fmt.Errorf("expected new line separator after binary field=%s, got=%s", name, string(lastB))
}
data = data[1:]
}
// https://github.com/systemd/systemd/blob/main/src/libsystemd/sd-journal/journal-file.c#L1703
if len(name) > journaldEntryMaxNameLen {
return rowsIngested, fmt.Errorf("journald entry name should not exceed %d symbols, got: %q", journaldEntryMaxNameLen, name)
}
if !allowedJournaldEntryNameChars.MatchString(name) {
return rowsIngested, fmt.Errorf("journald entry name should consist of `A-Z0-9_` characters and must start from non-digit symbol")
}
if name == cp.TimeField {
ts, err = strconv.ParseInt(value, 10, 64)
if err != nil {
return 0, fmt.Errorf("failed to parse Journald timestamp, %w", err)
}
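// __REALTIME_TIMESTAMP is expressed in microseconds; convert it to nanoseconds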
ts *= 1e3
continue
}
if slices.Contains(cp.MsgFields, name) {
name = "_msg"
}
if *journaldIncludeEntryMetadata || !strings.HasPrefix(name, "__") {
fields = append(fields, logstorage.Field{
Name: name,
Value: value,
})
}
}
if len(fields) > 0 {
if ts == 0 {
ts = currentTimestamp
}
lmp.AddRow(ts, fields)
rowsIngested++
}
return rowsIngested, nil
}
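To make the binary-field layout parsed above concrete, here is a small sketch that builds a journal export entry containing a binary MESSAGE field (layout per the code above: field name, '\n', little-endian uint64 payload size, payload bytes, '\n'; a blank line terminates the entry):

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

func main() {
	var buf bytes.Buffer
	// A plain key=value field is terminated by '\n'.
	buf.WriteString("__REALTIME_TIMESTAMP=1729698775704404\n")
	// A binary field: name, '\n', little-endian uint64 size, payload bytes, '\n'.
	msg := []byte("foo\nbar")
	buf.WriteString("MESSAGE\n")
	_ = binary.Write(&buf, binary.LittleEndian, uint64(len(msg))) // writes to bytes.Buffer cannot fail
	buf.Write(msg)
	buf.WriteByte('\n')
	// A blank line terminates the entry.
	buf.WriteByte('\n')
	fmt.Printf("%q\n", buf.Bytes())
}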

View file

@ -0,0 +1,70 @@
package journald
import (
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/insertutils"
)
func TestPushJournaldOk(t *testing.T) {
f := func(src string, timestampsExpected []int64, resultExpected string) {
t.Helper()
tlp := &insertutils.TestLogMessageProcessor{}
cp := &insertutils.CommonParams{
TimeField: "__REALTIME_TIMESTAMP",
MsgFields: []string{"MESSAGE"},
}
n, err := parseJournaldRequest([]byte(src), tlp, cp)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
if err := tlp.Verify(n, timestampsExpected, resultExpected); err != nil {
t.Fatal(err)
}
}
// Single event
f("__REALTIME_TIMESTAMP=91723819283\nMESSAGE=Test message\n",
[]int64{91723819283000},
"{\"_msg\":\"Test message\"}",
)
// Multiple events
f("__REALTIME_TIMESTAMP=91723819283\nMESSAGE=Test message\n\n__REALTIME_TIMESTAMP=91723819284\nMESSAGE=Test message2\n",
[]int64{91723819283000, 91723819284000},
"{\"_msg\":\"Test message\"}\n{\"_msg\":\"Test message2\"}",
)
// Parse binary data
f("__CURSOR=s=e0afe8412a6a49d2bfcf66aa7927b588;i=1f06;b=f778b6e2f7584a77b991a2366612a7b5;m=300bdfd420;t=62526e1182354;x=930dc44b370963b7\n__REALTIME_TIMESTAMP=1729698775704404\n__MONOTONIC_TIMESTAMP=206357648416\n__SEQNUM=7942\n__SEQNUM_ID=e0afe8412a6a49d2bfcf66aa7927b588\n_BOOT_ID=f778b6e2f7584a77b991a2366612a7b5\n_UID=0\n_GID=0\n_MACHINE_ID=a4a970370c30a925df02a13c67167847\n_HOSTNAME=ecd5e4555787\n_RUNTIME_SCOPE=system\n_TRANSPORT=journal\n_CAP_EFFECTIVE=1ffffffffff\n_SYSTEMD_CGROUP=/init.scope\n_SYSTEMD_UNIT=init.scope\n_SYSTEMD_SLICE=-.slice\nCODE_FILE=<stdin>\nCODE_LINE=1\nCODE_FUNC=<module>\nSYSLOG_IDENTIFIER=python3\n_COMM=python3\n_EXE=/usr/bin/python3.12\n_CMDLINE=python3\nMESSAGE\n\x13\x00\x00\x00\x00\x00\x00\x00foo\nbar\n\n\nasda\nasda\n_PID=2763\n_SOURCE_REALTIME_TIMESTAMP=1729698775704375\n\n",
[]int64{1729698775704404000},
"{\"_BOOT_ID\":\"f778b6e2f7584a77b991a2366612a7b5\",\"_UID\":\"0\",\"_GID\":\"0\",\"_MACHINE_ID\":\"a4a970370c30a925df02a13c67167847\",\"_HOSTNAME\":\"ecd5e4555787\",\"_RUNTIME_SCOPE\":\"system\",\"_TRANSPORT\":\"journal\",\"_CAP_EFFECTIVE\":\"1ffffffffff\",\"_SYSTEMD_CGROUP\":\"/init.scope\",\"_SYSTEMD_UNIT\":\"init.scope\",\"_SYSTEMD_SLICE\":\"-.slice\",\"CODE_FILE\":\"\\u003cstdin>\",\"CODE_LINE\":\"1\",\"CODE_FUNC\":\"\\u003cmodule>\",\"SYSLOG_IDENTIFIER\":\"python3\",\"_COMM\":\"python3\",\"_EXE\":\"/usr/bin/python3.12\",\"_CMDLINE\":\"python3\",\"_msg\":\"foo\\nbar\\n\\n\\nasda\\nasda\",\"_PID\":\"2763\",\"_SOURCE_REALTIME_TIMESTAMP\":\"1729698775704375\"}",
)
}
func TestPushJournald_Failure(t *testing.T) {
f := func(data string) {
t.Helper()
tlp := &insertutils.TestLogMessageProcessor{}
cp := &insertutils.CommonParams{
TimeField: "__REALTIME_TIMESTAMP",
MsgFields: []string{"MESSAGE"},
}
_, err := parseJournaldRequest([]byte(data), tlp, cp)
if err == nil {
t.Fatalf("expected non nil error")
}
}
// missing new line terminator for binary encoded message
f("__CURSOR=s=e0afe8412a6a49d2bfcf66aa7927b588;i=1f06;b=f778b6e2f7584a77b991a2366612a7b5;m=300bdfd420;t=62526e1182354;x=930dc44b370963b7\n__REALTIME_TIMESTAMP=1729698775704404\nMESSAGE\n\x13\x00\x00\x00\x00\x00\x00\x00foo\nbar\n\n\nasdaasda2")
// missing new line terminator
f("__REALTIME_TIMESTAMP=91723819283\n=Test message")
// empty field name
f("__REALTIME_TIMESTAMP=91723819283\n=Test message\n")
// field name starting with number
f("__REALTIME_TIMESTAMP=91723819283\n1incorrect=Test message\n")
// field name exceeds the 64-character limit
f("__REALTIME_TIMESTAMP=91723819283\ntoolooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooongcorrecooooooooooooong=Test message\n")
// Only allow A-Z0-9 and '_'
f("__REALTIME_TIMESTAMP=91723819283\nbadC!@$!@$as=Test message\n")
}

View file

@ -53,7 +53,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) {
}
lmp := cp.NewLogMessageProcessor()
err = processStreamInternal(reader, cp.TimeField, cp.MsgField, lmp)
err = processStreamInternal(reader, cp.TimeField, cp.MsgFields, lmp)
lmp.MustClose()
if err != nil {
@ -66,7 +66,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) {
}
}
func processStreamInternal(r io.Reader, timeField, msgField string, lmp insertutils.LogMessageProcessor) error {
func processStreamInternal(r io.Reader, timeField string, msgFields []string, lmp insertutils.LogMessageProcessor) error {
wcr := writeconcurrencylimiter.GetReader(r)
defer writeconcurrencylimiter.PutReader(wcr)
@ -79,7 +79,7 @@ func processStreamInternal(r io.Reader, timeField, msgField string, lmp insertut
n := 0
for {
ok, err := readLine(sc, timeField, msgField, lmp)
ok, err := readLine(sc, timeField, msgFields, lmp)
wcr.DecConcurrency()
if err != nil {
errorsTotal.Inc()
@ -93,7 +93,7 @@ func processStreamInternal(r io.Reader, timeField, msgField string, lmp insertut
}
}
func readLine(sc *bufio.Scanner, timeField, msgField string, lmp insertutils.LogMessageProcessor) (bool, error) {
func readLine(sc *bufio.Scanner, timeField string, msgFields []string, lmp insertutils.LogMessageProcessor) (bool, error) {
var line []byte
for len(line) == 0 {
if !sc.Scan() {
@ -116,7 +116,7 @@ func readLine(sc *bufio.Scanner, timeField, msgField string, lmp insertutils.Log
if err != nil {
return false, fmt.Errorf("cannot get timestamp: %w", err)
}
logstorage.RenameField(p.Fields, msgField, "_msg")
logstorage.RenameField(p.Fields, msgFields, "_msg")
lmp.AddRow(ts, p.Fields)
logstorage.PutJSONParser(p)

View file

@ -11,9 +11,10 @@ func TestProcessStreamInternal_Success(t *testing.T) {
f := func(data, timeField, msgField string, rowsExpected int, timestampsExpected []int64, resultExpected string) {
t.Helper()
msgFields := []string{msgField}
tlp := &insertutils.TestLogMessageProcessor{}
r := bytes.NewBufferString(data)
if err := processStreamInternal(r, timeField, msgField, tlp); err != nil {
if err := processStreamInternal(r, timeField, msgFields, tlp); err != nil {
t.Fatalf("unexpected error: %s", err)
}
@ -34,6 +35,18 @@ func TestProcessStreamInternal_Success(t *testing.T) {
{"_msg":"baz"}
{"_msg":"xyz","x":"y"}`
f(data, timeField, msgField, rowsExpected, timestampsExpected, resultExpected)
// Non-existing msgField
data = `{"@timestamp":"2023-06-06T04:48:11.735Z","log":{"offset":71770,"file":{"path":"/var/log/auth.log"}},"message":"foobar"}
{"@timestamp":"2023-06-06T04:48:12.735+01:00","message":"baz"}
`
timeField = "@timestamp"
msgField = "foobar"
rowsExpected = 2
timestampsExpected = []int64{1686026891735000000, 1686023292735000000}
resultExpected = `{"log.offset":"71770","log.file.path":"/var/log/auth.log","message":"foobar"}
{"message":"baz"}`
f(data, timeField, msgField, rowsExpected, timestampsExpected, resultExpected)
}
func TestProcessStreamInternal_Failure(t *testing.T) {
@ -42,7 +55,7 @@ func TestProcessStreamInternal_Failure(t *testing.T) {
tlp := &insertutils.TestLogMessageProcessor{}
r := bytes.NewBufferString(data)
if err := processStreamInternal(r, "time", "", tlp); err == nil {
if err := processStreamInternal(r, "time", nil, tlp); err == nil {
t.Fatalf("expecting non-nil error")
}
}

View file

@ -8,6 +8,9 @@ import (
"strconv"
"time"
"github.com/VictoriaMetrics/metrics"
"github.com/valyala/fastjson"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/insertutils"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
@ -15,8 +18,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
"github.com/valyala/fastjson"
)
var parserPool fastjson.ParserPool
@ -56,7 +57,7 @@ func handleJSON(r *http.Request, w http.ResponseWriter) {
n, err := parseJSONRequest(data, lmp)
lmp.MustClose()
if err != nil {
httpserver.Errorf(w, r, "cannot parse Loki json request: %s", err)
httpserver.Errorf(w, r, "cannot parse Loki json request: %s; data=%s", err, data)
return
}
@ -84,7 +85,7 @@ func parseJSONRequest(data []byte, lmp insertutils.LogMessageProcessor) (int, er
streamsV := v.Get("streams")
if streamsV == nil {
return 0, fmt.Errorf("missing `streams` item in the parsed JSON: %q", v)
return 0, fmt.Errorf("missing `streams` item in the parsed JSON")
}
streams, err := streamsV.Array()
if err != nil {
@ -107,9 +108,6 @@ func parseJSONRequest(data []byte, lmp insertutils.LogMessageProcessor) (int, er
labels = o
}
labels.Visit(func(k []byte, v *fastjson.Value) {
if err != nil {
return
}
vStr, errLocal := v.StringBytes()
if errLocal != nil {
err = fmt.Errorf("unexpected label value type for %q:%q; want string", k, v)
@ -127,7 +125,7 @@ func parseJSONRequest(data []byte, lmp insertutils.LogMessageProcessor) (int, er
// populate messages from `values` array
linesV := stream.Get("values")
if linesV == nil {
return rowsIngested, fmt.Errorf("missing `values` item in the parsed JSON %q", stream)
return rowsIngested, fmt.Errorf("missing `values` item in the parsed `stream` object %q", stream)
}
lines, err := linesV.Array()
if err != nil {
@ -140,8 +138,8 @@ func parseJSONRequest(data []byte, lmp insertutils.LogMessageProcessor) (int, er
if err != nil {
return rowsIngested, fmt.Errorf("unexpected contents of `values` item; want array; got %q", line)
}
if len(lineA) != 2 {
return rowsIngested, fmt.Errorf("unexpected number of values in `values` item array %q; got %d want 2", line, len(lineA))
if len(lineA) < 2 || len(lineA) > 3 {
return rowsIngested, fmt.Errorf("unexpected number of values in `values` item array %q; got %d want 2 or 3", line, len(lineA))
}
// parse timestamp
@ -167,6 +165,30 @@ func parseJSONRequest(data []byte, lmp insertutils.LogMessageProcessor) (int, er
Name: "_msg",
Value: bytesutil.ToUnsafeString(msg),
})
// parse structured metadata - see https://grafana.com/docs/loki/latest/reference/loki-http-api/#ingest-logs
if len(lineA) > 2 {
structuredMetadata, err := lineA[2].Object()
if err != nil {
return rowsIngested, fmt.Errorf("unexpected structured metadata type for %q; want JSON object", lineA[2])
}
structuredMetadata.Visit(func(k []byte, v *fastjson.Value) {
vStr, errLocal := v.StringBytes()
if errLocal != nil {
err = fmt.Errorf("unexpected label value type for %q:%q; want string", k, v)
return
}
fields = append(fields, logstorage.Field{
Name: bytesutil.ToUnsafeString(k),
Value: bytesutil.ToUnsafeString(vStr),
})
})
if err != nil {
return rowsIngested, fmt.Errorf("error when parsing `structuredMetadata` object: %w", err)
}
}
lmp.AddRow(ts, fields)
}
rowsIngested += len(lines)

View file

@ -45,13 +45,19 @@ func TestParseJSONRequest_Failure(t *testing.T) {
// Invalid length of `values` individual item
f(`{"streams":[{"values":[[]]}]}`)
f(`{"streams":[{"values":[["123"]]}]}`)
f(`{"streams":[{"values":[["123","456","789"]]}]}`)
f(`{"streams":[{"values":[["123","456","789","8123"]]}]}`)
// Invalid type for timestamp inside `values` individual item
f(`{"streams":[{"values":[[123,"456"]}]}`)
// Invalid type for log message
f(`{"streams":[{"values":[["123",1234]]}]}`)
// invalid structured metadata type
f(`{"streams":[{"values":[["1577836800000000001", "foo bar", ["metadata_1", "md_value"]]]}]}`)
// structured metadata with unexpected value type
f(`{"streams":[{"values":[["1577836800000000001", "foo bar", {"metadata_1": 1}]] }]}`)
}
func TestParseJSONRequest_Success(t *testing.T) {
@ -116,4 +122,8 @@ func TestParseJSONRequest_Success(t *testing.T) {
}`, []int64{1577836800000000001, 1577836900005000002, 1877836900005000002}, `{"foo":"bar","a":"b","_msg":"foo bar"}
{"foo":"bar","a":"b","_msg":"abc"}
{"x":"y","_msg":"yx"}`)
// values with metadata
f(`{"streams":[{"values":[["1577836800000000001", "foo bar", {"metadata_1": "md_value"}]]}]}`, []int64{1577836800000000001}, `{"_msg":"foo bar","metadata_1":"md_value"}`)
f(`{"streams":[{"values":[["1577836800000000001", "foo bar", {}]]}]}`, []int64{1577836800000000001}, `{"_msg":"foo bar"}`)
}

View file

@ -17,7 +17,7 @@ var mp easyproto.MarshalerPool
// PushRequest represents Loki PushRequest
//
// See https://github.com/grafana/loki/blob/4220737a52da7ab6c9346b12d5a5d7bedbcd641d/pkg/push/push.proto#L14C1-L14C20
// See https://github.com/grafana/loki/blob/ada4b7b8713385fbe9f5984a5a0aaaddf1a7b851/pkg/push/push.proto#L14
type PushRequest struct {
Streams []Stream
@ -87,7 +87,7 @@ func (pr *PushRequest) unmarshalProtobuf(entriesBuf []Entry, labelPairBuf []Labe
// Stream represents Loki stream.
//
// See https://github.com/grafana/loki/blob/4220737a52da7ab6c9346b12d5a5d7bedbcd641d/pkg/push/push.proto#L23
// See https://github.com/grafana/loki/blob/ada4b7b8713385fbe9f5984a5a0aaaddf1a7b851/pkg/push/push.proto#L23
type Stream struct {
Labels string
Entries []Entry
@ -139,7 +139,7 @@ func (s *Stream) unmarshalProtobuf(entriesBuf []Entry, labelPairBuf []LabelPair,
// Entry represents Loki entry.
//
// See https://github.com/grafana/loki/blob/4220737a52da7ab6c9346b12d5a5d7bedbcd641d/pkg/push/push.proto#L38
// See https://github.com/grafana/loki/blob/ada4b7b8713385fbe9f5984a5a0aaaddf1a7b851/pkg/push/push.proto#L38
type Entry struct {
Timestamp time.Time
Line string
@ -203,7 +203,7 @@ func (e *Entry) unmarshalProtobuf(labelPairBuf []LabelPair, src []byte) ([]Label
// LabelPair represents Loki label pair.
//
// See https://github.com/grafana/loki/blob/4220737a52da7ab6c9346b12d5a5d7bedbcd641d/pkg/push/push.proto#L33
// See https://github.com/grafana/loki/blob/ada4b7b8713385fbe9f5984a5a0aaaddf1a7b851/pkg/push/push.proto#L33
type LabelPair struct {
Name string
Value string

View file

@ -4,7 +4,9 @@ import (
"net/http"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/datadog"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/elasticsearch"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/journald"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/jsonline"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/loki"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/opentelemetry"
@ -24,6 +26,7 @@ func Stop() {
// RequestHandler handles insert requests for VictoriaLogs
func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
path := r.URL.Path
if !strings.HasPrefix(path, "/insert/") {
// Skip requests, which do not start with /insert/, since these aren't our requests.
return false
@ -45,6 +48,12 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
case strings.HasPrefix(path, "/opentelemetry/"):
path = strings.TrimPrefix(path, "/opentelemetry")
return opentelemetry.RequestHandler(path, w, r)
case strings.HasPrefix(path, "/journald/"):
path = strings.TrimPrefix(path, "/journald")
return journald.RequestHandler(path, w, r)
case strings.HasPrefix(path, "/datadog/"):
path = strings.TrimPrefix(path, "/datadog")
return datadog.RequestHandler(path, w, r)
default:
return false
}

View file

@ -3,11 +3,13 @@ package syslog
import (
"bufio"
"crypto/tls"
"encoding/json"
"errors"
"flag"
"fmt"
"io"
"net"
"sort"
"strconv"
"strings"
"sync"
@ -35,10 +37,25 @@ var (
syslogTimezone = flag.String("syslog.timezone", "Local", "Timezone to use when parsing timestamps in RFC3164 syslog messages. Timezone must be a valid IANA Time Zone. "+
"For example: America/New_York, Europe/Berlin, Etc/GMT+3 . See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/")
syslogTenantIDTCP = flagutil.NewArrayString("syslog.tenantID.tcp", "TenantID for logs ingested via the corresponding -syslog.listenAddr.tcp. "+
"See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/")
syslogTenantIDUDP = flagutil.NewArrayString("syslog.tenantID.udp", "TenantID for logs ingested via the corresponding -syslog.listenAddr.udp. "+
"See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/")
streamFieldsTCP = flagutil.NewArrayString("syslog.streamFields.tcp", "Fields to use as log stream labels for logs ingested via the corresponding -syslog.listenAddr.tcp. "+
`See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#stream-fields`)
streamFieldsUDP = flagutil.NewArrayString("syslog.streamFields.udp", "Fields to use as log stream labels for logs ingested via the corresponding -syslog.listenAddr.udp. "+
`See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#stream-fields`)
ignoreFieldsTCP = flagutil.NewArrayString("syslog.ignoreFields.tcp", "Fields to ignore at logs ingested via the corresponding -syslog.listenAddr.tcp. "+
`See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#dropping-fields`)
ignoreFieldsUDP = flagutil.NewArrayString("syslog.ignoreFields.udp", "Fields to ignore at logs ingested via the corresponding -syslog.listenAddr.udp. "+
`See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#dropping-fields`)
extraFieldsTCP = flagutil.NewArrayString("syslog.extraFields.tcp", "Fields to add to logs ingested via the corresponding -syslog.listenAddr.tcp. "+
`See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#adding-extra-fields`)
extraFieldsUDP = flagutil.NewArrayString("syslog.extraFields.udp", "Fields to add to logs ingested via the corresponding -syslog.listenAddr.udp. "+
`See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#adding-extra-fields`)
tenantIDTCP = flagutil.NewArrayString("syslog.tenantID.tcp", "TenantID for logs ingested via the corresponding -syslog.listenAddr.tcp. "+
"See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#multitenancy")
tenantIDUDP = flagutil.NewArrayString("syslog.tenantID.udp", "TenantID for logs ingested via the corresponding -syslog.listenAddr.udp. "+
"See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#multitenancy")
listenAddrTCP = flagutil.NewArrayString("syslog.listenAddr.tcp", "Comma-separated list of TCP addresses to listen to for Syslog messages. "+
"See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/")
@ -150,7 +167,7 @@ func runUDPListener(addr string, argIdx int) {
logger.Fatalf("cannot start UDP syslog server at %q: %s", addr, err)
}
tenantIDStr := syslogTenantIDUDP.GetOptionalArg(argIdx)
tenantIDStr := tenantIDUDP.GetOptionalArg(argIdx)
tenantID, err := logstorage.ParseTenantID(tenantIDStr)
if err != nil {
logger.Fatalf("cannot parse -syslog.tenantID.udp=%q for -syslog.listenAddr.udp=%q: %s", tenantIDStr, addr, err)
@ -161,9 +178,27 @@ func runUDPListener(addr string, argIdx int) {
useLocalTimestamp := useLocalTimestampUDP.GetOptionalArg(argIdx)
streamFieldsStr := streamFieldsUDP.GetOptionalArg(argIdx)
streamFields, err := parseFieldsList(streamFieldsStr)
if err != nil {
logger.Fatalf("cannot parse -syslog.streamFields.udp=%q for -syslog.listenAddr.udp=%q: %s", streamFieldsStr, addr, err)
}
ignoreFieldsStr := ignoreFieldsUDP.GetOptionalArg(argIdx)
ignoreFields, err := parseFieldsList(ignoreFieldsStr)
if err != nil {
logger.Fatalf("cannot parse -syslog.ignoreFields.udp=%q for -syslog.listenAddr.udp=%q: %s", ignoreFieldsStr, addr, err)
}
extraFieldsStr := extraFieldsUDP.GetOptionalArg(argIdx)
extraFields, err := parseExtraFields(extraFieldsStr)
if err != nil {
logger.Fatalf("cannot parse -syslog.extraFields.udp=%q for -syslog.listenAddr.udp=%q: %s", extraFieldsStr, addr, err)
}
doneCh := make(chan struct{})
go func() {
serveUDP(ln, tenantID, compressMethod, useLocalTimestamp)
serveUDP(ln, tenantID, compressMethod, useLocalTimestamp, streamFields, ignoreFields, extraFields)
close(doneCh)
}()
@ -193,7 +228,7 @@ func runTCPListener(addr string, argIdx int) {
logger.Fatalf("syslog: cannot start TCP listener at %s: %s", addr, err)
}
tenantIDStr := syslogTenantIDTCP.GetOptionalArg(argIdx)
tenantIDStr := tenantIDTCP.GetOptionalArg(argIdx)
tenantID, err := logstorage.ParseTenantID(tenantIDStr)
if err != nil {
logger.Fatalf("cannot parse -syslog.tenantID.tcp=%q for -syslog.listenAddr.tcp=%q: %s", tenantIDStr, addr, err)
@ -204,9 +239,27 @@ func runTCPListener(addr string, argIdx int) {
useLocalTimestamp := useLocalTimestampTCP.GetOptionalArg(argIdx)
streamFieldsStr := streamFieldsTCP.GetOptionalArg(argIdx)
streamFields, err := parseFieldsList(streamFieldsStr)
if err != nil {
logger.Fatalf("cannot parse -syslog.streamFields.tcp=%q for -syslog.listenAddr.tcp=%q: %s", streamFieldsStr, addr, err)
}
ignoreFieldsStr := ignoreFieldsTCP.GetOptionalArg(argIdx)
ignoreFields, err := parseFieldsList(ignoreFieldsStr)
if err != nil {
logger.Fatalf("cannot parse -syslog.ignoreFields.tcp=%q for -syslog.listenAddr.tcp=%q: %s", ignoreFieldsStr, addr, err)
}
extraFieldsStr := extraFieldsTCP.GetOptionalArg(argIdx)
extraFields, err := parseExtraFields(extraFieldsStr)
if err != nil {
logger.Fatalf("cannot parse -syslog.extraFields.tcp=%q for -syslog.listenAddr.tcp=%q: %s", extraFieldsStr, addr, err)
}
doneCh := make(chan struct{})
go func() {
serveTCP(ln, tenantID, compressMethod, useLocalTimestamp)
serveTCP(ln, tenantID, compressMethod, useLocalTimestamp, streamFields, ignoreFields, extraFields)
close(doneCh)
}()
@ -228,7 +281,7 @@ func checkCompressMethod(compressMethod, addr, protocol string) {
}
}
func serveUDP(ln net.PacketConn, tenantID logstorage.TenantID, compressMethod string, useLocalTimestamp bool) {
func serveUDP(ln net.PacketConn, tenantID logstorage.TenantID, compressMethod string, useLocalTimestamp bool, streamFields, ignoreFields []string, extraFields []logstorage.Field) {
gomaxprocs := cgroup.AvailableCPUs()
var wg sync.WaitGroup
localAddr := ln.LocalAddr()
@ -236,7 +289,7 @@ func serveUDP(ln net.PacketConn, tenantID logstorage.TenantID, compressMethod st
wg.Add(1)
go func() {
defer wg.Done()
cp := insertutils.GetCommonParamsForSyslog(tenantID)
cp := insertutils.GetCommonParamsForSyslog(tenantID, streamFields, ignoreFields, extraFields)
var bb bytesutil.ByteBuffer
bb.B = bytesutil.ResizeNoCopyNoOverallocate(bb.B, 64*1024)
for {
@ -270,7 +323,7 @@ func serveUDP(ln net.PacketConn, tenantID logstorage.TenantID, compressMethod st
wg.Wait()
}
func serveTCP(ln net.Listener, tenantID logstorage.TenantID, compressMethod string, useLocalTimestamp bool) {
func serveTCP(ln net.Listener, tenantID logstorage.TenantID, compressMethod string, useLocalTimestamp bool, streamFields, ignoreFields []string, extraFields []logstorage.Field) {
var cm ingestserver.ConnsMap
cm.Init("syslog")
@ -300,7 +353,7 @@ func serveTCP(ln net.Listener, tenantID logstorage.TenantID, compressMethod stri
wg.Add(1)
go func() {
cp := insertutils.GetCommonParamsForSyslog(tenantID)
cp := insertutils.GetCommonParamsForSyslog(tenantID, streamFields, ignoreFields, extraFields)
if err := processStream(c, compressMethod, useLocalTimestamp, cp); err != nil {
logger.Errorf("syslog: cannot process TCP data at %q: %s", addr, err)
}
@ -514,13 +567,15 @@ func processLine(line []byte, currentYear int, timezone *time.Location, useLocal
}
ts = nsecs
}
logstorage.RenameField(p.Fields, "message", "_msg")
logstorage.RenameField(p.Fields, msgFields, "_msg")
lmp.AddRow(ts, p.Fields)
logstorage.PutSyslogParser(p)
return nil
}
var msgFields = []string{"message"}
var (
rowsIngestedTotal = metrics.NewCounter(`vl_rows_ingested_total{type="syslog"}`)
@ -529,3 +584,35 @@ var (
udpRequestsTotal = metrics.NewCounter(`vl_udp_reqests_total{type="syslog"}`)
udpErrorsTotal = metrics.NewCounter(`vl_udp_errors_total{type="syslog"}`)
)
func parseFieldsList(s string) ([]string, error) {
if s == "" {
return nil, nil
}
var a []string
err := json.Unmarshal([]byte(s), &a)
return a, err
}
func parseExtraFields(s string) ([]logstorage.Field, error) {
if s == "" {
return nil, nil
}
var m map[string]string
if err := json.Unmarshal([]byte(s), &m); err != nil {
return nil, err
}
fields := make([]logstorage.Field, 0, len(m))
for k, v := range m {
fields = append(fields, logstorage.Field{
Name: k,
Value: v,
})
}
sort.Slice(fields, func(i, j int) bool {
return fields[i].Name < fields[j].Name
})
return fields, nil
}
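Given the JSON decoding above, the new flags expect a JSON array (parseFieldsList) or a JSON object (parseExtraFields). An illustrative invocation (the values are examples, not defaults):

-syslog.streamFields.tcp='["hostname","app_name"]'
-syslog.ignoreFields.tcp='["proc_id"]'
-syslog.extraFields.tcp='{"source":"edge-syslog"}'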

View file

@ -17,7 +17,7 @@ func isTerminal() bool {
return isatty.IsTerminal(os.Stdout.Fd()) && isatty.IsTerminal(os.Stderr.Fd())
}
func readWithLess(r io.Reader) error {
func readWithLess(r io.Reader, wrapLongLines bool) error {
if !isTerminal() {
// Just write everything to stdout if no terminal is available.
_, err := io.Copy(os.Stdout, r)
@ -48,7 +48,11 @@ func readWithLess(r io.Reader) error {
if err != nil {
return fmt.Errorf("cannot find 'less' command: %w", err)
}
p, err := os.StartProcess(path, []string{"less", "-F", "-X"}, &os.ProcAttr{
opts := []string{"less", "-F", "-X"}
if !wrapLongLines {
opts = append(opts, "-S")
}
p, err := os.StartProcess(path, opts, &os.ProcAttr{
Env: append(os.Environ(), "LESSCHARSET=utf-8"),
Files: []*os.File{pr, os.Stdout, os.Stderr},
})

View file

@ -69,8 +69,8 @@ func main() {
fatalf("cannot initialize readline: %s", err)
}
fmt.Fprintf(rl, "sending queries to %s\n", *datasourceURL)
fmt.Fprintf(rl, "sending queries to -datasource.url=%s\n", *datasourceURL)
fmt.Fprintf(rl, `type ? and press enter to see available commands`+"\n")
runReadlineLoop(rl, &incompleteLine)
if err := rl.Close(); err != nil {
@ -91,6 +91,7 @@ func runReadlineLoop(rl *readline.Instance, incompleteLine *string) {
}
outputMode := outputModeJSONMultiline
wrapLongLines := false
s := ""
for {
line, err := rl.ReadLine()
@ -99,7 +100,7 @@ func runReadlineLoop(rl *readline.Instance, incompleteLine *string) {
case io.EOF:
if s != "" {
// This is non-interactive query execution.
executeQuery(context.Background(), rl, s, outputMode)
executeQuery(context.Background(), rl, s, outputMode, wrapLongLines)
}
return
case readline.ErrInterrupt:
@ -163,6 +164,18 @@ func runReadlineLoop(rl *readline.Instance, incompleteLine *string) {
s = ""
continue
}
if s == `\wrap_long_lines` {
if wrapLongLines {
wrapLongLines = false
fmt.Fprintf(rl, "wrapping of long lines is disabled\n")
} else {
wrapLongLines = true
fmt.Fprintf(rl, "wrapping of long lines is enabled\n")
}
historyLines = pushToHistory(rl, historyLines, s)
s = ""
continue
}
if line != "" && !strings.HasSuffix(line, ";") {
// Assume the query is incomplete and allow the user finishing the query on the next line
s += "\n"
@ -172,7 +185,7 @@ func runReadlineLoop(rl *readline.Instance, incompleteLine *string) {
// Execute the query
ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
executeQuery(ctx, rl, s, outputMode)
executeQuery(ctx, rl, s, outputMode, wrapLongLines)
cancel()
historyLines = pushToHistory(rl, historyLines, s)
@ -252,18 +265,21 @@ func isHelpCommand(s string) bool {
}
func printCommandsHelp(w io.Writer) {
fmt.Fprintf(w, "%s", `List of available commands:
fmt.Fprintf(w, "%s", `Available commands:
\q - quit
\h - show this help
\s - singleline json output mode
\m - multiline json output mode
\c - compact output
\logfmt - logfmt output mode
\wrap_long_lines - toggle wrapping of long lines
\tail <query> - live tail <query> results
See https://docs.victoriametrics.com/victorialogs/querying/vlogscli/ for more details
`)
}
func executeQuery(ctx context.Context, output io.Writer, qStr string, outputMode outputMode) {
func executeQuery(ctx context.Context, output io.Writer, qStr string, outputMode outputMode, wrapLongLines bool) {
if strings.HasPrefix(qStr, `\tail `) {
tailQuery(ctx, output, qStr, outputMode)
return
@ -277,7 +293,7 @@ func executeQuery(ctx context.Context, output io.Writer, qStr string, outputMode
_ = respBody.Close()
}()
if err := readWithLess(respBody); err != nil {
if err := readWithLess(respBody, wrapLongLines); err != nil {
fmt.Fprintf(output, "error when reading query response: %s\n", err)
return
}

View file

@ -73,7 +73,6 @@ func ProcessHitsRequest(ctx context.Context, w http.ResponseWriter, r *http.Requ
}
// Prepare the query for hits count.
q.Optimize()
q.DropAllPipes()
q.AddCountByTimePipe(int64(step), int64(offset), fields)
@ -204,7 +203,6 @@ func ProcessFieldNamesRequest(ctx context.Context, w http.ResponseWriter, r *htt
}
// Obtain field names for the given query
q.Optimize()
fieldNames, err := vlstorage.GetFieldNames(ctx, tenantIDs, q)
if err != nil {
httpserver.Errorf(w, r, "cannot obtain field names: %s", err)
@ -244,7 +242,6 @@ func ProcessFieldValuesRequest(ctx context.Context, w http.ResponseWriter, r *ht
}
// Obtain unique values for the given field
q.Optimize()
values, err := vlstorage.GetFieldValues(ctx, tenantIDs, q, fieldName, uint64(limit))
if err != nil {
httpserver.Errorf(w, r, "cannot obtain values for field %q: %s", fieldName, err)
@ -267,7 +264,6 @@ func ProcessStreamFieldNamesRequest(ctx context.Context, w http.ResponseWriter,
}
// Obtain stream field names for the given query
q.Optimize()
names, err := vlstorage.GetStreamFieldNames(ctx, tenantIDs, q)
if err != nil {
httpserver.Errorf(w, r, "cannot obtain stream field names: %s", err)
@ -306,7 +302,6 @@ func ProcessStreamFieldValuesRequest(ctx context.Context, w http.ResponseWriter,
}
// Obtain stream field values for the given query and the given fieldName
q.Optimize()
values, err := vlstorage.GetStreamFieldValues(ctx, tenantIDs, q, fieldName, uint64(limit))
if err != nil {
httpserver.Errorf(w, r, "cannot obtain stream field values: %s", err)
@ -338,7 +333,6 @@ func ProcessStreamIDsRequest(ctx context.Context, w http.ResponseWriter, r *http
}
// Obtain streamIDs for the given query
q.Optimize()
streamIDs, err := vlstorage.GetStreamIDs(ctx, tenantIDs, q, uint64(limit))
if err != nil {
httpserver.Errorf(w, r, "cannot obtain stream_ids: %s", err)
@ -370,7 +364,6 @@ func ProcessStreamsRequest(ctx context.Context, w http.ResponseWriter, r *http.R
}
// Obtain streams for the given query
q.Optimize()
streams, err := vlstorage.GetStreams(ctx, tenantIDs, q, uint64(limit))
if err != nil {
httpserver.Errorf(w, r, "cannot obtain streams: %s", err)
@ -398,7 +391,6 @@ func ProcessLiveTailRequest(ctx context.Context, w http.ResponseWriter, r *http.
"see https://docs.victoriametrics.com/victorialogs/querying/#live-tailing for details", q)
return
}
q.Optimize()
refreshIntervalMsecs, err := httputils.GetDuration(r, "refresh_interval", 1000)
if err != nil {
@ -407,13 +399,28 @@ func ProcessLiveTailRequest(ctx context.Context, w http.ResponseWriter, r *http.
}
refreshInterval := time.Millisecond * time.Duration(refreshIntervalMsecs)
startOffsetMsecs, err := httputils.GetDuration(r, "start_offset", 5*1000)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return
}
startOffset := startOffsetMsecs * 1e6
offsetMsecs, err := httputils.GetDuration(r, "offset", 1000)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return
}
offset := offsetMsecs * 1e6
ctxWithCancel, cancel := context.WithCancel(ctx)
tp := newTailProcessor(cancel)
ticker := time.NewTicker(refreshInterval)
defer ticker.Stop()
end := time.Now().UnixNano()
end := time.Now().UnixNano() - offset
start := end - startOffset
doneCh := ctxWithCancel.Done()
flusher, ok := w.(http.Flusher)
if !ok {
@ -421,14 +428,7 @@ func ProcessLiveTailRequest(ctx context.Context, w http.ResponseWriter, r *http.
}
qOrig := q
for {
start := end - tailOffsetNsecs
end = time.Now().UnixNano()
q = qOrig.Clone(end)
q.AddTimeFilter(start, end)
// q.Optimize() call is needed for converting '*' into filterNoop.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6785#issuecomment-2358547733
q.Optimize()
q = qOrig.CloneWithTimeFilter(end, start, end)
if err := vlstorage.RunQuery(ctxWithCancel, tenantIDs, q, tp.writeBlock); err != nil {
httpserver.Errorf(w, r, "cannot execute tail query [%s]: %s", q, err)
return
@ -447,6 +447,8 @@ func ProcessLiveTailRequest(ctx context.Context, w http.ResponseWriter, r *http.
case <-doneCh:
return
case <-ticker.C:
start = end - tailOffsetNsecs
end = time.Now().UnixNano() - offset
}
}
}
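To make the tailing window arithmetic above concrete, a sketch using the defaults (start_offset=5000ms, offset=1000ms; httputils.GetDuration returns milliseconds, which are then converted to nanoseconds):

offset := int64(1000 * 1e6)      // lag behind "now" by the offset arg
startOffset := int64(5000 * 1e6) // initial lookbehind window
end := time.Now().UnixNano() - offset
start := end - startOffset
// on every ticker tick: start = end - tailOffsetNsecs; end = now - offset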
@ -605,8 +607,6 @@ func ProcessStatsQueryRangeRequest(ctx context.Context, w http.ResponseWriter, r
return
}
q.Optimize()
m := make(map[string]*statsSeries)
var mLock sync.Mutex
@ -717,8 +717,6 @@ func ProcessStatsQueryRequest(ctx context.Context, w http.ResponseWriter, r *htt
return
}
q.Optimize()
var rows []statsRow
var rowsLock sync.Mutex
@ -818,7 +816,6 @@ func ProcessQueryRequest(ctx context.Context, w http.ResponseWriter, r *http.Req
q.AddPipeLimit(uint64(limit))
}
q.Optimize()
writeBlock := func(_ uint, timestamps []int64, columns []logstorage.BlockColumn) {
if len(columns) == 0 || len(columns[0].Values) == 0 {
@ -849,7 +846,6 @@ type row struct {
func getLastNQueryResults(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query, limit int) ([]row, error) {
limitUpper := 2 * limit
q.AddPipeLimit(uint64(limitUpper))
q.Optimize()
rows, err := getQueryResultsWithLimit(ctx, tenantIDs, q, limitUpper)
if err != nil {
@ -869,11 +865,7 @@ func getLastNQueryResults(ctx context.Context, tenantIDs []logstorage.TenantID,
qOrig := q
for {
timestamp := qOrig.GetTimestamp()
q = qOrig.Clone(timestamp)
q.AddTimeFilter(start, end)
// q.Optimize() call is needed for converting '*' into filterNoop.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6785#issuecomment-2358547733
q.Optimize()
q = qOrig.CloneWithTimeFilter(timestamp, start, end)
rows, err := getQueryResultsWithLimit(ctx, tenantIDs, q, limitUpper)
if err != nil {
return nil, err
@ -977,14 +969,29 @@ func parseCommonArgs(r *http.Request) (*logstorage.Query, []logstorage.TenantID,
}
tenantIDs := []logstorage.TenantID{tenantID}
// Parse optional start and end args
start, okStart, err := getTimeNsec(r, "start")
if err != nil {
return nil, nil, err
}
end, okEnd, err := getTimeNsec(r, "end")
if err != nil {
return nil, nil, err
}
// Parse optional time arg
timestamp, okTime, err := getTimeNsec(r, "time")
if err != nil {
return nil, nil, err
}
if !okTime {
// If time arg is missing, then evaluate query at the current timestamp
timestamp = time.Now().UnixNano()
// If the time arg is missing, then evaluate the query either at the end timestamp (if it is set)
// or at the current timestamp (if the end query arg isn't set)
if okEnd {
timestamp = end
} else {
timestamp = time.Now().UnixNano()
}
}
// decrease timestamp by one nanosecond in order to avoid capturing logs belonging
@ -998,16 +1005,8 @@ func parseCommonArgs(r *http.Request) (*logstorage.Query, []logstorage.TenantID,
return nil, nil, fmt.Errorf("cannot parse query [%s]: %s", qStr, err)
}
// Parse optional start and end args
start, okStart, err := getTimeNsec(r, "start")
if err != nil {
return nil, nil, err
}
end, okEnd, err := getTimeNsec(r, "end")
if err != nil {
return nil, nil, err
}
if okStart || okEnd {
// Add _time:[start, end] filter if start or end args were set.
if !okStart {
start = math.MinInt64
}
@ -1017,6 +1016,20 @@ func parseCommonArgs(r *http.Request) (*logstorage.Query, []logstorage.TenantID,
q.AddTimeFilter(start, end)
}
// Parse optional extra_filters
extraFilters, err := getExtraFilters(r, "extra_filters")
if err != nil {
return nil, nil, err
}
q.AddExtraFilters(extraFilters)
// Parse optional extra_stream_filters
extraStreamFilters, err := getExtraFilters(r, "extra_stream_filters")
if err != nil {
return nil, nil, err
}
q.AddExtraStreamFilters(extraStreamFilters)
return q, tenantIDs, nil
}
@ -1032,3 +1045,16 @@ func getTimeNsec(r *http.Request, argName string) (int64, bool, error) {
}
return nsecs, true, nil
}
func getExtraFilters(r *http.Request, argName string) ([]logstorage.Field, error) {
s := r.FormValue(argName)
if s == "" {
return nil, nil
}
var p logstorage.JSONParser
if err := p.ParseLogMessage([]byte(s)); err != nil {
return nil, fmt.Errorf("cannot parse %s: %w", argName, err)
}
return p.Fields, nil
}
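As an illustration, the extra_filters arg carries a JSON object whose entries become additional field filters attached to the query; a hypothetical request sketch:

// Hypothetical query args (URL-encoded in practice):
//   query=error
//   extra_filters={"namespace":"prod"}
// getExtraFilters parses the JSON object into logstorage fields, and
// parseCommonArgs attaches them via q.AddExtraFilters(); the same flow
// applies to extra_stream_filters with q.AddExtraStreamFilters().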

View file

@ -23,7 +23,7 @@ var (
"See also -search.maxQueueDuration")
maxQueueDuration = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the search request waits for execution when -search.maxConcurrentRequests "+
"limit is reached; see also -search.maxQueryDuration")
maxQueryDuration = flag.Duration("search.maxQueryDuration", time.Second*30, "The maximum duration for query execution. It can be overridden on a per-query basis via 'timeout' query arg")
maxQueryDuration = flag.Duration("search.maxQueryDuration", time.Second*30, "The maximum duration for query execution. It can be overridden to a smaller value on a per-query basis via 'timeout' query arg")
)
func getDefaultMaxConcurrentRequests() int {

View file

@ -1,13 +1,13 @@
{
"files": {
"main.css": "./static/css/main.faf86aa5.css",
"main.js": "./static/js/main.2810cc52.js",
"main.js": "./static/js/main.b204330a.js",
"static/js/685.f772060c.chunk.js": "./static/js/685.f772060c.chunk.js",
"static/media/MetricsQL.md": "./static/media/MetricsQL.a00044c91d9781cf8557.md",
"index.html": "./index.html"
},
"entrypoints": [
"static/css/main.faf86aa5.css",
"static/js/main.2810cc52.js"
"static/js/main.b204330a.js"
]
}

View file

@ -1 +1 @@
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><link rel="apple-touch-icon" href="./favicon.svg"/><link rel="mask-icon" href="./favicon.svg" color="#000000"><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=5"/><meta name="theme-color" content="#000000"/><meta name="description" content="Explore your log data with VictoriaLogs UI"/><link rel="manifest" href="./manifest.json"/><title>UI for VictoriaLogs</title><meta name="twitter:card" content="summary"><meta name="twitter:title" content="UI for VictoriaLogs"><meta name="twitter:site" content="@https://victoriametrics.com/products/victorialogs/"><meta name="twitter:description" content="Explore your log data with VictoriaLogs UI"><meta name="twitter:image" content="./preview.jpg"><meta property="og:type" content="website"><meta property="og:title" content="UI for VictoriaLogs"><meta property="og:url" content="https://victoriametrics.com/products/victorialogs/"><meta property="og:description" content="Explore your log data with VictoriaLogs UI"><script defer="defer" src="./static/js/main.2810cc52.js"></script><link href="./static/css/main.faf86aa5.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><link rel="apple-touch-icon" href="./favicon.svg"/><link rel="mask-icon" href="./favicon.svg" color="#000000"><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=5"/><meta name="theme-color" content="#000000"/><meta name="description" content="Explore your log data with VictoriaLogs UI"/><link rel="manifest" href="./manifest.json"/><title>UI for VictoriaLogs</title><meta name="twitter:card" content="summary"><meta name="twitter:title" content="UI for VictoriaLogs"><meta name="twitter:site" content="@https://victoriametrics.com/products/victorialogs/"><meta name="twitter:description" content="Explore your log data with VictoriaLogs UI"><meta name="twitter:image" content="./preview.jpg"><meta property="og:type" content="website"><meta property="og:title" content="UI for VictoriaLogs"><meta property="og:url" content="https://victoriametrics.com/products/victorialogs/"><meta property="og:description" content="Explore your log data with VictoriaLogs UI"><script defer="defer" src="./static/js/main.b204330a.js"></script><link href="./static/css/main.faf86aa5.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -510,7 +510,13 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
httpserver.Errorf(w, r, "%s", err)
return true
}
w.WriteHeader(http.StatusNoContent)
statusCode := http.StatusNoContent
if strings.HasPrefix(p.Suffix, "prometheus/api/v1/import/prometheus/metrics/job/") {
// Return 200 status code for pushgateway requests.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3636
statusCode = http.StatusOK
}
w.WriteHeader(statusCode)
return true
}
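Context for the special case above: per the linked issue, Pushgateway-style clients expect a 200 response on a successful push and can treat the previously returned 204 as a failure; the metrics/job/ suffix is how Pushgateway-compatible import URLs are shaped.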
if strings.HasPrefix(p.Suffix, "datadog/") {

View file

@ -9,6 +9,7 @@ import (
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
"github.com/VictoriaMetrics/metricsql"
)
@ -48,7 +49,7 @@ Outer:
}
var expSamples []parsedSample
for _, s := range mt.ExpSamples {
expLb := datasource.Labels{}
expLb := []prompbmarshal.Label{}
if s.Labels != "" {
metricsqlExpr, err := metricsql.Parse(s.Labels)
if err != nil {
@ -64,7 +65,7 @@ Outer:
}
if len(metricsqlMetricExpr.LabelFilterss) > 0 {
for _, l := range metricsqlMetricExpr.LabelFilterss[0] {
expLb = append(expLb, datasource.Label{
expLb = append(expLb, prompbmarshal.Label{
Name: l.Label,
Value: l.Value,
})

View file

@ -270,6 +270,9 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
// tear down vmstorage and clean the data dir
defer tearDown()
if tg.Interval == nil {
tg.Interval = promutils.NewDuration(evalInterval)
}
err := writeInputSeries(tg.InputSeries, tg.Interval, testStartTime, testPromWriteHTTPPath)
if err != nil {
return []error{err}

View file

@ -3,6 +3,7 @@ package config
import (
"bytes"
"crypto/md5"
"flag"
"fmt"
"hash/fnv"
"io"
@ -17,6 +18,10 @@ import (
"gopkg.in/yaml.v2"
)
var (
defaultRuleType = flag.String("rule.defaultRuleType", "prometheus", `Default type for rule expressions, can be overridden via "type" parameter on the group level, see https://docs.victoriametrics.com/vmalert/#groups. Supported values: "graphite", "prometheus" and "vlogs".`)
)
// Group contains list of Rules grouped into
// entity with one name and evaluation interval
type Group struct {
@ -59,11 +64,9 @@ func (g *Group) UnmarshalYAML(unmarshal func(any) error) error {
if err != nil {
return fmt.Errorf("failed to marshal group configuration for checksum: %w", err)
}
// change default value to prometheus datasource.
if g.Type.Get() == "" {
g.Type.Set(NewPrometheusType())
g.Type = NewRawType(*defaultRuleType)
}
h := md5.New()
h.Write(b)
g.Checksum = fmt.Sprintf("%x", h.Sum(nil))
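With this change the previously hard-coded prometheus default becomes configurable: for example, starting vmalert with -rule.defaultRuleType=vlogs makes groups that omit the type field be treated as VictoriaLogs groups, while an explicit type on a group still takes precedence.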

View file

@ -122,6 +122,7 @@ func TestParse_Failure(t *testing.T) {
f([]string{"testdata/dir/rules3-bad.rules"}, "either `record` or `alert` must be set")
f([]string{"testdata/dir/rules4-bad.rules"}, "either `record` or `alert` must be set")
f([]string{"testdata/rules/rules1-bad.rules"}, "bad graphite expr")
f([]string{"testdata/rules/vlog-rules0-bad.rules"}, "bad LogsQL expr")
f([]string{"testdata/dir/rules6-bad.rules"}, "missing ':' in header")
f([]string{"testdata/rules/rules-multi-doc-bad.rules"}, "unknown fields")
f([]string{"testdata/rules/rules-multi-doc-duplicates-bad.rules"}, "duplicate")
@ -240,7 +241,7 @@ func TestGroupValidate_Failure(t *testing.T) {
}, false, "duplicate")
f(&Group{
Name: "test graphite prometheus bad expr",
Name: "test graphite with prometheus expr",
Type: NewGraphiteType(),
Rules: []Rule{
{
@ -267,6 +268,20 @@ func TestGroupValidate_Failure(t *testing.T) {
},
}, false, "either `record` or `alert` must be set")
f(&Group{
Name: "test vlogs with prometheus expr",
Type: NewVLogsType(),
Rules: []Rule{
{
Expr: "sum(up == 0 ) by (host)",
For: promutils.NewDuration(10 * time.Millisecond),
},
{
Expr: "sumSeries(time('foo.bar',10))",
},
},
}, false, "invalid rule")
// validate expressions
f(&Group{
Name: "test",
@ -297,6 +312,16 @@ func TestGroupValidate_Failure(t *testing.T) {
}},
},
}, true, "bad graphite expr")
f(&Group{
Name: "test vlogs",
Type: NewVLogsType(),
Rules: []Rule{
{Alert: "alert", Expr: "stats count(*) as requests", Labels: map[string]string{
"description": "some-description",
}},
},
}, true, "bad LogsQL expr")
}
func TestGroupValidate_Success(t *testing.T) {
@ -336,7 +361,7 @@ func TestGroupValidate_Success(t *testing.T) {
},
}, false, false)
// validate annotiations
// validate annotations
f(&Group{
Name: "test",
Rules: []Rule{
@ -363,6 +388,15 @@ func TestGroupValidate_Success(t *testing.T) {
}},
},
}, false, true)
f(&Group{
Name: "test victorialogs",
Type: NewVLogsType(),
Rules: []Rule{
{Alert: "alert", Expr: " _time: 1m | stats count(*) as requests", Labels: map[string]string{
"description": "{{ value|query }}",
}},
},
}, false, true)
}
func TestHashRule_NotEqual(t *testing.T) {

View file

@ -0,0 +1,10 @@
groups:
- name: InvalidStatsLogsql
type: vlogs
interval: 5m
rules:
- record: MissingFilter
expr: 'stats count(*) as requests'
- record: MissingStatsPipe
expr: 'service: "nginx"'

View file

@ -0,0 +1,29 @@
groups:
- name: RequestCount
type: vlogs
interval: 5m
rules:
- record: nginxRequestCount
expr: 'env: "test" AND service: "nginx" | stats count(*) as requests'
annotations:
description: "Service nginx on env test accepted {{$labels.requests}} requests in the last 5 minutes"
- record: prodRequestCount
expr: 'env: "prod" | stats by (service) count(*) as requests'
annotations:
description: "Service {{$labels.service}} on env prod accepted {{$labels.requests}} requests in the last 5 minutes"
- name: ServiceLog
type: vlogs
interval: 5m
rules:
- alert: HasErrorLog
expr: 'env: "prod" AND status:~"error|warn" | stats by (service) count(*) as errorLog | filter errorLog:>0'
annotations:
description: "Service {{$labels.service}} generated {{$labels.errorLog}} error logs in the last 5 minutes"
- name: ServiceRequest
type: vlogs
interval: 10m
rules:
- alert: TooManyFailedRequest
expr: '* | extract "ip=<ip> " | extract "status_code=<code>;" | stats by (ip) count() if (code:!~200) as failed, count() as total| math failed / total as failed_percentage| filter failed_percentage :> 0.01 | fields ip,failed_percentage'
annotations:
description: "Connection from address {{$labels.ip}} has {{$value}} failed requests ratio in last 10 minutes"

View file

@ -5,6 +5,7 @@ import (
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/graphiteql"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
"github.com/VictoriaMetrics/metricsql"
)
@ -27,6 +28,13 @@ func NewGraphiteType() Type {
}
}
// NewVLogsType returns victorialogs datasource type
func NewVLogsType() Type {
return Type{
Name: "vlogs",
}
}
// NewRawType returns datasource type from raw string
// without validation.
func NewRawType(d string) Type {
@ -62,6 +70,10 @@ func (t *Type) ValidateExpr(expr string) error {
if _, err := metricsql.Parse(expr); err != nil {
return fmt.Errorf("bad prometheus expr: %q, err: %w", expr, err)
}
case "vlogs":
if _, err := logstorage.ParseStatsQuery(expr, 0); err != nil {
return fmt.Errorf("bad LogsQL expr: %q, err: %w", expr, err)
}
default:
return fmt.Errorf("unknown datasource type=%q", t.Name)
}
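A sketch of what the new vlogs branch accepts and rejects, based on the test fixtures added in this change:

// ok:  `env: "test" AND service: "nginx" | stats count(*) as requests`
// bad: `stats count(*) as requests` (missing filter)
// bad: `service: "nginx"` (missing stats pipe)
if _, err := logstorage.ParseStatsQuery(`service: "nginx"`, 0); err != nil {
// reported as: bad LogsQL expr: ...
}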
@ -74,13 +86,10 @@ func (t *Type) UnmarshalYAML(unmarshal func(any) error) error {
if err := unmarshal(&s); err != nil {
return err
}
if s == "" {
s = "prometheus"
}
switch s {
case "graphite", "prometheus":
case "graphite", "prometheus", "vlogs":
default:
return fmt.Errorf("unknown datasource type=%q, want %q or %q", s, "prometheus", "graphite")
return fmt.Errorf("unknown datasource type=%q, want prometheus, graphite or vlogs", s)
}
t.Name = s
return nil

View file

@ -0,0 +1,333 @@
package datasource
import (
"context"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
)
type datasourceType string
const (
datasourcePrometheus datasourceType = "prometheus"
datasourceGraphite datasourceType = "graphite"
datasourceVLogs datasourceType = "vlogs"
)
func toDatasourceType(s string) datasourceType {
switch s {
case string(datasourcePrometheus):
return datasourcePrometheus
case string(datasourceGraphite):
return datasourceGraphite
case string(datasourceVLogs):
return datasourceVLogs
default:
logger.Panicf("BUG: unknown datasource type %q", s)
}
return ""
}
// Client is a datasource entity for reading data;
// supported clients are enumerated in datasourceType.
// WARN: when adding a new field, remember to check if Clone() method needs to be updated.
type Client struct {
c *http.Client
authCfg *promauth.Config
datasourceURL string
appendTypePrefix bool
queryStep time.Duration
dataSourceType datasourceType
// applyIntervalAsTimeFilter is only valid for the vlogs datasource.
// It is set to true if there is no [timeFilter](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter) in the rule expression;
// in that case the evaluation interval is added as an additional time filter when querying.
applyIntervalAsTimeFilter bool
// evaluationInterval helps set the request's `step` param
// or add a time filter for LogsQL expressions.
evaluationInterval time.Duration
// extraParams contains params to be attached to each HTTP request
extraParams url.Values
// extraHeaders are headers to be attached to each HTTP request
extraHeaders []keyValue
// whether to print additional log messages
// for each sent request
debug bool
}
type keyValue struct {
key string
value string
}
// Clone clones shared http client and other configuration to the new client.
func (c *Client) Clone() *Client {
ns := &Client{
c: c.c,
authCfg: c.authCfg,
datasourceURL: c.datasourceURL,
appendTypePrefix: c.appendTypePrefix,
queryStep: c.queryStep,
dataSourceType: c.dataSourceType,
evaluationInterval: c.evaluationInterval,
// init map so it can be populated below
extraParams: url.Values{},
debug: c.debug,
}
if len(c.extraHeaders) > 0 {
ns.extraHeaders = make([]keyValue, len(c.extraHeaders))
copy(ns.extraHeaders, c.extraHeaders)
}
for k, v := range c.extraParams {
ns.extraParams[k] = v
}
return ns
}
// ApplyParams - changes given querier params.
func (c *Client) ApplyParams(params QuerierParams) *Client {
if params.DataSourceType != "" {
c.dataSourceType = toDatasourceType(params.DataSourceType)
}
c.evaluationInterval = params.EvaluationInterval
c.applyIntervalAsTimeFilter = params.ApplyIntervalAsTimeFilter
if params.QueryParams != nil {
if c.extraParams == nil {
c.extraParams = url.Values{}
}
for k, vl := range params.QueryParams {
// custom query params take priority over default ones
if c.extraParams.Has(k) {
c.extraParams.Del(k)
}
for _, v := range vl {
// use Del/Add instead of .Set() since GET params are allowed
// to be duplicated
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4908
c.extraParams.Add(k, v)
}
}
}
if params.Headers != nil {
for key, value := range params.Headers {
kv := keyValue{key: key, value: value}
c.extraHeaders = append(c.extraHeaders, kv)
}
}
c.debug = params.Debug
return c
}
// BuildWithParams - implements interface.
func (c *Client) BuildWithParams(params QuerierParams) Querier {
return c.Clone().ApplyParams(params)
}
// NewPrometheusClient returns a new prometheus datasource client.
func NewPrometheusClient(baseURL string, authCfg *promauth.Config, appendTypePrefix bool, c *http.Client) *Client {
return &Client{
c: c,
authCfg: authCfg,
datasourceURL: strings.TrimSuffix(baseURL, "/"),
appendTypePrefix: appendTypePrefix,
queryStep: *queryStep,
dataSourceType: datasourcePrometheus,
extraParams: url.Values{},
}
}
// Query executes the given query and returns parsed response
func (c *Client) Query(ctx context.Context, query string, ts time.Time) (Result, *http.Request, error) {
req, err := c.newQueryRequest(ctx, query, ts)
if err != nil {
return Result{}, nil, err
}
resp, err := c.do(req)
if err != nil {
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) && !netutil.IsTrivialNetworkError(err) {
// Return unexpected error to the caller.
return Result{}, nil, err
}
// Something in the middle between the client and the datasource might be closing
// the connection, so we make one more attempt in the hope that the request will succeed.
req, err = c.newQueryRequest(ctx, query, ts)
if err != nil {
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
}
resp, err = c.do(req)
if err != nil {
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
}
}
// Process the received response.
var parseFn func(req *http.Request, resp *http.Response) (Result, error)
switch c.dataSourceType {
case datasourcePrometheus:
parseFn = parsePrometheusResponse
case datasourceGraphite:
parseFn = parseGraphiteResponse
case datasourceVLogs:
parseFn = parseVLogsResponse
default:
logger.Panicf("BUG: unsupported datasource type %q to parse query response", c.dataSourceType)
}
result, err := parseFn(req, resp)
_ = resp.Body.Close()
return result, req, err
}
// QueryRange executes the given query on the given time range.
// For Prometheus type see https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries
// Graphite type isn't supported.
func (c *Client) QueryRange(ctx context.Context, query string, start, end time.Time) (res Result, err error) {
if c.dataSourceType == datasourceGraphite {
return res, fmt.Errorf("%q is not supported for QueryRange", c.dataSourceType)
}
// TODO: range queries are currently disabled for LogsQL expressions that contain a time filter
if c.dataSourceType == datasourceVLogs && !c.applyIntervalAsTimeFilter {
return res, fmt.Errorf("range query is not supported for LogsQL expression %q because it contains a time filter. Remove the time filter from the expression and try again", query)
}
if start.IsZero() {
return res, fmt.Errorf("start param is missing")
}
if end.IsZero() {
return res, fmt.Errorf("end param is missing")
}
req, err := c.newQueryRangeRequest(ctx, query, start, end)
if err != nil {
return res, err
}
resp, err := c.do(req)
if err != nil {
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) && !netutil.IsTrivialNetworkError(err) {
// Return unexpected error to the caller.
return res, err
}
// Something in the middle between the client and the datasource might be closing
// the connection, so we make one more attempt in the hope that the request will succeed.
req, err = c.newQueryRangeRequest(ctx, query, start, end)
if err != nil {
return res, fmt.Errorf("second attempt: %w", err)
}
resp, err = c.do(req)
if err != nil {
return res, fmt.Errorf("second attempt: %w", err)
}
}
// Process the received response.
var parseFn func(req *http.Request, resp *http.Response) (Result, error)
switch c.dataSourceType {
case datasourcePrometheus:
parseFn = parsePrometheusResponse
case datasourceVLogs:
parseFn = parseVLogsResponse
default:
logger.Panicf("BUG: unsupported datasource type %q to parse query range response", c.dataSourceType)
}
res, err = parseFn(req, resp)
_ = resp.Body.Close()
return res, err
}
func (c *Client) do(req *http.Request) (*http.Response, error) {
ru := req.URL.Redacted()
if *showDatasourceURL {
ru = req.URL.String()
}
if c.debug {
logger.Infof("DEBUG datasource request: executing %s request with params %q", req.Method, ru)
}
resp, err := c.c.Do(req)
if err != nil {
return nil, fmt.Errorf("error getting response from %s: %w", ru, err)
}
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
_ = resp.Body.Close()
return nil, fmt.Errorf("unexpected response code %d for %s. Response body %s", resp.StatusCode, ru, body)
}
return resp, nil
}
func (c *Client) newQueryRangeRequest(ctx context.Context, query string, start, end time.Time) (*http.Request, error) {
req, err := c.newRequest(ctx)
if err != nil {
return nil, fmt.Errorf("cannot create query_range request to datasource %q: %w", c.datasourceURL, err)
}
switch c.dataSourceType {
case datasourcePrometheus:
c.setPrometheusRangeReqParams(req, query, start, end)
case datasourceVLogs:
c.setVLogsRangeReqParams(req, query, start, end)
default:
logger.Panicf("BUG: unsupported datasource type %q to create range query request", c.dataSourceType)
}
return req, nil
}
func (c *Client) newQueryRequest(ctx context.Context, query string, ts time.Time) (*http.Request, error) {
req, err := c.newRequest(ctx)
if err != nil {
return nil, fmt.Errorf("cannot create query request to datasource %q: %w", c.datasourceURL, err)
}
switch c.dataSourceType {
case datasourcePrometheus:
c.setPrometheusInstantReqParams(req, query, ts)
case datasourceGraphite:
c.setGraphiteReqParams(req, query)
case datasourceVLogs:
c.setVLogsInstantReqParams(req, query, ts)
default:
logger.Panicf("BUG: unsupported datasource type %q to create query request", c.dataSourceType)
}
return req, nil
}
func (c *Client) newRequest(ctx context.Context) (*http.Request, error) {
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.datasourceURL, nil)
if err != nil {
logger.Panicf("BUG: unexpected error from http.NewRequest(%q): %s", c.datasourceURL, err)
}
req.Header.Set("Content-Type", "application/json")
if c.authCfg != nil {
err = c.authCfg.SetHeaders(req, true)
if err != nil {
return nil, err
}
}
for _, h := range c.extraHeaders {
req.Header.Set(h.key, h.value)
}
return req, nil
}
// setReqParams adds query and other extra params for the request.
func (c *Client) setReqParams(r *http.Request, query string) {
q := r.URL.Query()
for k, vs := range c.extraParams {
if q.Has(k) { // extraParams are prior to params in URL
q.Del(k)
}
for _, v := range vs {
q.Add(k, v)
}
}
q.Set("query", query)
r.URL.RawQuery = q.Encode()
}

View file

@ -46,8 +46,8 @@ const (
graphitePrefix = "/graphite"
)
func (s *VMStorage) setGraphiteReqParams(r *http.Request, query string) {
if s.appendTypePrefix {
func (c *Client) setGraphiteReqParams(r *http.Request, query string) {
if c.appendTypePrefix {
r.URL.Path += graphitePrefix
}
r.URL.Path += graphitePath
@ -58,7 +58,7 @@ func (s *VMStorage) setGraphiteReqParams(r *http.Request, query string) {
q.Set("target", query)
q.Set("until", "now")
for k, vs := range s.extraParams {
for k, vs := range c.extraParams {
if q.Has(k) { // extraParams are prior to params in URL
q.Del(k)
}

View file

@ -9,14 +9,16 @@ import (
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/valyala/fastjson"
)
var (
disablePathAppend = flag.Bool("remoteRead.disablePathAppend", false, "Whether to disable automatic appending of '/api/v1/query' path "+
disablePathAppend = flag.Bool("remoteRead.disablePathAppend", false, "Whether to disable automatic appending of '/api/v1/query' or '/select/logsql/stats_query' path "+
"to the configured -datasource.url and -remoteRead.url")
disableStepParam = flag.Bool("datasource.disableStepParam", false, "Whether to disable adding 'step' param to the issued instant queries. "+
disableStepParam = flag.Bool("datasource.disableStepParam", false, "Whether to disable adding 'step' param in instant queries to the configured -datasource.url and -remoteRead.url. "+
"Only valid for prometheus datasource. "+
"This might be useful when using vmalert with datasources that do not support 'step' param for instant queries, like Google Managed Prometheus. "+
"It is not recommended to enable this flag if you use vmalert with VictoriaMetrics.")
)
@ -81,14 +83,14 @@ func (pi *promInstant) Unmarshal(b []byte) error {
labels := metric.GetObject()
r := &pi.ms[i]
r.Labels = make([]Label, 0, labels.Len())
r.Labels = make([]prompbmarshal.Label, 0, labels.Len())
labels.Visit(func(key []byte, v *fastjson.Value) {
lv, errLocal := v.StringBytes()
if errLocal != nil {
err = fmt.Errorf("error when parsing label value %q: %s", v, errLocal)
return
}
r.Labels = append(r.Labels, Label{
r.Labels = append(r.Labels, prompbmarshal.Label{
Name: string(key),
Value: string(lv),
})
@ -171,7 +173,7 @@ const (
func parsePrometheusResponse(req *http.Request, resp *http.Response) (res Result, err error) {
r := &promResponse{}
if err = json.NewDecoder(resp.Body).Decode(r); err != nil {
return res, fmt.Errorf("error parsing prometheus metrics for %s: %w", req.URL.Redacted(), err)
return res, fmt.Errorf("error parsing response from %s: %w", req.URL.Redacted(), err)
}
if r.Status == statusError {
return res, fmt.Errorf("response error, query: %s, errorType: %s, error: %s", req.URL.Redacted(), r.ErrorType, r.Error)
@ -218,8 +220,8 @@ func parsePrometheusResponse(req *http.Request, resp *http.Response) (res Result
return res, nil
}
func (s *VMStorage) setPrometheusInstantReqParams(r *http.Request, query string, timestamp time.Time) {
if s.appendTypePrefix {
func (c *Client) setPrometheusInstantReqParams(r *http.Request, query string, timestamp time.Time) {
if c.appendTypePrefix {
r.URL.Path += "/prometheus"
}
if !*disablePathAppend {
@ -227,22 +229,22 @@ func (s *VMStorage) setPrometheusInstantReqParams(r *http.Request, query string,
}
q := r.URL.Query()
q.Set("time", timestamp.Format(time.RFC3339))
if !*disableStepParam && s.evaluationInterval > 0 { // set step as evaluationInterval by default
if !*disableStepParam && c.evaluationInterval > 0 { // set step as evaluationInterval by default
// always convert to seconds to keep compatibility with older
// Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943
q.Set("step", fmt.Sprintf("%ds", int(s.evaluationInterval.Seconds())))
q.Set("step", fmt.Sprintf("%ds", int(c.evaluationInterval.Seconds())))
}
if !*disableStepParam && s.queryStep > 0 { // override step with user-specified value
if !*disableStepParam && c.queryStep > 0 { // override step with user-specified value
// always convert to seconds to keep compatibility with older
// Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943
q.Set("step", fmt.Sprintf("%ds", int(s.queryStep.Seconds())))
q.Set("step", fmt.Sprintf("%ds", int(c.queryStep.Seconds())))
}
r.URL.RawQuery = q.Encode()
s.setPrometheusReqParams(r, query)
c.setReqParams(r, query)
}
func (s *VMStorage) setPrometheusRangeReqParams(r *http.Request, query string, start, end time.Time) {
if s.appendTypePrefix {
func (c *Client) setPrometheusRangeReqParams(r *http.Request, query string, start, end time.Time) {
if c.appendTypePrefix {
r.URL.Path += "/prometheus"
}
if !*disablePathAppend {
@ -251,25 +253,11 @@ func (s *VMStorage) setPrometheusRangeReqParams(r *http.Request, query string, s
q := r.URL.Query()
q.Add("start", start.Format(time.RFC3339))
q.Add("end", end.Format(time.RFC3339))
if s.evaluationInterval > 0 { // set step as evaluationInterval by default
if c.evaluationInterval > 0 { // set step as evaluationInterval by default
// always convert to seconds to keep compatibility with older
// Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943
q.Set("step", fmt.Sprintf("%ds", int(s.evaluationInterval.Seconds())))
q.Set("step", fmt.Sprintf("%ds", int(c.evaluationInterval.Seconds())))
}
r.URL.RawQuery = q.Encode()
s.setPrometheusReqParams(r, query)
}
func (s *VMStorage) setPrometheusReqParams(r *http.Request, query string) {
q := r.URL.Query()
for k, vs := range s.extraParams {
if q.Has(k) { // extraParams are prior to params in URL
q.Del(k)
}
for _, v := range vs {
q.Add(k, v)
}
}
q.Set("query", query)
r.URL.RawQuery = q.Encode()
c.setReqParams(r, query)
}

View file

@ -14,6 +14,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)
var (
@ -24,8 +25,10 @@ var (
Username: basicAuthName,
Password: promauth.NewSecret(basicAuthPass),
}
query = "vm_rows"
queryRender = "constantLine(10)"
vmQuery = "vm_rows"
queryRender = "constantLine(10)"
vlogsQuery = "_time: 5m | stats by (foo) count() total"
vlogsRangeQuery = "* | stats by (foo) count() total"
)
func TestVMInstantQuery(t *testing.T) {
@ -42,8 +45,8 @@ func TestVMInstantQuery(t *testing.T) {
if name, pass, _ := r.BasicAuth(); name != basicAuthName || pass != basicAuthPass {
t.Fatalf("expected %s:%s as basic auth got %s:%s", basicAuthName, basicAuthPass, name, pass)
}
if r.URL.Query().Get("query") != query {
t.Fatalf("expected %s in query param, got %s", query, r.URL.Query().Get("query"))
if r.URL.Query().Get("query") != vmQuery {
t.Fatalf("expected %s in query param, got %s", vmQuery, r.URL.Query().Get("query"))
}
timeParam := r.URL.Query().Get("time")
if timeParam == "" {
@ -78,6 +81,31 @@ func TestVMInstantQuery(t *testing.T) {
w.Write([]byte(`[{"target":"constantLine(10)","tags":{"name":"constantLine(10)"},"datapoints":[[10,1611758343],[10,1611758373],[10,1611758403]]}]`))
}
})
mux.HandleFunc("/select/logsql/stats_query", func(w http.ResponseWriter, r *http.Request) {
c++
if r.Method != http.MethodPost {
t.Fatalf("expected POST method got %s", r.Method)
}
if name, pass, _ := r.BasicAuth(); name != basicAuthName || pass != basicAuthPass {
t.Fatalf("expected %s:%s as basic auth got %s:%s", basicAuthName, basicAuthPass, name, pass)
}
if r.URL.Query().Get("query") != vlogsQuery {
t.Fatalf("expected %s in query param, got %s", vlogsQuery, r.URL.Query().Get("query"))
}
timeParam := r.URL.Query().Get("time")
if timeParam == "" {
t.Fatalf("expected 'time' in query param, got nil instead")
}
if _, err := time.Parse(time.RFC3339, timeParam); err != nil {
t.Fatalf("failed to parse 'time' query param %q: %s", timeParam, err)
}
switch c {
case 9:
w.Write([]byte("[]"))
case 10:
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"total","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"total","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
}
})
srv := httptest.NewServer(mux)
defer srv.Close()
@ -86,13 +114,13 @@ func TestVMInstantQuery(t *testing.T) {
if err != nil {
t.Fatalf("unexpected: %s", err)
}
s := NewVMStorage(srv.URL, authCfg, 0, false, srv.Client())
s := NewPrometheusClient(srv.URL, authCfg, false, srv.Client())
p := datasourcePrometheus
pq := s.BuildWithParams(QuerierParams{DataSourceType: string(p), EvaluationInterval: 15 * time.Second})
ts := time.Now()
expErr := func(err string) {
expErr := func(query, err string) {
_, _, gotErr := pq.Query(ctx, query, ts)
if gotErr == nil {
t.Fatalf("expected %q got nil", err)
@ -102,13 +130,13 @@ func TestVMInstantQuery(t *testing.T) {
}
}
expErr("500") // 0
expErr("error parsing prometheus metrics") // 1
expErr("response error") // 2
expErr("unknown status") // 3
expErr("unexpected end of JSON input") // 4
expErr(vmQuery, "500") // 0
expErr(vmQuery, "error parsing response") // 1
expErr(vmQuery, "response error") // 2
expErr(vmQuery, "unknown status") // 3
expErr(vmQuery, "unexpected end of JSON input") // 4
res, _, err := pq.Query(ctx, query, ts) // 5 - vector
res, _, err := pq.Query(ctx, vmQuery, ts) // 5 - vector
if err != nil {
t.Fatalf("unexpected %s", err)
}
@ -117,19 +145,19 @@ func TestVMInstantQuery(t *testing.T) {
}
expected := []Metric{
{
Labels: []Label{{Value: "vm_rows", Name: "__name__"}, {Value: "bar", Name: "foo"}},
Labels: []prompbmarshal.Label{{Value: "vm_rows", Name: "__name__"}, {Value: "bar", Name: "foo"}},
Timestamps: []int64{1583786142},
Values: []float64{13763},
},
{
Labels: []Label{{Value: "vm_requests", Name: "__name__"}, {Value: "baz", Name: "foo"}},
Labels: []prompbmarshal.Label{{Value: "vm_requests", Name: "__name__"}, {Value: "baz", Name: "foo"}},
Timestamps: []int64{1583786140},
Values: []float64{2000},
},
}
metricsEqual(t, res.Data, expected)
res, req, err := pq.Query(ctx, query, ts) // 6 - scalar
res, req, err := pq.Query(ctx, vmQuery, ts) // 6 - scalar
if err != nil {
t.Fatalf("unexpected %s", err)
}
@ -154,7 +182,7 @@ func TestVMInstantQuery(t *testing.T) {
res.SeriesFetched)
}
res, _, err = pq.Query(ctx, query, ts) // 7 - scalar with stats
res, _, err = pq.Query(ctx, vmQuery, ts) // 7 - scalar with stats
if err != nil {
t.Fatalf("unexpected %s", err)
}
@ -175,6 +203,7 @@ func TestVMInstantQuery(t *testing.T) {
*res.SeriesFetched)
}
// test graphite
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
res, _, err = gq.Query(ctx, queryRender, ts) // 8 - graphite
@ -186,12 +215,39 @@ func TestVMInstantQuery(t *testing.T) {
}
exp := []Metric{
{
Labels: []Label{{Value: "constantLine(10)", Name: "name"}},
Labels: []prompbmarshal.Label{{Value: "constantLine(10)", Name: "name"}},
Timestamps: []int64{1611758403},
Values: []float64{10},
},
}
metricsEqual(t, res.Data, exp)
// test victorialogs
vlogs := datasourceVLogs
pq = s.BuildWithParams(QuerierParams{DataSourceType: string(vlogs), EvaluationInterval: 15 * time.Second})
expErr(vlogsQuery, "error parsing response") // 9
res, _, err = pq.Query(ctx, vlogsQuery, ts) // 10
if err != nil {
t.Fatalf("unexpected %s", err)
}
if len(res.Data) != 2 {
t.Fatalf("expected 2 metrics got %d in %+v", len(res.Data), res.Data)
}
expected = []Metric{
{
Labels: []prompbmarshal.Label{{Value: "total", Name: "stats_result"}, {Value: "bar", Name: "foo"}},
Timestamps: []int64{1583786142},
Values: []float64{13763},
},
{
Labels: []prompbmarshal.Label{{Value: "total", Name: "stats_result"}, {Value: "baz", Name: "foo"}},
Timestamps: []int64{1583786140},
Values: []float64{2000},
},
}
metricsEqual(t, res.Data, expected)
}
func TestVMInstantQueryWithRetry(t *testing.T) {
@ -202,8 +258,8 @@ func TestVMInstantQueryWithRetry(t *testing.T) {
c := -1
mux.HandleFunc("/api/v1/query", func(w http.ResponseWriter, r *http.Request) {
c++
if r.URL.Query().Get("query") != query {
t.Fatalf("expected %s in query param, got %s", query, r.URL.Query().Get("query"))
if r.URL.Query().Get("query") != vmQuery {
t.Fatalf("expected %s in query param, got %s", vmQuery, r.URL.Query().Get("query"))
}
switch c {
case 0:
@ -225,11 +281,11 @@ func TestVMInstantQueryWithRetry(t *testing.T) {
srv := httptest.NewServer(mux)
defer srv.Close()
s := NewVMStorage(srv.URL, nil, 0, false, srv.Client())
s := NewPrometheusClient(srv.URL, nil, false, srv.Client())
pq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourcePrometheus)})
expErr := func(err string) {
_, _, gotErr := pq.Query(ctx, query, time.Now())
_, _, gotErr := pq.Query(ctx, vmQuery, time.Now())
if gotErr == nil {
t.Fatalf("expected %q got nil", err)
}
@ -239,7 +295,7 @@ func TestVMInstantQueryWithRetry(t *testing.T) {
}
expValue := func(v float64) {
res, _, err := pq.Query(ctx, query, time.Now())
res, _, err := pq.Query(ctx, vmQuery, time.Now())
if err != nil {
t.Fatalf("unexpected %s", err)
}
@ -300,8 +356,8 @@ func TestVMRangeQuery(t *testing.T) {
if name, pass, _ := r.BasicAuth(); name != basicAuthName || pass != basicAuthPass {
t.Fatalf("expected %s:%s as basic auth got %s:%s", basicAuthName, basicAuthPass, name, pass)
}
if r.URL.Query().Get("query") != query {
t.Fatalf("expected %s in query param, got %s", query, r.URL.Query().Get("query"))
if r.URL.Query().Get("query") != vmQuery {
t.Fatalf("expected %s in query param, got %s", vmQuery, r.URL.Query().Get("query"))
}
startTS := r.URL.Query().Get("start")
if startTS == "" {
@ -326,6 +382,40 @@ func TestVMRangeQuery(t *testing.T) {
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`))
}
})
mux.HandleFunc("/select/logsql/stats_query_range", func(w http.ResponseWriter, r *http.Request) {
c++
if r.Method != http.MethodPost {
t.Fatalf("expected POST method got %s", r.Method)
}
if name, pass, _ := r.BasicAuth(); name != basicAuthName || pass != basicAuthPass {
t.Fatalf("expected %s:%s as basic auth got %s:%s", basicAuthName, basicAuthPass, name, pass)
}
if r.URL.Query().Get("query") != vlogsRangeQuery {
t.Fatalf("expected %s in query param, got %s", vmQuery, r.URL.Query().Get("query"))
}
startTS := r.URL.Query().Get("start")
if startTS == "" {
t.Fatalf("expected 'start' in query param, got nil instead")
}
if _, err := time.Parse(time.RFC3339, startTS); err != nil {
t.Fatalf("failed to parse 'start' query param: %s", err)
}
endTS := r.URL.Query().Get("end")
if endTS == "" {
t.Fatalf("expected 'end' in query param, got nil instead")
}
if _, err := time.Parse(time.RFC3339, endTS); err != nil {
t.Fatalf("failed to parse 'end' query param: %s", err)
}
step := r.URL.Query().Get("step")
if step != "60s" {
t.Fatalf("expected 'step' query param to be 60s; got %q instead", step)
}
switch c {
case 1:
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"total"},"values":[[1583786142,"10"]]}]}}`))
}
})
srv := httptest.NewServer(mux)
defer srv.Close()
@ -334,19 +424,19 @@ func TestVMRangeQuery(t *testing.T) {
if err != nil {
t.Fatalf("unexpected: %s", err)
}
s := NewVMStorage(srv.URL, authCfg, *queryStep, false, srv.Client())
s := NewPrometheusClient(srv.URL, authCfg, false, srv.Client())
pq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourcePrometheus), EvaluationInterval: 15 * time.Second})
_, err = pq.QueryRange(ctx, query, time.Now(), time.Time{})
_, err = pq.QueryRange(ctx, vmQuery, time.Now(), time.Time{})
expectError(t, err, "is missing")
_, err = pq.QueryRange(ctx, query, time.Time{}, time.Now())
_, err = pq.QueryRange(ctx, vmQuery, time.Time{}, time.Now())
expectError(t, err, "is missing")
start, end := time.Now().Add(-time.Minute), time.Now()
res, err := pq.QueryRange(ctx, query, start, end)
res, err := pq.QueryRange(ctx, vmQuery, start, end)
if err != nil {
t.Fatalf("unexpected %s", err)
}
@ -355,7 +445,7 @@ func TestVMRangeQuery(t *testing.T) {
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
}
expected := Metric{
Labels: []Label{{Value: "vm_rows", Name: "__name__"}},
Labels: []prompbmarshal.Label{{Value: "vm_rows", Name: "__name__"}},
Timestamps: []int64{1583786142},
Values: []float64{13763},
}
@ -363,33 +453,66 @@ func TestVMRangeQuery(t *testing.T) {
t.Fatalf("unexpected metric %+v want %+v", m[0], expected)
}
// test unsupported graphite
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
_, err = gq.QueryRange(ctx, queryRender, start, end)
expectError(t, err, "is not supported")
// unsupported logsql
gq = s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceVLogs), EvaluationInterval: 60 * time.Second})
res, err = gq.QueryRange(ctx, vlogsRangeQuery, start, end)
expectError(t, err, "is not supported")
// supported logsql
gq = s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceVLogs), EvaluationInterval: 60 * time.Second, ApplyIntervalAsTimeFilter: true})
res, err = gq.QueryRange(ctx, vlogsRangeQuery, start, end)
if err != nil {
t.Fatalf("unexpected %s", err)
}
m = res.Data
if len(m) != 1 {
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
}
expected = Metric{
Labels: []prompbmarshal.Label{{Value: "total", Name: "stats_result"}},
Timestamps: []int64{1583786142},
Values: []float64{10},
}
if !reflect.DeepEqual(m[0], expected) {
t.Fatalf("unexpected metric %+v want %+v", m[0], expected)
}
}
func TestRequestParams(t *testing.T) {
query := "up"
vlogsQuery := "_time: 5m | stats count() total"
timestamp := time.Date(2001, 2, 3, 4, 5, 6, 0, time.UTC)
f := func(isQueryRange bool, vm *VMStorage, checkFn func(t *testing.T, r *http.Request)) {
f := func(isQueryRange bool, c *Client, checkFn func(t *testing.T, r *http.Request)) {
t.Helper()
req, err := vm.newRequest(ctx)
req, err := c.newRequest(ctx)
if err != nil {
t.Fatalf("error in newRequest: %s", err)
}
switch vm.dataSourceType {
case "", datasourcePrometheus:
switch c.dataSourceType {
case datasourcePrometheus:
if isQueryRange {
vm.setPrometheusRangeReqParams(req, query, timestamp, timestamp)
c.setPrometheusRangeReqParams(req, query, timestamp, timestamp)
} else {
vm.setPrometheusInstantReqParams(req, query, timestamp)
c.setPrometheusInstantReqParams(req, query, timestamp)
}
case datasourceGraphite:
vm.setGraphiteReqParams(req, query)
c.setGraphiteReqParams(req, query)
case datasourceVLogs:
if isQueryRange {
c.setVLogsRangeReqParams(req, vlogsQuery, timestamp, timestamp)
} else {
c.setVLogsInstantReqParams(req, vlogsQuery, timestamp)
}
}
checkFn(t, req)
@ -399,19 +522,19 @@ func TestRequestParams(t *testing.T) {
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
storage := VMStorage{
storage := Client{
extraParams: url.Values{"round_digits": {"10"}},
}
// prometheus path
f(false, &VMStorage{
f(false, &Client{
dataSourceType: datasourcePrometheus,
}, func(t *testing.T, r *http.Request) {
checkEqualString(t, "/api/v1/query", r.URL.Path)
})
// prometheus prefix
f(false, &VMStorage{
f(false, &Client{
dataSourceType: datasourcePrometheus,
appendTypePrefix: true,
}, func(t *testing.T, r *http.Request) {
@ -419,14 +542,14 @@ func TestRequestParams(t *testing.T) {
})
// prometheus range path
f(true, &VMStorage{
f(true, &Client{
dataSourceType: datasourcePrometheus,
}, func(t *testing.T, r *http.Request) {
checkEqualString(t, "/api/v1/query_range", r.URL.Path)
})
// prometheus range prefix
f(true, &VMStorage{
f(true, &Client{
dataSourceType: datasourcePrometheus,
appendTypePrefix: true,
}, func(t *testing.T, r *http.Request) {
@ -434,14 +557,14 @@ func TestRequestParams(t *testing.T) {
})
// graphite path
f(false, &VMStorage{
f(false, &Client{
dataSourceType: datasourceGraphite,
}, func(t *testing.T, r *http.Request) {
checkEqualString(t, graphitePath, r.URL.Path)
})
// graphite prefix
f(false, &VMStorage{
f(false, &Client{
dataSourceType: datasourceGraphite,
appendTypePrefix: true,
}, func(t *testing.T, r *http.Request) {
@ -449,21 +572,27 @@ func TestRequestParams(t *testing.T) {
})
// default params
f(false, &VMStorage{}, func(t *testing.T, r *http.Request) {
f(false, &Client{dataSourceType: datasourcePrometheus}, func(t *testing.T, r *http.Request) {
exp := url.Values{"query": {query}, "time": {timestamp.Format(time.RFC3339)}}
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
})
f(false, &Client{dataSourceType: datasourcePrometheus, applyIntervalAsTimeFilter: true}, func(t *testing.T, r *http.Request) {
exp := url.Values{"query": {query}, "time": {timestamp.Format(time.RFC3339)}}
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
})
// default range params
f(true, &VMStorage{}, func(t *testing.T, r *http.Request) {
f(true, &Client{dataSourceType: datasourcePrometheus}, func(t *testing.T, r *http.Request) {
ts := timestamp.Format(time.RFC3339)
exp := url.Values{"query": {query}, "start": {ts}, "end": {ts}}
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
})
// basic auth
f(false, &VMStorage{
authCfg: authCfg,
f(false, &Client{
dataSourceType: datasourcePrometheus,
authCfg: authCfg,
}, func(t *testing.T, r *http.Request) {
u, p, _ := r.BasicAuth()
checkEqualString(t, "foo", u)
@ -471,8 +600,9 @@ func TestRequestParams(t *testing.T) {
})
// basic auth range
f(true, &VMStorage{
authCfg: authCfg,
f(true, &Client{
dataSourceType: datasourcePrometheus,
authCfg: authCfg,
}, func(t *testing.T, r *http.Request) {
u, p, _ := r.BasicAuth()
checkEqualString(t, "foo", u)
@ -480,7 +610,8 @@ func TestRequestParams(t *testing.T) {
})
// evaluation interval
f(false, &VMStorage{
f(false, &Client{
dataSourceType: datasourcePrometheus,
evaluationInterval: 15 * time.Second,
}, func(t *testing.T, r *http.Request) {
evalInterval := 15 * time.Second
@ -489,8 +620,9 @@ func TestRequestParams(t *testing.T) {
})
// step override
f(false, &VMStorage{
queryStep: time.Minute,
f(false, &Client{
dataSourceType: datasourcePrometheus,
queryStep: time.Minute,
}, func(t *testing.T, r *http.Request) {
exp := url.Values{
"query": {query},
@ -501,7 +633,8 @@ func TestRequestParams(t *testing.T) {
})
// step to seconds
f(false, &VMStorage{
f(false, &Client{
dataSourceType: datasourcePrometheus,
evaluationInterval: 3 * time.Hour,
}, func(t *testing.T, r *http.Request) {
evalInterval := 3 * time.Hour
@ -510,15 +643,17 @@ func TestRequestParams(t *testing.T) {
})
// prometheus extra params
f(false, &VMStorage{
extraParams: url.Values{"round_digits": {"10"}},
f(false, &Client{
dataSourceType: datasourcePrometheus,
extraParams: url.Values{"round_digits": {"10"}},
}, func(t *testing.T, r *http.Request) {
exp := url.Values{"query": {query}, "round_digits": {"10"}, "time": {timestamp.Format(time.RFC3339)}}
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
})
// prometheus extra params range
f(true, &VMStorage{
f(true, &Client{
dataSourceType: datasourcePrometheus,
extraParams: url.Values{
"nocache": {"1"},
"max_lookback": {"1h"},
@ -536,7 +671,8 @@ func TestRequestParams(t *testing.T) {
// custom params overrides the original params
f(false, storage.Clone().ApplyParams(QuerierParams{
QueryParams: url.Values{"round_digits": {"2"}},
DataSourceType: string(datasourcePrometheus),
QueryParams: url.Values{"round_digits": {"2"}},
}), func(t *testing.T, r *http.Request) {
exp := url.Values{"query": {query}, "round_digits": {"2"}, "time": {timestamp.Format(time.RFC3339)}}
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
@ -544,14 +680,15 @@ func TestRequestParams(t *testing.T) {
// allow duplicates in query params
f(false, storage.Clone().ApplyParams(QuerierParams{
QueryParams: url.Values{"extra_labels": {"env=dev", "foo=bar"}},
DataSourceType: string(datasourcePrometheus),
QueryParams: url.Values{"extra_labels": {"env=dev", "foo=bar"}},
}), func(t *testing.T, r *http.Request) {
exp := url.Values{"query": {query}, "round_digits": {"10"}, "extra_labels": {"env=dev", "foo=bar"}, "time": {timestamp.Format(time.RFC3339)}}
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
})
// graphite extra params
f(false, &VMStorage{
f(false, &Client{
dataSourceType: datasourceGraphite,
extraParams: url.Values{
"nocache": {"1"},
@ -563,7 +700,7 @@ func TestRequestParams(t *testing.T) {
})
// graphite extra params allows to override from
f(false, &VMStorage{
f(false, &Client{
dataSourceType: datasourceGraphite,
extraParams: url.Values{
"from": {"-10m"},
@ -572,10 +709,38 @@ func TestRequestParams(t *testing.T) {
exp := fmt.Sprintf("format=json&from=-10m&target=%s&until=now", query)
checkEqualString(t, exp, r.URL.RawQuery)
})
// test vlogs
f(false, &Client{
dataSourceType: datasourceVLogs,
evaluationInterval: time.Minute,
}, func(t *testing.T, r *http.Request) {
exp := url.Values{"query": {vlogsQuery}, "time": {timestamp.Format(time.RFC3339)}}
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
})
f(false, &Client{
dataSourceType: datasourceVLogs,
evaluationInterval: time.Minute,
applyIntervalAsTimeFilter: true,
}, func(t *testing.T, r *http.Request) {
ts := timestamp.Format(time.RFC3339)
exp := url.Values{"query": {vlogsQuery}, "time": {ts}, "start": {timestamp.Add(-time.Minute).Format(time.RFC3339)}, "end": {ts}}
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
})
f(true, &Client{
dataSourceType: datasourceVLogs,
evaluationInterval: time.Minute,
}, func(t *testing.T, r *http.Request) {
ts := timestamp.Format(time.RFC3339)
exp := url.Values{"query": {vlogsQuery}, "start": {ts}, "end": {ts}, "step": {"60s"}}
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
})
}
func TestHeaders(t *testing.T) {
f := func(vmFn func() *VMStorage, checkFn func(t *testing.T, r *http.Request)) {
f := func(vmFn func() *Client, checkFn func(t *testing.T, r *http.Request)) {
t.Helper()
vm := vmFn()
@ -587,12 +752,12 @@ func TestHeaders(t *testing.T) {
}
// basic auth
f(func() *VMStorage {
f(func() *Client {
cfg, err := utils.AuthConfig(utils.WithBasicAuth("foo", "bar", ""))
if err != nil {
t.Fatalf("Error get auth config: %s", err)
}
return &VMStorage{authCfg: cfg}
return NewPrometheusClient("", cfg, false, nil)
}, func(t *testing.T, r *http.Request) {
u, p, _ := r.BasicAuth()
checkEqualString(t, "foo", u)
@ -600,12 +765,12 @@ func TestHeaders(t *testing.T) {
})
// bearer auth
f(func() *VMStorage {
f(func() *Client {
cfg, err := utils.AuthConfig(utils.WithBearer("foo", ""))
if err != nil {
t.Fatalf("Error get auth config: %s", err)
}
return &VMStorage{authCfg: cfg}
return NewPrometheusClient("", cfg, false, nil)
}, func(t *testing.T, r *http.Request) {
reqToken := r.Header.Get("Authorization")
splitToken := strings.Split(reqToken, "Bearer ")
@ -617,11 +782,13 @@ func TestHeaders(t *testing.T) {
})
// custom extraHeaders
f(func() *VMStorage {
return &VMStorage{extraHeaders: []keyValue{
f(func() *Client {
c := NewPrometheusClient("", nil, false, nil)
c.extraHeaders = []keyValue{
{key: "Foo", value: "bar"},
{key: "Baz", value: "qux"},
}}
}
return c
}, func(t *testing.T, r *http.Request) {
h1 := r.Header.Get("Foo")
checkEqualString(t, "bar", h1)
@ -630,17 +797,16 @@ func TestHeaders(t *testing.T) {
})
// custom header overrides basic auth
f(func() *VMStorage {
f(func() *Client {
cfg, err := utils.AuthConfig(utils.WithBasicAuth("foo", "bar", ""))
if err != nil {
t.Fatalf("Error get auth config: %s", err)
}
return &VMStorage{
authCfg: cfg,
extraHeaders: []keyValue{
{key: "Authorization", value: "Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ=="},
},
c := NewPrometheusClient("", cfg, false, nil)
c.extraHeaders = []keyValue{
{key: "Authorization", value: "Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ=="},
}
return c
}, func(t *testing.T, r *http.Request) {
u, p, _ := r.BasicAuth()
checkEqualString(t, "Aladdin", u)

View file

@ -0,0 +1,61 @@
package datasource
import (
"fmt"
"net/http"
"time"
)
func (c *Client) setVLogsInstantReqParams(r *http.Request, query string, timestamp time.Time) {
// there is no type path prefix in VictoriaLogs APIs right now, so appendTypePrefix is ignored.
if !*disablePathAppend {
r.URL.Path += "/select/logsql/stats_query"
}
q := r.URL.Query()
// set the `time` param explicitly; it is used as the timestamp of the query results.
q.Set("time", timestamp.Format(time.RFC3339))
// set the `start` and `end` params if applyIntervalAsTimeFilter is enabled (i.e. the time filter is missing in the rule expr),
// so the query will be executed over the time range [timestamp - evaluationInterval, timestamp].
if c.applyIntervalAsTimeFilter && c.evaluationInterval > 0 {
q.Set("start", timestamp.Add(-c.evaluationInterval).Format(time.RFC3339))
q.Set("end", timestamp.Format(time.RFC3339))
}
r.URL.RawQuery = q.Encode()
c.setReqParams(r, query)
}
func (c *Client) setVLogsRangeReqParams(r *http.Request, query string, start, end time.Time) {
// there is no type path prefix in VictoriaLogs APIs right now, so appendTypePrefix is ignored.
if !*disablePathAppend {
r.URL.Path += "/select/logsql/stats_query_range"
}
q := r.URL.Query()
q.Add("start", start.Format(time.RFC3339))
q.Add("end", end.Format(time.RFC3339))
// set step as evaluationInterval by default
if c.evaluationInterval > 0 {
q.Set("step", fmt.Sprintf("%ds", int(c.evaluationInterval.Seconds())))
}
r.URL.RawQuery = q.Encode()
c.setReqParams(r, query)
}
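For illustration, a minimal self-contained sketch (not part of this diff) of the query strings the two helpers above produce, assuming a one-minute evaluation interval and hypothetical timestamps:

package main

import (
	"fmt"
	"net/url"
	"time"
)

func main() {
	ts := time.Date(2024, 11, 21, 15, 0, 0, 0, time.UTC)

	// instant query with applyIntervalAsTimeFilter enabled:
	// the query is evaluated over [ts - evaluationInterval, ts]
	q := url.Values{}
	q.Set("time", ts.Format(time.RFC3339))
	q.Set("start", ts.Add(-time.Minute).Format(time.RFC3339))
	q.Set("end", ts.Format(time.RFC3339))
	fmt.Println("/select/logsql/stats_query?" + q.Encode())

	// range query: step defaults to the evaluation interval, rendered in seconds
	qr := url.Values{}
	qr.Add("start", ts.Add(-10*time.Minute).Format(time.RFC3339))
	qr.Add("end", ts.Format(time.RFC3339))
	qr.Set("step", fmt.Sprintf("%ds", int(time.Minute.Seconds()))) // "60s"
	fmt.Println("/select/logsql/stats_query_range?" + qr.Encode())
}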
func parseVLogsResponse(req *http.Request, resp *http.Response) (res Result, err error) {
res, err = parsePrometheusResponse(req, resp)
if err != nil {
return Result{}, err
}
for i := range res.Data {
m := &res.Data[i]
for j := range m.Labels {
// preserve the stats func result name in a new label `stats_result` instead of dropping it,
// since there could be multiple stats results in a single query, for instance:
// _time:5m | stats quantile(0.5, request_duration_seconds) p50, quantile(0.9, request_duration_seconds) p90
if m.Labels[j].Name == "__name__" {
m.Labels[j].Name = "stats_result"
break
}
}
}
return
}
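A small sketch (not part of this diff) of the label rewrite above: a stats result named `p50` keeps its name under the `stats_result` label instead of `__name__` (values are hypothetical):

package main

import (
	"fmt"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)

func main() {
	labels := []prompbmarshal.Label{
		{Name: "__name__", Value: "p50"},
		{Name: "instance", Value: "host-1"},
	}
	// the same rewrite as in parseVLogsResponse above
	for i := range labels {
		if labels[i].Name == "__name__" {
			labels[i].Name = "stats_result"
			break
		}
	}
	fmt.Println(labels) // [{stats_result p50} {instance host-1}]
}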

View file

@ -8,6 +8,8 @@ import (
"sort"
"strconv"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)
// Querier interface wraps Query and QueryRange methods
@ -42,16 +44,20 @@ type QuerierBuilder interface {
// QuerierParams params for Querier.
type QuerierParams struct {
DataSourceType string
EvaluationInterval time.Duration
QueryParams url.Values
Headers map[string]string
Debug bool
DataSourceType string
// ApplyIntervalAsTimeFilter is only valid for the vlogs datasource.
// Set to true if there is no [timeFilter](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter) in the rule expression;
// in that case the evaluation interval is applied as an additional time filter when querying.
ApplyIntervalAsTimeFilter bool
EvaluationInterval time.Duration
QueryParams url.Values
Headers map[string]string
Debug bool
}
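A hedged usage sketch of these params (not part of this diff; the "vlogs" type string and the helper name are assumptions for illustration):

// buildVLogsQuerier shows how a rule group might configure a VictoriaLogs
// querier when the rule expression carries no time filter of its own.
func buildVLogsQuerier(qb QuerierBuilder, interval time.Duration) Querier {
	return qb.BuildWithParams(QuerierParams{
		DataSourceType:            "vlogs", // assumed type string for VictoriaLogs
		ApplyIntervalAsTimeFilter: true,    // expr has no _time filter
		EvaluationInterval:        interval,
	})
}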
// Metric is the basic entity which should be return by datasource
type Metric struct {
Labels []Label
Labels []prompbmarshal.Label
Timestamps []int64
Values []float64
}
@ -68,22 +74,9 @@ func (m *Metric) SetLabel(key, value string) {
m.AddLabel(key, value)
}
// SetLabels sets the given map as Metric labels
func (m *Metric) SetLabels(ls map[string]string) {
var i int
m.Labels = make([]Label, len(ls))
for k, v := range ls {
m.Labels[i] = Label{
Name: k,
Value: v,
}
i++
}
}
// AddLabel appends the given label to the label set
func (m *Metric) AddLabel(key, value string) {
m.Labels = append(m.Labels, Label{Name: key, Value: value})
m.Labels = append(m.Labels, prompbmarshal.Label{Name: key, Value: value})
}
// DelLabel deletes the given label from the label set
@ -106,14 +99,8 @@ func (m *Metric) Label(key string) string {
return ""
}
// Label represents metric's label
type Label struct {
Name string
Value string
}
// Labels is collection of Label
type Labels []Label
type Labels []prompbmarshal.Label
func (ls Labels) Len() int { return len(ls) }
func (ls Labels) Swap(i, j int) { ls[i], ls[j] = ls[j], ls[i] }
@ -168,7 +155,7 @@ func LabelCompare(a, b Labels) int {
// ConvertToLabels converts the given map to Labels
func ConvertToLabels(m map[string]string) (labelset Labels) {
for k, v := range m {
labelset = append(labelset, Label{
labelset = append(labelset, prompbmarshal.Label{
Name: k,
Value: v,
})
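As a usage sketch (not part of this diff): map iteration order is random, so callers that need deterministic output typically sort the result:

package main

import (
	"fmt"
	"sort"

	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
)

func main() {
	ls := datasource.ConvertToLabels(map[string]string{"job": "foo", "instance": "bar"})
	sort.Sort(ls) // assumes Labels also defines Less, alongside the Len/Swap methods above
	fmt.Println(ls)
}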

View file

@ -11,7 +11,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
)
@ -48,22 +47,15 @@ var (
oauth2TokenURL = flag.String("datasource.oauth2.tokenUrl", "", "Optional OAuth2 tokenURL to use for -datasource.url")
oauth2Scopes = flag.String("datasource.oauth2.scopes", "", "Optional OAuth2 scopes to use for -datasource.url. Scopes must be delimited by ';'")
lookBack = flag.Duration("datasource.lookback", 0, `Deprecated: please adjust "-search.latencyOffset" at datasource side `+
`or specify "latency_offset" in rule group's params. Lookback defines how far into the past to look when evaluating queries. `+
`For example, if the datasource.lookback=5m then param "time" with value now()-5m will be added to every query.`)
queryStep = flag.Duration("datasource.queryStep", 5*time.Minute, "How far a value can fall back to when evaluating queries. "+
queryStep = flag.Duration("datasource.queryStep", 5*time.Minute, "How far a value can fall back to when evaluating queries to the configured -datasource.url and -remoteRead.url. Only valid for prometheus datasource. "+
"For example, if -datasource.queryStep=15s then param \"step\" with value \"15s\" will be added to every query. "+
"If set to 0, rule's evaluation interval will be used instead.")
queryTimeAlignment = flag.Bool("datasource.queryTimeAlignment", true, `Deprecated: please use "eval_alignment" in rule group instead. `+
`Whether to align "time" parameter with evaluation interval. `+
"Alignment supposed to produce deterministic results despite number of vmalert replicas or time they were started. "+
"See more details at https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1257")
maxIdleConnections = flag.Int("datasource.maxIdleConnections", 100, `Defines the number of idle (keep-alive) connections to each configured datasource. Consider setting this value equal to the value: groups_total * group.concurrency. Too low a value may result in a high number of sockets in TIME_WAIT state.`)
idleConnectionTimeout = flag.Duration("datasource.idleConnTimeout", 50*time.Second, `Defines a duration for idle (keep-alive) connections to exist. Consider setting this value less than "-http.idleConnTimeout". It must prevent possible "write: broken pipe" and "read: connection reset by peer" errors.`)
disableKeepAlive = flag.Bool("datasource.disableKeepAlive", false, `Whether to disable long-lived connections to the datasource. `+
`If true, disables HTTP keep-alive and will only use the connection to the server for a single HTTP request.`)
roundDigits = flag.Int("datasource.roundDigits", 0, `Adds "round_digits" GET param to datasource requests. `+
`In VM "round_digits" limits the number of digits after the decimal point in response values.`)
roundDigits = flag.Int("datasource.roundDigits", 0, `Adds "round_digits" GET param to datasource requests which limits the number of digits after the decimal point in response values. `+
`Only valid for VictoriaMetrics as the datasource.`)
)
// InitSecretFlags must be called after flag.Parse and before any logging
@ -90,12 +82,6 @@ func Init(extraParams url.Values) (QuerierBuilder, error) {
if *addr == "" {
return nil, fmt.Errorf("datasource.url is empty")
}
if !*queryTimeAlignment {
logger.Warnf("flag `-datasource.queryTimeAlignment` is deprecated and will be removed in next releases. Please use `eval_alignment` in rule group instead.")
}
if *lookBack != 0 {
logger.Warnf("flag `-datasource.lookback` is deprecated and will be removed in next releases. Please adjust `-search.latencyOffset` at datasource side or specify `latency_offset` in rule group's params. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155 for details.")
}
tr, err := httputils.Transport(*addr, *tlsCertFile, *tlsKeyFile, *tlsCAFile, *tlsServerName, *tlsInsecureSkipVerify)
if err != nil {
@ -133,13 +119,12 @@ func Init(extraParams url.Values) (QuerierBuilder, error) {
return nil, fmt.Errorf("failed to set request auth header to datasource %q: %w", *addr, err)
}
return &VMStorage{
return &Client{
c: &http.Client{Transport: tr},
authCfg: authCfg,
datasourceURL: strings.TrimSuffix(*addr, "/"),
appendTypePrefix: *appendTypePrefix,
queryStep: *queryStep,
dataSourceType: datasourcePrometheus,
extraParams: extraParams,
}, nil
}

View file

@ -1,272 +0,0 @@
package datasource
import (
"context"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
)
type datasourceType string
const (
datasourcePrometheus datasourceType = "prometheus"
datasourceGraphite datasourceType = "graphite"
)
func toDatasourceType(s string) datasourceType {
if s == string(datasourceGraphite) {
return datasourceGraphite
}
return datasourcePrometheus
}
// VMStorage represents vmstorage entity with ability to read and write metrics
// WARN: when adding a new field, remember to update Clone() method.
type VMStorage struct {
c *http.Client
authCfg *promauth.Config
datasourceURL string
appendTypePrefix bool
queryStep time.Duration
dataSourceType datasourceType
// evaluationInterval helps to set the request's `step` param.
evaluationInterval time.Duration
// extraParams contains params to be attached to each HTTP request
extraParams url.Values
// extraHeaders are headers to be attached to each HTTP request
extraHeaders []keyValue
// whether to print additional log messages
// for each sent request
debug bool
}
type keyValue struct {
key string
value string
}
// Clone makes a clone of VMStorage; the http client is shared.
func (s *VMStorage) Clone() *VMStorage {
ns := &VMStorage{
c: s.c,
authCfg: s.authCfg,
datasourceURL: s.datasourceURL,
appendTypePrefix: s.appendTypePrefix,
queryStep: s.queryStep,
dataSourceType: s.dataSourceType,
evaluationInterval: s.evaluationInterval,
// init map so it can be populated below
extraParams: url.Values{},
debug: s.debug,
}
if len(s.extraHeaders) > 0 {
ns.extraHeaders = make([]keyValue, len(s.extraHeaders))
copy(ns.extraHeaders, s.extraHeaders)
}
for k, v := range s.extraParams {
ns.extraParams[k] = v
}
return ns
}
// ApplyParams - changes given querier params.
func (s *VMStorage) ApplyParams(params QuerierParams) *VMStorage {
s.dataSourceType = toDatasourceType(params.DataSourceType)
s.evaluationInterval = params.EvaluationInterval
if params.QueryParams != nil {
if s.extraParams == nil {
s.extraParams = url.Values{}
}
for k, vl := range params.QueryParams {
// custom query params take priority over the default ones
if s.extraParams.Has(k) {
s.extraParams.Del(k)
}
for _, v := range vl {
// use Del/Add instead of .Set() since GET params
// are allowed to be duplicated
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4908
s.extraParams.Add(k, v)
}
}
}
if params.Headers != nil {
for key, value := range params.Headers {
kv := keyValue{key: key, value: value}
s.extraHeaders = append(s.extraHeaders, kv)
}
}
s.debug = params.Debug
return s
}
// BuildWithParams - implements interface.
func (s *VMStorage) BuildWithParams(params QuerierParams) Querier {
return s.Clone().ApplyParams(params)
}
// NewVMStorage is a constructor for VMStorage
func NewVMStorage(baseURL string, authCfg *promauth.Config, queryStep time.Duration, appendTypePrefix bool, c *http.Client) *VMStorage {
return &VMStorage{
c: c,
authCfg: authCfg,
datasourceURL: strings.TrimSuffix(baseURL, "/"),
appendTypePrefix: appendTypePrefix,
queryStep: queryStep,
dataSourceType: datasourcePrometheus,
extraParams: url.Values{},
}
}
// Query executes the given query and returns parsed response
func (s *VMStorage) Query(ctx context.Context, query string, ts time.Time) (Result, *http.Request, error) {
req, err := s.newQueryRequest(ctx, query, ts)
if err != nil {
return Result{}, nil, err
}
resp, err := s.do(req)
if err != nil {
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) && !netutil.IsTrivialNetworkError(err) {
// Return unexpected error to the caller.
return Result{}, nil, err
}
// Something in the middle between the client and the datasource might be closing
// the connection. So we make one more attempt in the hope that the request will succeed.
req, err = s.newQueryRequest(ctx, query, ts)
if err != nil {
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
}
resp, err = s.do(req)
if err != nil {
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
}
}
// Process the received response.
parseFn := parsePrometheusResponse
if s.dataSourceType != datasourcePrometheus {
parseFn = parseGraphiteResponse
}
result, err := parseFn(req, resp)
_ = resp.Body.Close()
return result, req, err
}
// QueryRange executes the given query on the given time range.
// For Prometheus type see https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries
// Graphite type isn't supported.
func (s *VMStorage) QueryRange(ctx context.Context, query string, start, end time.Time) (res Result, err error) {
if s.dataSourceType != datasourcePrometheus {
return res, fmt.Errorf("%q is not supported for QueryRange", s.dataSourceType)
}
if start.IsZero() {
return res, fmt.Errorf("start param is missing")
}
if end.IsZero() {
return res, fmt.Errorf("end param is missing")
}
req, err := s.newQueryRangeRequest(ctx, query, start, end)
if err != nil {
return res, err
}
resp, err := s.do(req)
if err != nil {
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) && !netutil.IsTrivialNetworkError(err) {
// Return unexpected error to the caller.
return res, err
}
// Something in the middle between the client and the datasource might be closing
// the connection. So we make one more attempt in the hope that the request will succeed.
req, err = s.newQueryRangeRequest(ctx, query, start, end)
if err != nil {
return res, fmt.Errorf("second attempt: %w", err)
}
resp, err = s.do(req)
if err != nil {
return res, fmt.Errorf("second attempt: %w", err)
}
}
// Process the received response.
res, err = parsePrometheusResponse(req, resp)
_ = resp.Body.Close()
return res, err
}
func (s *VMStorage) do(req *http.Request) (*http.Response, error) {
ru := req.URL.Redacted()
if *showDatasourceURL {
ru = req.URL.String()
}
if s.debug {
logger.Infof("DEBUG datasource request: executing %s request with params %q", req.Method, ru)
}
resp, err := s.c.Do(req)
if err != nil {
return nil, fmt.Errorf("error getting response from %s: %w", ru, err)
}
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
_ = resp.Body.Close()
return nil, fmt.Errorf("unexpected response code %d for %s. Response body %s", resp.StatusCode, ru, body)
}
return resp, nil
}
func (s *VMStorage) newQueryRangeRequest(ctx context.Context, query string, start, end time.Time) (*http.Request, error) {
req, err := s.newRequest(ctx)
if err != nil {
return nil, fmt.Errorf("cannot create query_range request to datasource %q: %w", s.datasourceURL, err)
}
s.setPrometheusRangeReqParams(req, query, start, end)
return req, nil
}
func (s *VMStorage) newQueryRequest(ctx context.Context, query string, ts time.Time) (*http.Request, error) {
req, err := s.newRequest(ctx)
if err != nil {
return nil, fmt.Errorf("cannot create query request to datasource %q: %w", s.datasourceURL, err)
}
switch s.dataSourceType {
case "", datasourcePrometheus:
s.setPrometheusInstantReqParams(req, query, ts)
case datasourceGraphite:
s.setGraphiteReqParams(req, query)
default:
logger.Panicf("BUG: engine not found: %q", s.dataSourceType)
}
return req, nil
}
func (s *VMStorage) newRequest(ctx context.Context) (*http.Request, error) {
req, err := http.NewRequestWithContext(ctx, http.MethodPost, s.datasourceURL, nil)
if err != nil {
logger.Panicf("BUG: unexpected error from http.NewRequest(%q): %s", s.datasourceURL, err)
}
req.Header.Set("Content-Type", "application/json")
if s.authCfg != nil {
err = s.authCfg.SetHeaders(req, true)
if err != nil {
return nil, err
}
}
for _, h := range s.extraHeaders {
req.Header.Set(h.key, h.value)
}
return req, nil
}

View file

@ -3,6 +3,8 @@ package datasource
import (
"reflect"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)
func TestPromInstant_UnmarshalPositive(t *testing.T) {
@ -21,7 +23,7 @@ func TestPromInstant_UnmarshalPositive(t *testing.T) {
f(`[{"metric":{"__name__":"up"},"value":[1583780000,"42"]}]`, []Metric{
{
Labels: []Label{{Name: "__name__", Value: "up"}},
Labels: []prompbmarshal.Label{{Name: "__name__", Value: "up"}},
Timestamps: []int64{1583780000},
Values: []float64{42},
},
@ -31,17 +33,17 @@ func TestPromInstant_UnmarshalPositive(t *testing.T) {
{"metric":{"__name__":"foo"},"value":[1583780001,"7"]},
{"metric":{"__name__":"baz", "instance":"bar"},"value":[1583780002,"8"]}]`, []Metric{
{
Labels: []Label{{Name: "__name__", Value: "up"}},
Labels: []prompbmarshal.Label{{Name: "__name__", Value: "up"}},
Timestamps: []int64{1583780000},
Values: []float64{42},
},
{
Labels: []Label{{Name: "__name__", Value: "foo"}},
Labels: []prompbmarshal.Label{{Name: "__name__", Value: "foo"}},
Timestamps: []int64{1583780001},
Values: []float64{7},
},
{
Labels: []Label{{Name: "__name__", Value: "baz"}, {Name: "instance", Value: "bar"}},
Labels: []prompbmarshal.Label{{Name: "__name__", Value: "baz"}, {Name: "instance", Value: "bar"}},
Timestamps: []int64{1583780002},
Values: []float64{8},
},

View file

@ -66,7 +66,7 @@ absolute path to all .tpl files in root.
evaluationInterval = flag.Duration("evaluationInterval", time.Minute, "How often to evaluate the rules")
validateTemplates = flag.Bool("rule.validateTemplates", true, "Whether to validate annotation and label templates")
validateExpressions = flag.Bool("rule.validateExpressions", true, "Whether to validate rules expressions via MetricsQL engine")
validateExpressions = flag.Bool("rule.validateExpressions", true, "Whether to validate rules expressions for different types.")
externalURL = flag.String("external.url", "", "External URL is used as alert's source for sent alerts to the notifier. By default, hostname is used as address.")
externalAlertSource = flag.String("external.alert.source", "", `External Alert Source allows to override the Source link for alerts sent to AlertManager `+
@ -78,8 +78,6 @@ absolute path to all .tpl files in root.
externalLabels = flagutil.NewArrayString("external.label", "Optional label in the form 'Name=value' to add to all generated recording rules and alerts. "+
"In case of conflicts, original labels are kept with prefix `exported_`.")
remoteReadIgnoreRestoreErrors = flag.Bool("remoteRead.ignoreRestoreErrors", true, "Whether to ignore errors from remote storage when restoring alerts state on startup. DEPRECATED - this flag has no effect and will be removed in the next releases.")
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmalert. The rules files are validated. The -rule flag must be specified.")
)
@ -97,10 +95,6 @@ func main() {
buildinfo.Init()
logger.Init()
if !*remoteReadIgnoreRestoreErrors {
logger.Warnf("flag `remoteRead.ignoreRestoreErrors` is deprecated and will be removed in next releases.")
}
err := templates.Load(*ruleTemplatesPath, true)
if err != nil {
logger.Fatalf("failed to parse %q: %s", *ruleTemplatesPath, err)

View file

@ -167,14 +167,8 @@ type tplData struct {
ExternalURL string
}
func templateAnnotation(dst io.Writer, text string, data tplData, tmpl *textTpl.Template, execute bool) error {
tpl, err := tmpl.Clone()
if err != nil {
return fmt.Errorf("error cloning template before parse annotation: %w", err)
}
// Clone() doesn't copy tpl Options, so we set them manually
tpl = tpl.Option("missingkey=zero")
tpl, err = tpl.Parse(text)
func templateAnnotation(dst io.Writer, text string, data tplData, tpl *textTpl.Template, execute bool) error {
tpl, err := tpl.Parse(text)
if err != nil {
return fmt.Errorf("error parsing annotation template: %w", err)
}

View file

@ -33,7 +33,7 @@ func TestAlertExecTemplate(t *testing.T) {
qFn := func(_ string) ([]datasource.Metric, error) {
return []datasource.Metric{
{
Labels: []datasource.Label{
Labels: []prompbmarshal.Label{
{Name: "foo", Value: "bar"},
{Name: "baz", Value: "qux"},
},
@ -41,7 +41,7 @@ func TestAlertExecTemplate(t *testing.T) {
Timestamps: []int64{1},
},
{
Labels: []datasource.Label{
Labels: []prompbmarshal.Label{
{Name: "foo", Value: "garply"},
{Name: "baz", Value: "fred"},
},

View file

@ -86,5 +86,5 @@ func Init() (datasource.QuerierBuilder, error) {
return nil, fmt.Errorf("failed to configure auth: %w", err)
}
c := &http.Client{Transport: tr}
return datasource.NewVMStorage(*addr, authCfg, 0, false, c), nil
return datasource.NewPrometheusClient(*addr, authCfg, false, c), nil
}

View file

@ -27,7 +27,7 @@ var defaultConcurrency = cgroup.AvailableCPUs() * 2
const (
defaultMaxBatchSize = 1e4
defaultMaxQueueSize = 1e6
defaultMaxQueueSize = 1e5
defaultFlushInterval = 2 * time.Second
defaultWriteTimeout = 30 * time.Second
)

View file

@ -14,8 +14,10 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
)
// AlertingRule is basic alert entity
@ -72,11 +74,12 @@ func NewAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
EvalInterval: group.Interval,
Debug: cfg.Debug,
q: qb.BuildWithParams(datasource.QuerierParams{
DataSourceType: group.Type.String(),
EvaluationInterval: group.Interval,
QueryParams: group.Params,
Headers: group.Headers,
Debug: cfg.Debug,
DataSourceType: group.Type.String(),
ApplyIntervalAsTimeFilter: setIntervalAsTimeFilter(group.Type.String(), cfg.Expr),
EvaluationInterval: group.Interval,
QueryParams: group.Params,
Headers: group.Headers,
Debug: cfg.Debug,
}),
alerts: make(map[uint64]*notifier.Alert),
metrics: &alertingRuleMetrics{},
@ -453,13 +456,16 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
ar.logDebugf(ts, a, "created in state PENDING")
}
var numActivePending int
var tss []prompbmarshal.TimeSeries
for h, a := range ar.alerts {
// if the alert wasn't updated in this iteration,
// it means it is resolved already
if _, ok := updated[h]; !ok {
if a.State == notifier.StatePending {
// alert was in Pending state - it is not
// active anymore
// alert was in Pending state - it is not active anymore
// add stale time series
tss = append(tss, pendingAlertStaleTimeSeries(a.Labels, ts.Unix(), true)...)
delete(ar.alerts, h)
ar.logDebugf(ts, a, "PENDING => DELETED: is absent in current evaluation round")
continue
@ -477,6 +483,9 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
if ts.Sub(a.KeepFiringSince) >= ar.KeepFiringFor {
a.State = notifier.StateInactive
a.ResolvedAt = ts
// add stale time series
tss = append(tss, firingAlertStaleTimeSeries(a.Labels, ts.Unix())...)
ar.logDebugf(ts, a, "FIRING => INACTIVE: is absent in current evaluation round")
continue
}
@ -488,6 +497,10 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
a.State = notifier.StateFiring
a.Start = ts
alertsFired.Inc()
if ar.For > 0 {
// add stale time series
tss = append(tss, pendingAlertStaleTimeSeries(a.Labels, ts.Unix(), false)...)
}
ar.logDebugf(ts, a, "PENDING => FIRING: %s since becoming active at %v", ts.Sub(a.ActiveAt), a.ActiveAt)
}
}
@ -496,7 +509,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
curState.Err = fmt.Errorf("exec exceeded limit of %d with %d alerts", limit, numActivePending)
return nil, curState.Err
}
return ar.toTimeSeries(ts.Unix()), nil
return append(tss, ar.toTimeSeries(ts.Unix())...), nil
}
func (ar *AlertingRule) expandTemplates(m datasource.Metric, qFn templates.QueryFn, ts time.Time) (*labelSet, map[string]string, error) {
@ -521,6 +534,7 @@ func (ar *AlertingRule) expandTemplates(m datasource.Metric, qFn templates.Query
return ls, as, nil
}
// toTimeSeries creates `ALERTS` and `ALERTS_FOR_STATE` for active alerts
func (ar *AlertingRule) toTimeSeries(timestamp int64) []prompbmarshal.TimeSeries {
var tss []prompbmarshal.TimeSeries
for _, a := range ar.alerts {
@ -600,26 +614,83 @@ func (ar *AlertingRule) alertToTimeSeries(a *notifier.Alert, timestamp int64) []
}
func alertToTimeSeries(a *notifier.Alert, timestamp int64) prompbmarshal.TimeSeries {
labels := make(map[string]string)
var labels []prompbmarshal.Label
for k, v := range a.Labels {
labels[k] = v
labels = append(labels, prompbmarshal.Label{
Name: k,
Value: v,
})
}
// __name__ has already been dropped, no need to check for duplicates
labels = append(labels, prompbmarshal.Label{Name: "__name__", Value: alertMetricName})
if ol := promrelabel.GetLabelByName(labels, alertStateLabel); ol != nil {
ol.Value = a.State.String()
} else {
labels = append(labels, prompbmarshal.Label{Name: alertStateLabel, Value: a.State.String()})
}
labels["__name__"] = alertMetricName
labels[alertStateLabel] = a.State.String()
return newTimeSeries([]float64{1}, []int64{timestamp}, labels)
}
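A sketch (not part of this diff) of the override above: promrelabel.GetLabelByName returns a pointer into the slice, so an existing `alertstate` label is updated in place instead of being duplicated:

package main

import (
	"fmt"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
)

func main() {
	// hypothetical alert labels which already carry an alertstate label
	labels := []prompbmarshal.Label{{Name: "alertstate", Value: "foo"}}
	if ol := promrelabel.GetLabelByName(labels, "alertstate"); ol != nil {
		ol.Value = "firing" // override instead of appending a duplicate
	}
	fmt.Println(labels) // [{alertstate firing}]
}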
// alertForToTimeSeries returns a timeseries that represents
// alertForToTimeSeries returns a time series that represents
// the state of active alerts, where the value is the time when the alert became active
func alertForToTimeSeries(a *notifier.Alert, timestamp int64) prompbmarshal.TimeSeries {
labels := make(map[string]string)
var labels []prompbmarshal.Label
for k, v := range a.Labels {
labels[k] = v
labels = append(labels, prompbmarshal.Label{
Name: k,
Value: v,
})
}
labels["__name__"] = alertForStateMetricName
// __name__ has already been dropped, no need to check for duplicates
labels = append(labels, prompbmarshal.Label{Name: "__name__", Value: alertForStateMetricName})
return newTimeSeries([]float64{float64(a.ActiveAt.Unix())}, []int64{timestamp}, labels)
}
// pendingAlertStaleTimeSeries returns stale `ALERTS` and `ALERTS_FOR_STATE` time series
// for alerts which changed their state from Pending to Inactive or Firing.
func pendingAlertStaleTimeSeries(ls map[string]string, timestamp int64, includeAlertForState bool) []prompbmarshal.TimeSeries {
var result []prompbmarshal.TimeSeries
var baseLabels []prompbmarshal.Label
for k, v := range ls {
baseLabels = append(baseLabels, prompbmarshal.Label{
Name: k,
Value: v,
})
}
// __name__ has already been dropped, no need to check for duplicates
alertsLabels := append(baseLabels, prompbmarshal.Label{Name: "__name__", Value: alertMetricName})
alertsLabels = append(alertsLabels, prompbmarshal.Label{Name: alertStateLabel, Value: notifier.StatePending.String()})
result = append(result, newTimeSeries([]float64{decimal.StaleNaN}, []int64{timestamp}, alertsLabels))
if includeAlertForState {
alertsForStateLabels := append(baseLabels, prompbmarshal.Label{Name: "__name__", Value: alertForStateMetricName})
result = append(result, newTimeSeries([]float64{decimal.StaleNaN}, []int64{timestamp}, alertsForStateLabels))
}
return result
}
// firingAlertStaleTimeSeries returns stale `ALERTS` and `ALERTS_FOR_STATE` time series
// for alerts which changed their state from Firing to Inactive.
func firingAlertStaleTimeSeries(ls map[string]string, timestamp int64) []prompbmarshal.TimeSeries {
var baseLabels []prompbmarshal.Label
for k, v := range ls {
baseLabels = append(baseLabels, prompbmarshal.Label{
Name: k,
Value: v,
})
}
// __name__ has already been dropped, no need to check for duplicates
alertsLabels := append(baseLabels, prompbmarshal.Label{Name: "__name__", Value: alertMetricName})
alertsLabels = append(alertsLabels, prompbmarshal.Label{Name: alertStateLabel, Value: notifier.StateFiring.String()})
alertsForStateLabels := append(baseLabels, prompbmarshal.Label{Name: "__name__", Value: alertForStateMetricName})
return []prompbmarshal.TimeSeries{
newTimeSeries([]float64{decimal.StaleNaN}, []int64{timestamp}, alertsLabels),
newTimeSeries([]float64{decimal.StaleNaN}, []int64{timestamp}, alertsForStateLabels),
}
}
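For illustration (not part of this diff), a staleness marker is an ordinary sample carrying the special StaleNaN value; storages following the Prometheus staleness convention treat it as the end of a series:

package main

import (
	"fmt"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
)

func main() {
	v := decimal.StaleNaN
	// IsStaleNaN distinguishes the staleness marker from a regular NaN
	fmt.Println(decimal.IsStaleNaN(v)) // true
}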
// restore restores the value of ActiveAt field for active alerts,
// based on previously written time series `alertForStateMetricName`.
// Only rules with For > 0 can be restored.

View file

@ -15,6 +15,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
)
@ -28,7 +29,7 @@ func TestAlertingRuleToTimeSeries(t *testing.T) {
rule.alerts[alert.ID] = alert
tss := rule.toTimeSeries(timestamp.Unix())
if err := compareTimeSeries(t, tssExpected, tss); err != nil {
t.Fatalf("timeseries mismatch: %s", err)
t.Fatalf("timeseries mismatch for rule %q: %s", rule.Name, err)
}
}
@ -36,14 +37,23 @@ func TestAlertingRuleToTimeSeries(t *testing.T) {
State: notifier.StateFiring,
ActiveAt: timestamp.Add(time.Second),
}, []prompbmarshal.TimeSeries{
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": alertMetricName,
alertStateLabel: notifier.StateFiring.String(),
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, []prompbmarshal.Label{
{
Name: "__name__",
Value: alertMetricName,
},
{
Name: alertStateLabel,
Value: notifier.StateFiring.String(),
},
}),
newTimeSeries([]float64{float64(timestamp.Add(time.Second).Unix())},
[]int64{timestamp.UnixNano()},
map[string]string{
"__name__": alertForStateMetricName,
[]prompbmarshal.Label{
{
Name: "__name__",
Value: alertForStateMetricName,
},
}),
})
@ -54,18 +64,40 @@ func TestAlertingRuleToTimeSeries(t *testing.T) {
"instance": "bar",
},
}, []prompbmarshal.TimeSeries{
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": alertMetricName,
alertStateLabel: notifier.StateFiring.String(),
"job": "foo",
"instance": "bar",
}),
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()},
[]prompbmarshal.Label{
{
Name: "__name__",
Value: alertMetricName,
},
{
Name: alertStateLabel,
Value: notifier.StateFiring.String(),
},
{
Name: "job",
Value: "foo",
},
{
Name: "instance",
Value: "bar",
},
}),
newTimeSeries([]float64{float64(timestamp.Add(time.Second).Unix())},
[]int64{timestamp.UnixNano()},
map[string]string{
"__name__": alertForStateMetricName,
"job": "foo",
"instance": "bar",
[]prompbmarshal.Label{
{
Name: "__name__",
Value: alertForStateMetricName,
},
{
Name: "job",
Value: "foo",
},
{
Name: "instance",
Value: "bar",
},
}),
})
@ -73,18 +105,29 @@ func TestAlertingRuleToTimeSeries(t *testing.T) {
State: notifier.StateFiring, ActiveAt: timestamp.Add(time.Second),
Labels: map[string]string{
alertStateLabel: "foo",
"__name__": "bar",
},
}, []prompbmarshal.TimeSeries{
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": alertMetricName,
alertStateLabel: notifier.StateFiring.String(),
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, []prompbmarshal.Label{
{
Name: "__name__",
Value: alertMetricName,
},
{
Name: alertStateLabel,
Value: notifier.StateFiring.String(),
},
}),
newTimeSeries([]float64{float64(timestamp.Add(time.Second).Unix())},
[]int64{timestamp.UnixNano()},
map[string]string{
"__name__": alertForStateMetricName,
alertStateLabel: "foo",
[]prompbmarshal.Label{
{
Name: "__name__",
Value: alertForStateMetricName,
},
{
Name: alertStateLabel,
Value: "foo",
},
}),
})
@ -92,14 +135,23 @@ func TestAlertingRuleToTimeSeries(t *testing.T) {
State: notifier.StateFiring,
ActiveAt: timestamp.Add(time.Second),
}, []prompbmarshal.TimeSeries{
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": alertMetricName,
alertStateLabel: notifier.StateFiring.String(),
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, []prompbmarshal.Label{
{
Name: "__name__",
Value: alertMetricName,
},
{
Name: alertStateLabel,
Value: notifier.StateFiring.String(),
},
}),
newTimeSeries([]float64{float64(timestamp.Add(time.Second).Unix())},
[]int64{timestamp.UnixNano()},
map[string]string{
"__name__": alertForStateMetricName,
[]prompbmarshal.Label{
{
Name: "__name__",
Value: alertForStateMetricName,
},
}),
})
@ -107,12 +159,21 @@ func TestAlertingRuleToTimeSeries(t *testing.T) {
State: notifier.StatePending,
ActiveAt: timestamp.Add(time.Second),
}, []prompbmarshal.TimeSeries{
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": alertMetricName,
alertStateLabel: notifier.StatePending.String(),
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, []prompbmarshal.Label{
{
Name: "__name__",
Value: alertMetricName,
},
{
Name: alertStateLabel,
Value: notifier.StatePending.String(),
},
}),
newTimeSeries([]float64{float64(timestamp.Add(time.Second).Unix())}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": alertForStateMetricName,
newTimeSeries([]float64{float64(timestamp.Add(time.Second).Unix())}, []int64{timestamp.UnixNano()}, []prompbmarshal.Label{
{
Name: "__name__",
Value: alertForStateMetricName,
},
}),
})
}
@ -124,7 +185,9 @@ func TestAlertingRule_Exec(t *testing.T) {
alert *notifier.Alert
}
f := func(rule *AlertingRule, steps [][]datasource.Metric, alertsExpected map[int][]testAlert) {
ts, _ := time.Parse(time.RFC3339, "2024-10-29T00:00:00Z")
f := func(rule *AlertingRule, steps [][]datasource.Metric, alertsExpected map[int][]testAlert, tssExpected map[int][]prompbmarshal.TimeSeries) {
t.Helper()
fq := &datasource.FakeQuerier{}
@ -134,13 +197,19 @@ func TestAlertingRule_Exec(t *testing.T) {
Name: "TestRule_Exec",
}
rule.GroupID = fakeGroup.ID()
ts := time.Now()
for i, step := range steps {
fq.Reset()
fq.Add(step...)
if _, err := rule.exec(context.TODO(), ts, 0); err != nil {
tss, err := rule.exec(context.TODO(), ts, 0)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
// check the generated time series
if _, ok := tssExpected[i]; ok {
if err := compareTimeSeries(t, tssExpected[i], tss); err != nil {
t.Fatalf("generated time series mismatch for rule %q in step %d: %s", rule.Name, i, err)
}
}
// shift the execution timestamp before the next iteration
ts = ts.Add(defaultStep)
@ -174,13 +243,21 @@ func TestAlertingRule_Exec(t *testing.T) {
}
}
f(newTestAlertingRule("empty", 0), [][]datasource.Metric{}, nil)
f(newTestAlertingRule("empty", 0), [][]datasource.Metric{}, nil, nil)
f(newTestAlertingRule("empty labels", 0), [][]datasource.Metric{
f(newTestAlertingRule("empty_labels", 0), [][]datasource.Metric{
{datasource.Metric{Values: []float64{1}, Timestamps: []int64{1}}},
}, map[int][]testAlert{
0: {{alert: &notifier.Alert{State: notifier.StateFiring}}},
})
},
map[int][]prompbmarshal.TimeSeries{
0: {
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "empty_labels"}, {Name: "alertstate", Value: "firing"}},
Samples: []prompbmarshal.Sample{{Value: 1, Timestamp: ts.UnixNano() / 1e6}}},
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "empty_labels"}},
Samples: []prompbmarshal.Sample{{Value: float64(ts.Unix()), Timestamp: ts.UnixNano() / 1e6}}},
},
})
f(newTestAlertingRule("single-firing=>inactive=>firing=>inactive=>inactive", 0), [][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
@ -194,6 +271,25 @@ func TestAlertingRule_Exec(t *testing.T) {
2: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}}},
3: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}}},
4: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}}},
}, map[int][]prompbmarshal.TimeSeries{
0: {
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "single-firing=>inactive=>firing=>inactive=>inactive"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo"}},
Samples: []prompbmarshal.Sample{{Value: 1, Timestamp: ts.UnixNano() / 1e6}}},
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "single-firing=>inactive=>firing=>inactive=>inactive"}, {Name: "name", Value: "foo"}},
Samples: []prompbmarshal.Sample{{Value: float64(ts.Unix()), Timestamp: ts.UnixNano() / 1e6}}},
},
1: {
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "single-firing=>inactive=>firing=>inactive=>inactive"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo"}},
Samples: []prompbmarshal.Sample{{Value: decimal.StaleNaN, Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "single-firing=>inactive=>firing=>inactive=>inactive"}, {Name: "name", Value: "foo"}},
Samples: []prompbmarshal.Sample{{Value: decimal.StaleNaN, Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
},
2: {
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "single-firing=>inactive=>firing=>inactive=>inactive"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo"}},
Samples: []prompbmarshal.Sample{{Value: 1, Timestamp: ts.Add(2*defaultStep).UnixNano() / 1e6}}},
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "single-firing=>inactive=>firing=>inactive=>inactive"}, {Name: "name", Value: "foo"}},
Samples: []prompbmarshal.Sample{{Value: float64(ts.Add(2 * defaultStep).Unix()), Timestamp: ts.Add(2*defaultStep).UnixNano() / 1e6}}},
},
})
f(newTestAlertingRule("single-firing=>inactive=>firing=>inactive=>inactive=>firing", 0), [][]datasource.Metric{
@ -210,7 +306,7 @@ func TestAlertingRule_Exec(t *testing.T) {
3: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}}},
4: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}}},
5: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}}},
})
}, nil)
f(newTestAlertingRule("multiple-firing", 0), [][]datasource.Metric{
{
@ -224,7 +320,7 @@ func TestAlertingRule_Exec(t *testing.T) {
{labels: []string{"name", "foo1"}, alert: &notifier.Alert{State: notifier.StateFiring}},
{labels: []string{"name", "foo2"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
})
}, nil)
// 1: fire first alert
// 2: fire second alert, set first inactive
@ -233,27 +329,57 @@ func TestAlertingRule_Exec(t *testing.T) {
{metricWithLabels(t, "name", "foo")},
{metricWithLabels(t, "name", "foo1")},
{metricWithLabels(t, "name", "foo2")},
},
map[int][]testAlert{
0: {
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
1: {
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}},
{labels: []string{"name", "foo1"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
2: {
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}},
{labels: []string{"name", "foo1"}, alert: &notifier.Alert{State: notifier.StateInactive}},
{labels: []string{"name", "foo2"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
})
}, map[int][]testAlert{
0: {
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
1: {
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}},
{labels: []string{"name", "foo1"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
2: {
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}},
{labels: []string{"name", "foo1"}, alert: &notifier.Alert{State: notifier.StateInactive}},
{labels: []string{"name", "foo2"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
}, map[int][]prompbmarshal.TimeSeries{
0: {
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo"}},
Samples: []prompbmarshal.Sample{{Value: 1, Timestamp: ts.UnixNano() / 1e6}}},
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "name", Value: "foo"}},
Samples: []prompbmarshal.Sample{{Value: float64(ts.Unix()), Timestamp: ts.UnixNano() / 1e6}}},
},
1: {
// stale time series for foo, `firing -> inactive`
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo"}},
Samples: []prompbmarshal.Sample{{Value: decimal.StaleNaN, Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "name", Value: "foo"}},
Samples: []prompbmarshal.Sample{{Value: decimal.StaleNaN, Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
// new time series for foo1
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo1"}},
Samples: []prompbmarshal.Sample{{Value: 1, Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "name", Value: "foo1"}},
Samples: []prompbmarshal.Sample{{Value: float64(ts.Add(defaultStep).Unix()), Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
},
2: {
// stale time series for foo1
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo1"}},
Samples: []prompbmarshal.Sample{{Value: decimal.StaleNaN, Timestamp: ts.Add(2*defaultStep).UnixNano() / 1e6}}},
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "name", Value: "foo1"}},
Samples: []prompbmarshal.Sample{{Value: decimal.StaleNaN, Timestamp: ts.Add(2*defaultStep).UnixNano() / 1e6}}},
// new time series for foo2
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo2"}},
Samples: []prompbmarshal.Sample{{Value: 1, Timestamp: ts.Add(2*defaultStep).UnixNano() / 1e6}}},
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "name", Value: "foo2"}},
Samples: []prompbmarshal.Sample{{Value: float64(ts.Add(2 * defaultStep).Unix()), Timestamp: ts.Add(2*defaultStep).UnixNano() / 1e6}}},
},
})
f(newTestAlertingRule("for-pending", time.Minute), [][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
}, map[int][]testAlert{
0: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StatePending}}},
})
}, nil)
f(newTestAlertingRule("for-fired", defaultStep), [][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
@ -261,6 +387,22 @@ func TestAlertingRule_Exec(t *testing.T) {
}, map[int][]testAlert{
0: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StatePending}}},
1: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}}},
}, map[int][]prompbmarshal.TimeSeries{
0: {
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "for-fired"}, {Name: "alertstate", Value: "pending"}, {Name: "name", Value: "foo"}},
Samples: []prompbmarshal.Sample{{Value: 1, Timestamp: ts.UnixNano() / 1e6}}},
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "for-fired"}, {Name: "name", Value: "foo"}},
Samples: []prompbmarshal.Sample{{Value: float64(ts.Unix()), Timestamp: ts.UnixNano() / 1e6}}},
},
1: {
// stale time series for `pending -> firing`
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "for-fired"}, {Name: "alertstate", Value: "pending"}, {Name: "name", Value: "foo"}},
Samples: []prompbmarshal.Sample{{Value: decimal.StaleNaN, Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "for-fired"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo"}},
Samples: []prompbmarshal.Sample{{Value: 1, Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "for-fired"}, {Name: "name", Value: "foo"}},
Samples: []prompbmarshal.Sample{{Value: float64(ts.Add(defaultStep).Unix()), Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
},
})
f(newTestAlertingRule("for-pending=>empty", time.Second), [][]datasource.Metric{
@ -272,6 +414,26 @@ func TestAlertingRule_Exec(t *testing.T) {
0: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StatePending}}},
1: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StatePending}}},
2: {},
}, map[int][]prompbmarshal.TimeSeries{
0: {
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "for-pending=>empty"}, {Name: "alertstate", Value: "pending"}, {Name: "name", Value: "foo"}},
Samples: []prompbmarshal.Sample{{Value: 1, Timestamp: ts.UnixNano() / 1e6}}},
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "for-pending=>empty"}, {Name: "name", Value: "foo"}},
Samples: []prompbmarshal.Sample{{Value: float64(ts.Unix()), Timestamp: ts.UnixNano() / 1e6}}},
},
1: {
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "for-pending=>empty"}, {Name: "alertstate", Value: "pending"}, {Name: "name", Value: "foo"}},
Samples: []prompbmarshal.Sample{{Value: 1, Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "for-pending=>empty"}, {Name: "name", Value: "foo"}},
Samples: []prompbmarshal.Sample{{Value: float64(ts.Unix()), Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
},
// stale time series for `pending -> inactive`
2: {
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "for-pending=>empty"}, {Name: "alertstate", Value: "pending"}, {Name: "name", Value: "foo"}},
Samples: []prompbmarshal.Sample{{Value: decimal.StaleNaN, Timestamp: ts.Add(2*defaultStep).UnixNano() / 1e6}}},
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "for-pending=>empty"}, {Name: "name", Value: "foo"}},
Samples: []prompbmarshal.Sample{{Value: decimal.StaleNaN, Timestamp: ts.Add(2*defaultStep).UnixNano() / 1e6}}},
},
})
f(newTestAlertingRule("for-pending=>firing=>inactive=>pending=>firing", defaultStep), [][]datasource.Metric{
@ -287,7 +449,7 @@ func TestAlertingRule_Exec(t *testing.T) {
2: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}}},
3: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StatePending}}},
4: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}}},
})
}, nil)
f(newTestAlertingRuleWithCustomFields("for-pending=>firing=>keepfiring=>firing", defaultStep, 0, defaultStep, nil), [][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
@ -300,7 +462,7 @@ func TestAlertingRule_Exec(t *testing.T) {
1: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}}},
2: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}}},
3: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}}},
})
}, nil)
f(newTestAlertingRuleWithCustomFields("for-pending=>firing=>keepfiring=>keepfiring=>inactive=>pending=>firing", defaultStep, 0, 2*defaultStep, nil), [][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
@ -321,7 +483,7 @@ func TestAlertingRule_Exec(t *testing.T) {
4: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}}},
5: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StatePending}}},
6: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}}},
})
}, nil)
}
func TestAlertingRuleExecRange(t *testing.T) {
@ -477,7 +639,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
{Values: []float64{1, 1, 1}, Timestamps: []int64{1, 3, 5}},
{
Values: []float64{1, 1}, Timestamps: []int64{1, 5},
Labels: []datasource.Label{{Name: "foo", Value: "bar"}},
Labels: []prompbmarshal.Label{{Name: "foo", Value: "bar"}},
},
}, []*notifier.Alert{
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
@ -523,7 +685,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
{Values: []float64{1, 1}, Timestamps: []int64{1, 100}},
{
Values: []float64{1, 1}, Timestamps: []int64{1, 5},
Labels: []datasource.Label{{Name: "foo", Value: "bar"}},
Labels: []prompbmarshal.Label{{Name: "foo", Value: "bar"}},
},
}, []*notifier.Alert{
{
@ -1047,7 +1209,7 @@ func newTestAlertingRuleWithCustomFields(name string, waitFor, evalInterval, kee
func TestAlertingRule_ToLabels(t *testing.T) {
metric := datasource.Metric{
Labels: []datasource.Label{
Labels: []prompbmarshal.Label{
{Name: "instance", Value: "0.0.0.0:8800"},
{Name: "group", Value: "vmalert"},
{Name: "alertname", Value: "ConfigurationReloadFailure"},

View file

@ -8,12 +8,9 @@ import (
"fmt"
"hash/fnv"
"net/url"
"strconv"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/cheggaaa/pb/v3"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
@ -21,7 +18,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/metrics"
@ -213,7 +209,6 @@ func (g *Group) restore(ctx context.Context, qb datasource.QuerierBuilder, ts ti
continue
}
q := qb.BuildWithParams(datasource.QuerierParams{
DataSourceType: g.Type.String(),
EvaluationInterval: g.Interval,
QueryParams: g.Params,
Headers: g.Headers,
@ -351,10 +346,9 @@ func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw re
}
e := &executor{
Rw: rw,
Notifiers: nts,
notifierHeaders: g.NotifierHeaders,
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
Rw: rw,
Notifiers: nts,
notifierHeaders: g.NotifierHeaders,
}
g.infof("started")
@ -427,8 +421,6 @@ func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw re
continue
}
// ensure that staleness is tracked for existing rules only
e.purgeStaleSeries(g.Rules)
e.notifierHeaders = g.NotifierHeaders
g.mu.Unlock()
@ -540,10 +532,9 @@ func (g *Group) Replay(start, end time.Time, rw remotewrite.RWClient, maxDataPoi
// ExecOnce evaluates all the rules under group for once with given timestamp.
func (g *Group) ExecOnce(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, evalTS time.Time) chan error {
e := &executor{
Rw: rw,
Notifiers: nts,
notifierHeaders: g.NotifierHeaders,
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
Rw: rw,
Notifiers: nts,
notifierHeaders: g.NotifierHeaders,
}
if len(g.Rules) < 1 {
return nil
@ -634,13 +625,6 @@ type executor struct {
notifierHeaders map[string]string
Rw remotewrite.RWClient
previouslySentSeriesToRWMu sync.Mutex
// previouslySentSeriesToRW stores series sent to RW on previous iteration
// map[ruleID]map[ruleLabels][]prompb.Label
// where `ruleID` is ID of the Rule within a Group
// and `ruleLabels` is []prompb.Label marshalled to a string
previouslySentSeriesToRW map[uint64]map[string][]prompbmarshal.Label
}
// execConcurrently executes rules concurrently if concurrency>1
@ -707,11 +691,6 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
if err := pushToRW(tss); err != nil {
return err
}
staleSeries := e.getStaleSeries(r, tss, ts)
if err := pushToRW(staleSeries); err != nil {
return err
}
}
ar, ok := r.(*AlertingRule)
@ -738,79 +717,3 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
wg.Wait()
return errGr.Err()
}
var bbPool bytesutil.ByteBufferPool
// getStaleSeries checks whether any of the previously sent series have gone stale.
func (e *executor) getStaleSeries(r Rule, tss []prompbmarshal.TimeSeries, timestamp time.Time) []prompbmarshal.TimeSeries {
bb := bbPool.Get()
defer bbPool.Put(bb)
ruleLabels := make(map[string][]prompbmarshal.Label, len(tss))
for _, ts := range tss {
// convert labels to strings, so we can compare with previously sent series
bb.B = labelsToString(bb.B, ts.Labels)
ruleLabels[string(bb.B)] = ts.Labels
bb.Reset()
}
rID := r.ID()
var staleS []prompbmarshal.TimeSeries
// check whether there are series which disappeared and need to be marked as stale
e.previouslySentSeriesToRWMu.Lock()
for key, labels := range e.previouslySentSeriesToRW[rID] {
if _, ok := ruleLabels[key]; ok {
continue
}
// previously sent series are missing in current series, so we mark them as stale
ss := newTimeSeriesPB([]float64{decimal.StaleNaN}, []int64{timestamp.Unix()}, labels)
staleS = append(staleS, ss)
}
// set previous series to current
e.previouslySentSeriesToRW[rID] = ruleLabels
e.previouslySentSeriesToRWMu.Unlock()
return staleS
}
// purgeStaleSeries deletes references in tracked
// previouslySentSeriesToRW list to Rules which aren't present
// in the given activeRules list. The method is used when the list
// of loaded rules has changed and executor has to remove
// references to non-existing rules.
func (e *executor) purgeStaleSeries(activeRules []Rule) {
newPreviouslySentSeriesToRW := make(map[uint64]map[string][]prompbmarshal.Label)
e.previouslySentSeriesToRWMu.Lock()
for _, rule := range activeRules {
id := rule.ID()
prev, ok := e.previouslySentSeriesToRW[id]
if ok {
// keep previous series for staleness detection
newPreviouslySentSeriesToRW[id] = prev
}
}
e.previouslySentSeriesToRW = nil
e.previouslySentSeriesToRW = newPreviouslySentSeriesToRW
e.previouslySentSeriesToRWMu.Unlock()
}
func labelsToString(dst []byte, labels []prompbmarshal.Label) []byte {
dst = append(dst, '{')
for i, label := range labels {
if len(label.Name) == 0 {
dst = append(dst, "__name__"...)
} else {
dst = append(dst, label.Name...)
}
dst = append(dst, '=')
dst = strconv.AppendQuote(dst, label.Value)
if i < len(labels)-1 {
dst = append(dst, ',')
}
}
dst = append(dst, '}')
return dst
}

View file

@ -5,7 +5,6 @@ import (
"fmt"
"math"
"os"
"reflect"
"sort"
"testing"
"time"
@ -17,8 +16,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
)
@ -383,153 +380,6 @@ func TestGetResolveDuration(t *testing.T) {
f(2*time.Minute, 0, 1*time.Minute, 8*time.Minute)
}
func TestGetStaleSeries(t *testing.T) {
ts := time.Now()
e := &executor{
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
}
f := func(r Rule, labels, expLabels [][]prompbmarshal.Label) {
t.Helper()
var tss []prompbmarshal.TimeSeries
for _, l := range labels {
tss = append(tss, newTimeSeriesPB([]float64{1}, []int64{ts.Unix()}, l))
}
staleS := e.getStaleSeries(r, tss, ts)
if staleS == nil && expLabels == nil {
return
}
if len(staleS) != len(expLabels) {
t.Fatalf("expected to get %d stale series, got %d",
len(expLabels), len(staleS))
}
for i, exp := range expLabels {
got := staleS[i]
if !reflect.DeepEqual(exp, got.Labels) {
t.Fatalf("expected to get labels: \n%v;\ngot instead: \n%v",
exp, got.Labels)
}
if len(got.Samples) != 1 {
t.Fatalf("expected to have 1 sample; got %d", len(got.Samples))
}
if !decimal.IsStaleNaN(got.Samples[0].Value) {
t.Fatalf("expected sample value to be %v; got %v", decimal.StaleNaN, got.Samples[0].Value)
}
}
}
// warn: keep in mind, that executor holds the state, so sequence of f calls matters
// single series
f(&AlertingRule{RuleID: 1},
[][]prompbmarshal.Label{toPromLabels(t, "__name__", "job:foo", "job", "foo")},
nil)
f(&AlertingRule{RuleID: 1},
[][]prompbmarshal.Label{toPromLabels(t, "__name__", "job:foo", "job", "foo")},
nil)
f(&AlertingRule{RuleID: 1},
nil,
[][]prompbmarshal.Label{toPromLabels(t, "__name__", "job:foo", "job", "foo")})
f(&AlertingRule{RuleID: 1},
nil,
nil)
// multiple series
f(&AlertingRule{RuleID: 1},
[][]prompbmarshal.Label{
toPromLabels(t, "__name__", "job:foo", "job", "foo"),
toPromLabels(t, "__name__", "job:foo", "job", "bar"),
},
nil)
f(&AlertingRule{RuleID: 1},
[][]prompbmarshal.Label{toPromLabels(t, "__name__", "job:foo", "job", "bar")},
[][]prompbmarshal.Label{toPromLabels(t, "__name__", "job:foo", "job", "foo")})
f(&AlertingRule{RuleID: 1},
[][]prompbmarshal.Label{toPromLabels(t, "__name__", "job:foo", "job", "bar")},
nil)
f(&AlertingRule{RuleID: 1},
nil,
[][]prompbmarshal.Label{toPromLabels(t, "__name__", "job:foo", "job", "bar")})
// multiple rules and series
f(&AlertingRule{RuleID: 1},
[][]prompbmarshal.Label{
toPromLabels(t, "__name__", "job:foo", "job", "foo"),
toPromLabels(t, "__name__", "job:foo", "job", "bar"),
},
nil)
f(&AlertingRule{RuleID: 2},
[][]prompbmarshal.Label{
toPromLabels(t, "__name__", "job:foo", "job", "foo"),
toPromLabels(t, "__name__", "job:foo", "job", "bar"),
},
nil)
f(&AlertingRule{RuleID: 1},
[][]prompbmarshal.Label{toPromLabels(t, "__name__", "job:foo", "job", "bar")},
[][]prompbmarshal.Label{toPromLabels(t, "__name__", "job:foo", "job", "foo")})
f(&AlertingRule{RuleID: 1},
[][]prompbmarshal.Label{toPromLabels(t, "__name__", "job:foo", "job", "bar")},
nil)
}
func TestPurgeStaleSeries(t *testing.T) {
ts := time.Now()
labels := toPromLabels(t, "__name__", "job:foo", "job", "foo")
tss := []prompbmarshal.TimeSeries{newTimeSeriesPB([]float64{1}, []int64{ts.Unix()}, labels)}
f := func(curRules, newRules, expStaleRules []Rule) {
t.Helper()
e := &executor{
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
}
// seed executor with series for
// current rules
for _, rule := range curRules {
e.getStaleSeries(rule, tss, ts)
}
e.purgeStaleSeries(newRules)
if len(e.previouslySentSeriesToRW) != len(expStaleRules) {
t.Fatalf("expected to get %d stale series, got %d",
len(expStaleRules), len(e.previouslySentSeriesToRW))
}
for _, exp := range expStaleRules {
if _, ok := e.previouslySentSeriesToRW[exp.ID()]; !ok {
t.Fatalf("expected to have rule %d; got nil instead", exp.ID())
}
}
}
f(nil, nil, nil)
f(
nil,
[]Rule{&AlertingRule{RuleID: 1}},
nil,
)
f(
[]Rule{&AlertingRule{RuleID: 1}},
nil,
nil,
)
f(
[]Rule{&AlertingRule{RuleID: 1}},
[]Rule{&AlertingRule{RuleID: 2}},
nil,
)
f(
[]Rule{&AlertingRule{RuleID: 1}, &AlertingRule{RuleID: 2}},
[]Rule{&AlertingRule{RuleID: 2}},
[]Rule{&AlertingRule{RuleID: 2}},
)
f(
[]Rule{&AlertingRule{RuleID: 1}, &AlertingRule{RuleID: 2}},
[]Rule{&AlertingRule{RuleID: 1}, &AlertingRule{RuleID: 2}},
[]Rule{&AlertingRule{RuleID: 1}, &AlertingRule{RuleID: 2}},
)
}
func TestFaultyNotifier(t *testing.T) {
fq := &datasource.FakeQuerier{}
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
@ -580,8 +430,7 @@ func TestFaultyRW(t *testing.T) {
}
e := &executor{
Rw: &remotewrite.Client{},
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
Rw: &remotewrite.Client{},
}
err := e.exec(context.Background(), r, time.Now(), 0, 10)

View file

@ -1,36 +0,0 @@
package rule
import (
"fmt"
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)
func BenchmarkGetStaleSeries(b *testing.B) {
ts := time.Now()
n := 100
payload := make([]prompbmarshal.TimeSeries, 0, n)
for i := 0; i < n; i++ {
s := fmt.Sprintf("%d", i)
labels := toPromLabels(b,
"__name__", "foo", ""+
"instance", s,
"job", s,
"state", s,
)
payload = append(payload, newTimeSeriesPB([]float64{1}, []int64{ts.Unix()}, labels))
}
e := &executor{
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
}
ar := &AlertingRule{RuleID: 1}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
e.getStaleSeries(ar, payload, ts)
}
}

View file

@ -3,14 +3,17 @@ package rule
import (
"context"
"fmt"
"sort"
"strings"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
)
// RecordingRule is a Rule that supposed
@ -32,6 +35,8 @@ type RecordingRule struct {
// during evaluations
state *ruleState
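// lastEvaluation stores the label sets of series produced on the previous
// evaluation; series missing from the current evaluation are marked as stale.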
lastEvaluation map[string]struct{}
metrics *recordingRuleMetrics
}
@ -64,10 +69,11 @@ func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul
File: group.File,
metrics: &recordingRuleMetrics{},
q: qb.BuildWithParams(datasource.QuerierParams{
DataSourceType: group.Type.String(),
EvaluationInterval: group.Interval,
QueryParams: group.Params,
Headers: group.Headers,
DataSourceType: group.Type.String(),
ApplyIntervalAsTimeFilter: setIntervalAsTimeFilter(group.Type.String(), cfg.Expr),
EvaluationInterval: group.Interval,
QueryParams: group.Params,
Headers: group.Headers,
}),
}
@ -110,7 +116,7 @@ func (rr *RecordingRule) execRange(ctx context.Context, start, end time.Time) ([
var tss []prompbmarshal.TimeSeries
for _, s := range res.Data {
ts := rr.toTimeSeries(s)
key := stringifyLabels(ts)
key := stringifyLabels(ts.Labels)
if _, ok := duplicates[key]; ok {
return nil, fmt.Errorf("original metric %v; resulting labels %q: %w", s.Labels, key, errDuplicate)
}
@ -152,28 +158,47 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
return nil, curState.Err
}
duplicates := make(map[string]struct{}, len(qMetrics))
curEvaluation := make(map[string]struct{}, len(qMetrics))
lastEvaluation := rr.lastEvaluation
var tss []prompbmarshal.TimeSeries
for _, r := range qMetrics {
ts := rr.toTimeSeries(r)
key := stringifyLabels(ts)
if _, ok := duplicates[key]; ok {
key := stringifyLabels(ts.Labels)
if _, ok := curEvaluation[key]; ok {
curState.Err = fmt.Errorf("original metric %v; resulting labels %q: %w", r, key, errDuplicate)
return nil, curState.Err
}
duplicates[key] = struct{}{}
curEvaluation[key] = struct{}{}
delete(lastEvaluation, key)
tss = append(tss, ts)
}
// mark series which were sent on the previous evaluation but are missing from the current one as stale
for k := range lastEvaluation {
tss = append(tss, prompbmarshal.TimeSeries{
Labels: stringToLabels(k),
Samples: []prompbmarshal.Sample{
{Value: decimal.StaleNaN, Timestamp: ts.UnixNano() / 1e6},
}})
}
rr.lastEvaluation = curEvaluation
return tss, nil
}
func stringifyLabels(ts prompbmarshal.TimeSeries) string {
labels := ts.Labels
if len(labels) > 1 {
sort.Slice(labels, func(i, j int) bool {
return labels[i].Name < labels[j].Name
})
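// stringToLabels parses a string in the format produced by stringifyLabels
// (`name1=value1,name2=value2,...`) back into a label list; for example,
// stringToLabels("job=foo,instance=bar") returns the labels {job="foo", instance="bar"}.
// Pairs which don't match the `name=value` format are skipped.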
func stringToLabels(s string) []prompbmarshal.Label {
labels := strings.Split(s, ",")
rLabels := make([]prompbmarshal.Label, 0, len(labels))
for i := range labels {
if label := strings.Split(labels[i], "="); len(label) == 2 {
rLabels = append(rLabels, prompbmarshal.Label{
Name: label[0],
Value: label[1],
})
}
}
return rLabels
}
func stringifyLabels(labels []prompbmarshal.Label) string {
b := strings.Builder{}
for i, l := range labels {
b.WriteString(l.Name)
@ -187,19 +212,27 @@ func stringifyLabels(ts prompbmarshal.TimeSeries) string {
}
func (rr *RecordingRule) toTimeSeries(m datasource.Metric) prompbmarshal.TimeSeries {
labels := make(map[string]string)
for _, l := range m.Labels {
labels[l.Name] = l.Value
if preN := promrelabel.GetLabelByName(m.Labels, "__name__"); preN != nil {
preN.Value = rr.Name
} else {
m.Labels = append(m.Labels, prompbmarshal.Label{
Name: "__name__",
Value: rr.Name,
})
}
labels["__name__"] = rr.Name
// override existing labels with configured ones
for k, v := range rr.Labels {
if _, ok := labels[k]; ok && labels[k] != v {
labels[fmt.Sprintf("exported_%s", k)] = labels[k]
for k := range rr.Labels {
prevLabel := promrelabel.GetLabelByName(m.Labels, k)
if prevLabel != nil && prevLabel.Value != rr.Labels[k] {
// Rename the prevLabel to "exported_" + label.Name
prevLabel.Name = fmt.Sprintf("exported_%s", prevLabel.Name)
}
labels[k] = v
m.Labels = append(m.Labels, prompbmarshal.Label{
Name: k,
Value: rr.Labels[k],
})
}
return newTimeSeries(m.Values, m.Timestamps, labels)
ts := newTimeSeries(m.Values, m.Timestamps, m.Labels)
return ts
}
// updateWith copies all significant fields.
@ -213,3 +246,15 @@ func (rr *RecordingRule) updateWith(r Rule) error {
rr.q = nr.q
return nil
}
// setIntervalAsTimeFilter returns true if the given LogsQL expr has no global time filter, so the evaluation interval should be applied as the time filter.
func setIntervalAsTimeFilter(dType, expr string) bool {
if dType != "vlogs" {
return false
}
q, err := logstorage.ParseStatsQuery(expr, 0)
if err != nil {
logger.Panicf("BUG: the LogsQL query must be valid here; got error: %s; query=[%s]", err, expr)
}
return !q.HasGlobalTimeFilter()
}

View file

@ -9,59 +9,131 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)
func TestRecordingRule_Exec(t *testing.T) {
f := func(rule *RecordingRule, metrics []datasource.Metric, tssExpected []prompbmarshal.TimeSeries) {
ts, _ := time.Parse(time.RFC3339, "2024-10-29T00:00:00Z")
const defaultStep = 5 * time.Millisecond
f := func(rule *RecordingRule, steps [][]datasource.Metric, tssExpected [][]prompbmarshal.TimeSeries) {
t.Helper()
fq := &datasource.FakeQuerier{}
fq.Add(metrics...)
rule.q = fq
rule.state = &ruleState{
entries: make([]StateEntry, 10),
}
tss, err := rule.exec(context.TODO(), time.Now(), 0)
if err != nil {
t.Fatalf("unexpected RecordingRule.exec error: %s", err)
}
if err := compareTimeSeries(t, tssExpected, tss); err != nil {
t.Fatalf("timeseries missmatch: %s", err)
for i, step := range steps {
fq.Reset()
fq.Add(step...)
rule.q = fq
rule.state = &ruleState{
entries: make([]StateEntry, 10),
}
tss, err := rule.exec(context.TODO(), ts, 0)
if err != nil {
t.Fatalf("fail to test rule %s: unexpected error: %s", rule.Name, err)
}
if err := compareTimeSeries(t, tssExpected[i], tss); err != nil {
t.Fatalf("fail to test rule %s: time series mismatch on step %d: %s", rule.Name, i, err)
}
ts = ts.Add(defaultStep)
}
}
timestamp := time.Now()
f(&RecordingRule{
Name: "foo",
}, []datasource.Metric{
}, [][]datasource.Metric{{
metricWithValueAndLabels(t, 10, "__name__", "bar"),
}, []prompbmarshal.TimeSeries{
newTimeSeries([]float64{10}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": "foo",
}}, [][]prompbmarshal.TimeSeries{{
newTimeSeries([]float64{10}, []int64{ts.UnixNano()}, []prompbmarshal.Label{
{
Name: "__name__",
Value: "foo",
},
}),
})
}})
f(&RecordingRule{
Name: "foobarbaz",
}, []datasource.Metric{
metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "foo"),
metricWithValueAndLabels(t, 2, "__name__", "bar", "job", "bar"),
metricWithValueAndLabels(t, 3, "__name__", "baz", "job", "baz"),
}, []prompbmarshal.TimeSeries{
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": "foobarbaz",
"job": "foo",
}),
newTimeSeries([]float64{2}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": "foobarbaz",
"job": "bar",
}),
newTimeSeries([]float64{3}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": "foobarbaz",
"job": "baz",
}),
}, [][]datasource.Metric{
{
metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "foo"),
metricWithValueAndLabels(t, 2, "__name__", "bar", "job", "bar"),
},
{
metricWithValueAndLabels(t, 10, "__name__", "foo", "job", "foo"),
},
{
metricWithValueAndLabels(t, 10, "__name__", "foo", "job", "bar"),
},
}, [][]prompbmarshal.TimeSeries{
{
newTimeSeries([]float64{1}, []int64{ts.UnixNano()}, []prompbmarshal.Label{
{
Name: "__name__",
Value: "foobarbaz",
},
{
Name: "job",
Value: "foo",
},
}),
newTimeSeries([]float64{2}, []int64{ts.UnixNano()}, []prompbmarshal.Label{
{
Name: "__name__",
Value: "foobarbaz",
},
{
Name: "job",
Value: "bar",
},
}),
},
{
newTimeSeries([]float64{10}, []int64{ts.Add(defaultStep).UnixNano()}, []prompbmarshal.Label{
{
Name: "__name__",
Value: "foobarbaz",
},
{
Name: "job",
Value: "foo",
},
}),
// stale time series
newTimeSeries([]float64{decimal.StaleNaN}, []int64{ts.Add(defaultStep).UnixNano()}, []prompbmarshal.Label{
{
Name: "__name__",
Value: "foobarbaz",
},
{
Name: "job",
Value: "bar",
},
}),
},
{
newTimeSeries([]float64{10}, []int64{ts.Add(2 * defaultStep).UnixNano()}, []prompbmarshal.Label{
{
Name: "__name__",
Value: "foobarbaz",
},
{
Name: "job",
Value: "bar",
},
}),
newTimeSeries([]float64{decimal.StaleNaN}, []int64{ts.Add(2 * defaultStep).UnixNano()}, []prompbmarshal.Label{
{
Name: "__name__",
Value: "foobarbaz",
},
{
Name: "job",
Value: "foo",
},
}),
},
})
f(&RecordingRule{
@ -69,22 +141,44 @@ func TestRecordingRule_Exec(t *testing.T) {
Labels: map[string]string{
"source": "test",
},
}, []datasource.Metric{
}, [][]datasource.Metric{{
metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "foo"),
metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar", "source", "origin"),
}, []prompbmarshal.TimeSeries{
newTimeSeries([]float64{2}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": "job:foo",
"job": "foo",
"source": "test",
}}, [][]prompbmarshal.TimeSeries{{
newTimeSeries([]float64{2}, []int64{ts.UnixNano()}, []prompbmarshal.Label{
{
Name: "__name__",
Value: "job:foo",
},
{
Name: "job",
Value: "foo",
},
{
Name: "source",
Value: "test",
},
}),
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": "job:foo",
"job": "bar",
"source": "test",
"exported_source": "origin",
}),
})
newTimeSeries([]float64{1}, []int64{ts.UnixNano()},
[]prompbmarshal.Label{
{
Name: "__name__",
Value: "job:foo",
},
{
Name: "job",
Value: "bar",
},
{
Name: "source",
Value: "test",
},
{
Name: "exported_source",
Value: "origin",
},
}),
}})
}
func TestRecordingRule_ExecRange(t *testing.T) {
@ -110,9 +204,13 @@ func TestRecordingRule_ExecRange(t *testing.T) {
}, []datasource.Metric{
metricWithValuesAndLabels(t, []float64{10, 20, 30}, "__name__", "bar"),
}, []prompbmarshal.TimeSeries{
newTimeSeries([]float64{10, 20, 30}, []int64{timestamp.UnixNano(), timestamp.UnixNano(), timestamp.UnixNano()}, map[string]string{
"__name__": "foo",
}),
newTimeSeries([]float64{10, 20, 30}, []int64{timestamp.UnixNano(), timestamp.UnixNano(), timestamp.UnixNano()},
[]prompbmarshal.Label{
{
Name: "__name__",
Value: "foo",
},
}),
})
f(&RecordingRule{
@ -122,18 +220,36 @@ func TestRecordingRule_ExecRange(t *testing.T) {
metricWithValuesAndLabels(t, []float64{2, 3}, "__name__", "bar", "job", "bar"),
metricWithValuesAndLabels(t, []float64{4, 5, 6}, "__name__", "baz", "job", "baz"),
}, []prompbmarshal.TimeSeries{
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": "foobarbaz",
"job": "foo",
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, []prompbmarshal.Label{
{
Name: "__name__",
Value: "foobarbaz",
},
{
Name: "job",
Value: "foo",
},
}),
newTimeSeries([]float64{2, 3}, []int64{timestamp.UnixNano(), timestamp.UnixNano()}, map[string]string{
"__name__": "foobarbaz",
"job": "bar",
newTimeSeries([]float64{2, 3}, []int64{timestamp.UnixNano(), timestamp.UnixNano()}, []prompbmarshal.Label{
{
Name: "__name__",
Value: "foobarbaz",
},
{
Name: "job",
Value: "bar",
},
}),
newTimeSeries([]float64{4, 5, 6},
[]int64{timestamp.UnixNano(), timestamp.UnixNano(), timestamp.UnixNano()}, map[string]string{
"__name__": "foobarbaz",
"job": "baz",
[]int64{timestamp.UnixNano(), timestamp.UnixNano(), timestamp.UnixNano()}, []prompbmarshal.Label{
{
Name: "__name__",
Value: "foobarbaz",
},
{
Name: "job",
Value: "baz",
},
}),
})
@ -146,16 +262,35 @@ func TestRecordingRule_ExecRange(t *testing.T) {
metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "foo"),
metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar"),
}, []prompbmarshal.TimeSeries{
newTimeSeries([]float64{2}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": "job:foo",
"job": "foo",
"source": "test",
}),
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": "job:foo",
"job": "bar",
"source": "test",
newTimeSeries([]float64{2}, []int64{timestamp.UnixNano()}, []prompbmarshal.Label{
{
Name: "__name__",
Value: "job:foo",
},
{
Name: "job",
Value: "foo",
},
{
Name: "source",
Value: "test",
},
}),
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()},
[]prompbmarshal.Label{
{
Name: "__name__",
Value: "job:foo",
},
{
Name: "job",
Value: "bar",
},
{
Name: "source",
Value: "test",
},
}),
})
}
@ -266,3 +401,25 @@ func TestRecordingRuleExec_Negative(t *testing.T) {
t.Fatalf("cannot execute recroding rule: %s", err)
}
}
func TestSetIntervalAsTimeFilter(t *testing.T) {
f := func(s, dType string, expected bool) {
t.Helper()
if setIntervalAsTimeFilter(dType, s) != expected {
t.Fatalf("unexpected result for hasTimeFilter(%q); want %v", s, expected)
}
}
f(`* | count()`, "prometheus", false)
f(`* | count()`, "vlogs", true)
f(`error OR _time:5m | count()`, "vlogs", true)
f(`(_time: 5m AND error) OR (_time: 5m AND warn) | count()`, "vlogs", true)
f(`* | error OR _time:5m | count()`, "vlogs", true)
f(`_time:5m | count()`, "vlogs", false)
f(`_time:2023-04-25T22:45:59Z | count()`, "vlogs", false)
f(`error AND _time:5m | count()`, "vlogs", false)
f(`* | error AND _time:5m | count()`, "vlogs", false)
}

View file

@ -8,6 +8,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)
@ -87,7 +88,7 @@ func metricWithLabels(t *testing.T, labels ...string) datasource.Metric {
}
m := datasource.Metric{Values: []float64{1}, Timestamps: []int64{1}}
for i := 0; i < len(labels); i += 2 {
m.Labels = append(m.Labels, datasource.Label{
m.Labels = append(m.Labels, prompbmarshal.Label{
Name: labels[i],
Value: labels[i+1],
})
@ -95,21 +96,6 @@ func metricWithLabels(t *testing.T, labels ...string) datasource.Metric {
return m
}
func toPromLabels(t testing.TB, labels ...string) []prompbmarshal.Label {
t.Helper()
if len(labels) == 0 || len(labels)%2 != 0 {
t.Fatalf("expected to get even number of labels")
}
var ls []prompbmarshal.Label
for i := 0; i < len(labels); i += 2 {
ls = append(ls, prompbmarshal.Label{
Name: labels[i],
Value: labels[i+1],
})
}
return ls
}
func compareTimeSeries(t *testing.T, a, b []prompbmarshal.TimeSeries) error {
t.Helper()
if len(a) != len(b) {
@ -122,7 +108,7 @@ func compareTimeSeries(t *testing.T, a, b []prompbmarshal.TimeSeries) error {
}
for i, exp := range expTS.Samples {
got := gotTS.Samples[i]
if got.Value != exp.Value {
if got.Value != exp.Value && (!decimal.IsStaleNaN(got.Value) || !decimal.IsStaleNaN(exp.Value)) {
return fmt.Errorf("expected value %.2f; got %.2f", exp.Value, got.Value)
}
// timestamp validation isn't always correct for now.

View file

@ -9,10 +9,14 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
)
func newTimeSeries(values []float64, timestamps []int64, labels map[string]string) prompbmarshal.TimeSeries {
// newTimeSeries first sorts given labels, then returns new time series.
func newTimeSeries(values []float64, timestamps []int64, labels []prompbmarshal.Label) prompbmarshal.TimeSeries {
promrelabel.SortLabels(labels)
ts := prompbmarshal.TimeSeries{
Labels: labels,
Samples: make([]prompbmarshal.Sample, len(values)),
}
for i := range values {
@ -21,34 +25,6 @@ func newTimeSeries(values []float64, timestamps []int64, labels map[string]strin
Timestamp: time.Unix(timestamps[i], 0).UnixNano() / 1e6,
}
}
keys := make([]string, 0, len(labels))
for k := range labels {
keys = append(keys, k)
}
sort.Strings(keys) // make order deterministic
for _, key := range keys {
ts.Labels = append(ts.Labels, prompbmarshal.Label{
Name: key,
Value: labels[key],
})
}
return ts
}
// newTimeSeriesPB creates prompbmarshal.TimeSeries with given
// values, timestamps and labels.
// It expects that labels are already sorted.
func newTimeSeriesPB(values []float64, timestamps []int64, labels []prompbmarshal.Label) prompbmarshal.TimeSeries {
ts := prompbmarshal.TimeSeries{
Samples: make([]prompbmarshal.Sample, len(values)),
}
for i := range values {
ts.Samples[i] = prompbmarshal.Sample{
Value: values[i],
Timestamp: time.Unix(timestamps[i], 0).UnixNano() / 1e6,
}
}
ts.Labels = labels
return ts
}

View file

@ -169,6 +169,8 @@ func GetWithFuncs(funcs textTpl.FuncMap) (*textTpl.Template, error) {
if err != nil {
return nil, err
}
// Clone() doesn't copy tpl Options, so we set them manually
tmpl = tmpl.Option("missingkey=zero")
return tmpl.Funcs(funcs), nil
}

View file

@ -462,7 +462,6 @@ func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *
// Slow path - select other backend urls.
n := atomicCounter.Add(1) - 1
for i := uint32(0); i < uint32(len(bus)); i++ {
idx := (n + i) % uint32(len(bus))
bu := bus[idx]
@ -484,7 +483,7 @@ func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *
if bu.isBroken() {
continue
}
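// Prefer bu if the currently selected buMin is broken, so a healthy
// backend is chosen whenever at least one exists.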
if n := bu.concurrentRequests.Load(); n < minRequests {
if n := bu.concurrentRequests.Load(); n < minRequests || buMin.isBroken() {
buMin = bu
minRequests = n
}
@ -861,22 +860,23 @@ func (ui *UserInfo) initURLs() error {
loadBalancingPolicy := *defaultLoadBalancingPolicy
dropSrcPathPrefixParts := 0
discoverBackendIPs := *discoverBackendIPsGlobal
if ui.RetryStatusCodes != nil {
retryStatusCodes = ui.RetryStatusCodes
}
if ui.LoadBalancingPolicy != "" {
loadBalancingPolicy = ui.LoadBalancingPolicy
}
if ui.DropSrcPathPrefixParts != nil {
dropSrcPathPrefixParts = *ui.DropSrcPathPrefixParts
}
if ui.DiscoverBackendIPs != nil {
discoverBackendIPs = *ui.DiscoverBackendIPs
}
if ui.URLPrefix != nil {
if err := ui.URLPrefix.sanitizeAndInitialize(); err != nil {
return err
}
if ui.RetryStatusCodes != nil {
retryStatusCodes = ui.RetryStatusCodes
}
if ui.LoadBalancingPolicy != "" {
loadBalancingPolicy = ui.LoadBalancingPolicy
}
if ui.DropSrcPathPrefixParts != nil {
dropSrcPathPrefixParts = *ui.DropSrcPathPrefixParts
}
if ui.DiscoverBackendIPs != nil {
discoverBackendIPs = *ui.DiscoverBackendIPs
}
ui.URLPrefix.retryStatusCodes = retryStatusCodes
ui.URLPrefix.dropSrcPathPrefixParts = dropSrcPathPrefixParts
ui.URLPrefix.discoverBackendIPs = discoverBackendIPs

View file

@ -777,6 +777,28 @@ func TestGetLeastLoadedBackendURL(t *testing.T) {
fn(7, 7, 7)
}
func TestBrokenBackend(t *testing.T) {
up := mustParseURLs([]string{
"http://node1:343",
"http://node2:343",
"http://node3:343",
})
up.loadBalancingPolicy = "least_loaded"
pbus := up.bus.Load()
bus := *pbus
// explicitly mark one of the backends as broken
bus[1].setBroken()
// broken backend should never return while there are healthy backends
for i := 0; i < 1e3; i++ {
b := up.getBackendURL()
if b.isBroken() {
t.Fatalf("unexpected broken backend %q", b.url)
}
}
}
func getRegexs(paths []string) []*Regex {
var sps []*Regex
for _, path := range paths {

View file

@ -123,6 +123,12 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
ui := getUserInfoByAuthTokens(ats)
if ui == nil {
uu := authConfig.Load().UnauthorizedUser
if uu != nil {
processUserRequest(w, r, uu)
return true
}
invalidAuthTokenRequests.Inc()
if *logInvalidAuthTokens {
err := fmt.Errorf("cannot authorize request with auth tokens %q", ats)

View file

@ -90,6 +90,20 @@ User-Agent: vmauth
X-Forwarded-For: 12.34.56.78, 42.2.3.84`
f(cfgStr, requestURL, backendHandler, responseExpected)
// route requests which failed authorization to unauthorized_user (issue #7543)
cfgStr = `
unauthorized_user:
url_prefix: "{BACKEND}/foo"
keep_original_host: true`
requestURL = "http://foo:invalid-secret@some-host.com/abc/def"
backendHandler = func(w http.ResponseWriter, r *http.Request) {
fmt.Fprintf(w, "requested_url=http://%s%s", r.Host, r.URL)
}
responseExpected = `
statusCode=200
requested_url=http://some-host.com/foo/abc/def`
f(cfgStr, requestURL, backendHandler, responseExpected)
// keep_original_host
cfgStr = `
unauthorized_user:

View file

@ -187,6 +187,10 @@ func TestCreateTargetURLSuccess(t *testing.T) {
RetryStatusCodes: []int{},
DropSrcPathPrefixParts: intp(0),
},
{
SrcPaths: getRegexs([]string{"/metrics"}),
URLPrefix: mustParseURL("http://metrics-server"),
},
},
URLPrefix: mustParseURL("http://default-server"),
HeadersConf: HeadersConf{
@ -206,6 +210,35 @@ func TestCreateTargetURLSuccess(t *testing.T) {
"bb: aaa", "x: y", []int{502}, "least_loaded", 2)
f(ui, "https://foo-host/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "", "", []int{}, "least_loaded", 0)
f(ui, "https://foo-host/foo/bar/api/v1/query_range", "http://default-server/api/v1/query_range", "bb: aaa", "x: y", []int{502}, "least_loaded", 2)
f(ui, "https://foo-host/metrics", "http://metrics-server", "", "", []int{502}, "least_loaded", 2)
// Complex routing with `url_map` without global url_prefix
ui = &UserInfo{
URLMaps: []URLMap{
{
SrcPaths: getRegexs([]string{"/api/v1/write"}),
URLPrefix: mustParseURL("http://vminsert/0/prometheus"),
RetryStatusCodes: []int{},
DropSrcPathPrefixParts: intp(0),
},
{
SrcPaths: getRegexs([]string{"/metrics/a/b"}),
URLPrefix: mustParseURL("http://metrics-server"),
},
},
HeadersConf: HeadersConf{
RequestHeaders: []*Header{
mustNewHeader("'bb: aaa'"),
},
ResponseHeaders: []*Header{
mustNewHeader("'x: y'"),
},
},
RetryStatusCodes: []int{502},
DropSrcPathPrefixParts: intp(2),
}
f(ui, "https://foo-host/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "", "", []int{}, "least_loaded", 0)
f(ui, "https://foo-host/metrics/a/b", "http://metrics-server/b", "", "", []int{502}, "least_loaded", 2)
// Complex routing regexp paths in `url_map`
ui = &UserInfo{

View file

@ -616,7 +616,7 @@ var (
},
&cli.BoolFlag{
Name: vmNativeDisableBinaryProtocol,
Usage: "Whether to use https://docs.victoriametrics.com/#how-to-export-data-in-json-line-format" +
Usage: "Whether to use https://docs.victoriametrics.com/#how-to-export-data-in-json-line-format " +
"instead of https://docs.victoriametrics.com/#how-to-export-data-in-native-format API." +
"Binary export/import API protocol implies less network and resource usage, as it transfers compressed binary data blocks." +
"Non-binary export/import API is less efficient, but supports deduplication if it is configured on vm-native-src-addr side.",

View file

@ -51,30 +51,31 @@ type Series struct {
Measurement string
Field string
LabelPairs []LabelPair
// EmptyTags contains tags in measurement whose value must be empty.
EmptyTags []string
}
var valueEscaper = strings.NewReplacer(`\`, `\\`, `'`, `\'`)
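// fetchQuery builds the InfluxQL select statement for the series, joining
// label filters, empty-tag filters and the optional time filter with `and`, e.g.
// select "value" from "cpu" where "host"::tag='node1' and "e1"::tag=''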
func (s Series) fetchQuery(timeFilter string) string {
f := &strings.Builder{}
fmt.Fprintf(f, "select %q from %q", s.Field, s.Measurement)
if len(s.LabelPairs) > 0 || len(timeFilter) > 0 {
f.WriteString(" where")
conditions := make([]string, 0, len(s.LabelPairs)+len(s.EmptyTags))
for _, pair := range s.LabelPairs {
conditions = append(conditions, fmt.Sprintf("%q::tag='%s'", pair.Name, valueEscaper.Replace(pair.Value)))
}
for i, pair := range s.LabelPairs {
pairV := valueEscaper.Replace(pair.Value)
fmt.Fprintf(f, " %q::tag='%s'", pair.Name, pairV)
if i != len(s.LabelPairs)-1 {
f.WriteString(" and")
}
for _, label := range s.EmptyTags {
conditions = append(conditions, fmt.Sprintf("%q::tag=''", label))
}
if len(timeFilter) > 0 {
if len(s.LabelPairs) > 0 {
f.WriteString(" and")
}
fmt.Fprintf(f, " %s", timeFilter)
conditions = append(conditions, timeFilter)
}
return f.String()
q := fmt.Sprintf("select %q from %q", s.Field, s.Measurement)
if len(conditions) > 0 {
q += fmt.Sprintf(" where %s", strings.Join(conditions, " and "))
}
return q
}
// LabelPair is the key-value record
@ -118,7 +119,7 @@ func NewClient(cfg Config) (*Client, error) {
}
// Database returns database name
func (c Client) Database() string {
func (c *Client) Database() string {
return c.database
}
@ -140,7 +141,7 @@ func timeFilter(start, end string) string {
}
// Explore checks the existing data schema in influx
// by checking available fields and series,
// by checking available (non-empty) tags, fields and measurements
// whose unique combination represents all possible
// time series existing in the database.
// The explore step is required to reduce the load on influx
@ -150,6 +151,8 @@ func timeFilter(start, end string) string {
// May contain non-existing time series.
func (c *Client) Explore() ([]*Series, error) {
log.Printf("Exploring scheme for database %q", c.database)
// {"measurement1": ["value1", "value2"]}
mFields, err := c.fieldsByMeasurement()
if err != nil {
return nil, fmt.Errorf("failed to get field keys: %s", err)
@ -159,6 +162,12 @@ func (c *Client) Explore() ([]*Series, error) {
return nil, fmt.Errorf("found no numeric fields for import in database %q", c.database)
}
// {"measurement1": {"tag1", "tag2"}}
measurementTags, err := c.getMeasurementTags()
if err != nil {
return nil, fmt.Errorf("failed to get tags of measurements: %s", err)
}
series, err := c.getSeries()
if err != nil {
return nil, fmt.Errorf("failed to get series: %s", err)
@ -171,11 +180,17 @@ func (c *Client) Explore() ([]*Series, error) {
log.Printf("skip measurement %q since it has no fields", s.Measurement)
continue
}
tags, ok := measurementTags[s.Measurement]
if !ok {
return nil, fmt.Errorf("failed to find tags of measurement %s", s.Measurement)
}
emptyTags := getEmptyTags(tags, s.LabelPairs)
for _, field := range fields {
is := &Series{
Measurement: s.Measurement,
Field: field,
LabelPairs: s.LabelPairs,
EmptyTags: emptyTags,
}
iSeries = append(iSeries, is)
}
@ -183,6 +198,22 @@ func (c *Client) Explore() ([]*Series, error) {
return iSeries, nil
}
// getEmptyTags returns tags of a measurement that are missing in a specific series.
// tags represent all tags of a measurement; labelPairs represent tags of a specific series.
func getEmptyTags(tags map[string]struct{}, labelPairs []LabelPair) []string {
labelMap := make(map[string]struct{}, len(labelPairs))
for _, pair := range labelPairs {
labelMap[pair.Name] = struct{}{}
}
result := make([]string, 0, len(tags))
for tag := range tags {
if _, ok := labelMap[tag]; !ok {
result = append(result, tag)
}
}
return result
}
// ChunkedResponse is a wrapper over influx.ChunkedResponse.
// Used for better memory usage control while iterating
// over huge time series.
@ -357,6 +388,57 @@ func (c *Client) getSeries() ([]*Series, error) {
return result, nil
}
// getMeasurementTags gets the tags for each measurement.
// Tags are placed in a map without values (similar to a set) for quick lookups:
// {"measurement1": {"tag1", "tag2"}, "measurement2": {"tag3", "tag4"}}
func (c *Client) getMeasurementTags() (map[string]map[string]struct{}, error) {
com := "show tag keys"
q := influx.Query{
Command: com,
Database: c.database,
RetentionPolicy: c.retention,
Chunked: true,
ChunkSize: c.chunkSize,
}
log.Printf("fetching tag keys: %s", stringify(q))
cr, err := c.QueryAsChunk(q)
if err != nil {
return nil, fmt.Errorf("error while executing query %q: %s", q.Command, err)
}
const tagKey = "tagKey"
var tagsCount int
result := make(map[string]map[string]struct{})
for {
resp, err := cr.NextResponse()
if err != nil {
if err == io.EOF {
break
}
return nil, err
}
if resp.Error() != nil {
return nil, fmt.Errorf("response error for query %q: %s", q.Command, resp.Error())
}
qValues, err := parseResult(resp.Results[0])
if err != nil {
return nil, err
}
for _, qv := range qValues {
if result[qv.name] == nil {
result[qv.name] = make(map[string]struct{}, len(qv.values[tagKey]))
}
for _, tk := range qv.values[tagKey] {
result[qv.name][tk.(string)] = struct{}{}
tagsCount++
}
}
}
log.Printf("found %d tag(s) for %d measurements", tagsCount, len(result))
return result, nil
}
func (c *Client) do(q influx.Query) ([]queryValues, error) {
res, err := c.Query(q)
if err != nil {

View file

@ -73,6 +73,12 @@ func TestFetchQuery(t *testing.T) {
Measurement: "cpu",
Field: "value",
}, "", `select "value" from "cpu"`)
f(&Series{
Measurement: "cpu",
Field: "value1",
EmptyTags: []string{"e1", "e2", "e3"},
}, "", `select "value1" from "cpu" where "e1"::tag='' and "e2"::tag='' and "e3"::tag=''`)
}
func TestTimeFilter(t *testing.T) {

View file

@ -266,7 +266,7 @@ func main() {
},
{
Name: "vm-native",
Usage: "Migrate time series between VictoriaMetrics installations via native binary format",
Usage: "Migrate time series between VictoriaMetrics installations",
Flags: mergeFlags(globalFlags, vmNativeFlags),
Before: beforeFn,
Action: func(c *cli.Context) error {

View file

@ -2137,6 +2137,25 @@ func TestExecExprSuccess(t *testing.T) {
},
})
f(`removeEmptySeries(removeBelowValue(time('a'),150),1)`, []*series{})
// if xFilesFactor is set, a single value in the series needs to be non-null for it to be
// considered non-empty
f(`removeEmptySeries(removeBelowValue(time('a'),150),0)`, []*series{
{
Timestamps: []int64{120000, 180000},
Values: []float64{nan, 180},
Name: "removeBelowValue(a,150)",
Tags: map[string]string{"name": "a"},
},
})
f(`removeEmptySeries(removeBelowValue(time('a'),150),-1)`, []*series{
{
Timestamps: []int64{120000, 180000},
Values: []float64{nan, 180},
Name: "removeBelowValue(a,150)",
Tags: map[string]string{"name": "a"},
},
})
f(`round(time('a',17),-1)`, []*series{
{
Timestamps: []int64{120000, 137000, 154000, 171000, 188000, 205000},

View file

@ -3151,7 +3151,7 @@ func transformRemoveEmptySeries(ec *evalConfig, fe *graphiteql.FuncExpr) (nextSe
xff = xFilesFactor
}
n := aggrCount(s.Values)
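// Keep the series only if the share of non-empty values is strictly
// greater than xFilesFactor; with xff=0 a single non-null point is enough.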
if n/float64(len(s.Values)) < xff {
if n/float64(len(s.Values)) <= xff {
return nil, nil
}
s.expr = fe

View file

@ -108,7 +108,7 @@ func maySortResults(e metricsql.Expr) bool {
switch v := e.(type) {
case *metricsql.FuncExpr:
switch strings.ToLower(v.Name) {
case "sort", "sort_desc",
case "sort", "sort_desc", "limit_offset",
"sort_by_label", "sort_by_label_desc",
"sort_by_label_numeric", "sort_by_label_numeric_desc":
// Results already sorted

View file

@ -9274,6 +9274,75 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`limit_offset(5, 0, sort_by_label_numeric_desc(multiple_labels_numbers_special_chars, "foo"))`, func(t *testing.T) {
t.Parallel()
q := `limit_offset(5, 0, sort_by_label_numeric_desc((
label_set(3, "foo", "1:0:3"),
label_set(4, "foo", "5:0:15"),
label_set(1, "foo", "1:0:2"),
label_set(5, "foo", "7:0:15"),
label_set(7, "foo", "3:0:1"),
label_set(6, "foo", "1:0:2"),
label_set(8, "foo", "9:0:15")
), "foo"))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{8, 8, 8, 8, 8, 8},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{
{
Key: []byte("foo"),
Value: []byte("9:0:15"),
},
}
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{5, 5, 5, 5, 5, 5},
Timestamps: timestampsExpected,
}
r2.MetricName.Tags = []storage.Tag{
{
Key: []byte("foo"),
Value: []byte("7:0:15"),
},
}
r3 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{4, 4, 4, 4, 4, 4},
Timestamps: timestampsExpected,
}
r3.MetricName.Tags = []storage.Tag{
{
Key: []byte("foo"),
Value: []byte("5:0:15"),
},
}
r4 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{7, 7, 7, 7, 7, 7},
Timestamps: timestampsExpected,
}
r4.MetricName.Tags = []storage.Tag{
{
Key: []byte("foo"),
Value: []byte("3:0:1"),
},
}
r5 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{3, 3, 3, 3, 3, 3},
Timestamps: timestampsExpected,
}
r5.MetricName.Tags = []storage.Tag{
{
Key: []byte("foo"),
Value: []byte("1:0:3"),
},
}
resultExpected := []netstorage.Result{r1, r2, r3, r4, r5}
f(q, resultExpected)
})
t.Run(`sort_by_label_numeric(alias_numbers_with_special_chars)`, func(t *testing.T) {
t.Parallel()
q := `sort_by_label_numeric((

View file

@ -15,7 +15,7 @@ import (
var (
maxExportDuration = flag.Duration("search.maxExportDuration", time.Hour*24*30, "The maximum duration for /api/v1/export call")
maxQueryDuration = flag.Duration("search.maxQueryDuration", time.Second*30, "The maximum duration for query execution. It can be overridden on a per-query basis via 'timeout' query arg")
maxQueryDuration = flag.Duration("search.maxQueryDuration", time.Second*30, "The maximum duration for query execution. It can be overridden to a smaller value on a per-query basis via 'timeout' query arg")
maxStatusRequestDuration = flag.Duration("search.maxStatusRequestDuration", time.Minute*5, "The maximum duration for /api/v1/status/* requests")
maxLabelsAPIDuration = flag.Duration("search.maxLabelsAPIDuration", time.Second*5, "The maximum duration for /api/v1/labels, /api/v1/label/.../values and /api/v1/series requests. "+
"See also -search.maxLabelsAPISeries and -search.ignoreExtraFiltersAtLabelsAPI")

View file

@ -1,13 +1,13 @@
{
"files": {
"main.css": "./static/css/main.d781989c.css",
"main.js": "./static/js/main.68e2aae8.js",
"main.js": "./static/js/main.a7037969.js",
"static/js/685.f772060c.chunk.js": "./static/js/685.f772060c.chunk.js",
"static/media/MetricsQL.md": "./static/media/MetricsQL.a00044c91d9781cf8557.md",
"index.html": "./index.html"
},
"entrypoints": [
"static/css/main.d781989c.css",
"static/js/main.68e2aae8.js"
"static/js/main.a7037969.js"
]
}

View file

@ -0,0 +1,5 @@
{
"license": {
"type": "opensource"
}
}

View file

@ -1 +1 @@
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><link rel="apple-touch-icon" href="./favicon.svg"/><link rel="mask-icon" href="./favicon.svg" color="#000000"><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=5"/><meta name="theme-color" content="#000000"/><meta name="description" content="Explore and troubleshoot your VictoriaMetrics data"/><link rel="manifest" href="./manifest.json"/><title>vmui</title><script src="./dashboards/index.js" type="module"></script><meta name="twitter:card" content="summary"><meta name="twitter:title" content="UI for VictoriaMetrics"><meta name="twitter:site" content="@https://victoriametrics.com/"><meta name="twitter:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta name="twitter:image" content="./preview.jpg"><meta property="og:type" content="website"><meta property="og:title" content="UI for VictoriaMetrics"><meta property="og:url" content="https://victoriametrics.com/"><meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data"><script defer="defer" src="./static/js/main.68e2aae8.js"></script><link href="./static/css/main.d781989c.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><link rel="apple-touch-icon" href="./favicon.svg"/><link rel="mask-icon" href="./favicon.svg" color="#000000"><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=5"/><meta name="theme-color" content="#000000"/><meta name="description" content="Explore and troubleshoot your VictoriaMetrics data"/><link rel="manifest" href="./manifest.json"/><title>vmui</title><script src="./dashboards/index.js" type="module"></script><meta name="twitter:card" content="summary"><meta name="twitter:title" content="UI for VictoriaMetrics"><meta name="twitter:site" content="@https://victoriametrics.com/"><meta name="twitter:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta name="twitter:image" content="./preview.jpg"><meta property="og:type" content="website"><meta property="og:title" content="UI for VictoriaMetrics"><meta property="og:url" content="https://victoriametrics.com/"><meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data"><script defer="defer" src="./static/js/main.a7037969.js"></script><link href="./static/css/main.d781989c.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1,4 +1,4 @@
FROM golang:1.23.1 AS build-web-stage
FROM golang:1.23.3 AS build-web-stage
COPY build /build
WORKDIR /build

View file

@ -1,7 +1,6 @@
import { useAppDispatch, useAppState } from "../state/common/StateContext";
import { useEffect, useState } from "preact/compat";
import { ErrorTypes } from "../types";
import { getUrlWithoutTenant } from "../utils/tenants";
const useFetchFlags = () => {
const { serverUrl } = useAppState();
@ -17,7 +16,7 @@ const useFetchFlags = () => {
setIsLoading(true);
try {
const url = getUrlWithoutTenant(serverUrl);
const url = new URL(serverUrl).origin;
const response = await fetch(`${url}/flags`);
const data = await response.text();
const flags = data.split("\n").filter(flag => flag.trim() !== "")

View file

@ -28,7 +28,7 @@ const ExploreLogs: FC = () => {
const [limit, setLimit] = useStateSearchParams(defaultLimit, "limit");
const [query, setQuery] = useStateSearchParams("*", "query");
const [tmpQuery, setTmpQuery] = useState("");
const [isUpdatingQuery, setIsUpdatingQuery] = useState(false);
const [period, setPeriod] = useState<TimeParams>(periodState);
const [queryError, setQueryError] = useState<ErrorTypes | string>("");
@ -70,26 +70,28 @@ const ExploreLogs: FC = () => {
const handleApplyFilter = (val: string) => {
setQuery(prev => `_stream: ${val === "other" ? "{}" : val} AND (${prev})`);
setIsUpdatingQuery(true);
};
const handleUpdateQuery = useCallback(() => {
const handleUpdateQuery = () => {
if (isLoading || dataLogHits.isLoading) {
abortController.abort && abortController.abort();
dataLogHits.abortController.abort && dataLogHits.abortController.abort();
} else {
setQuery(tmpQuery);
handleRunQuery();
}
}, [isLoading, dataLogHits.isLoading]);
};
useEffect(() => {
if (query) handleRunQuery();
if (!query) return;
handleRunQuery();
}, [periodState]);
useEffect(() => {
if (!isUpdatingQuery) return;
handleRunQuery();
setTmpQuery(query);
}, [query]);
setIsUpdatingQuery(false);
}, [query, isUpdatingQuery]);
useEffect(() => {
!hideChart && fetchLogHits(period);
@ -98,10 +100,10 @@ const ExploreLogs: FC = () => {
return (
<div className="vm-explore-logs">
<ExploreLogsHeader
query={tmpQuery}
query={query}
error={queryError}
limit={limit}
onChange={setTmpQuery}
onChange={setQuery}
onChangeLimit={handleChangeLimit}
onRun={handleUpdateQuery}
isLoading={isLoading || dataLogHits.isLoading}

View file

@ -64,11 +64,15 @@ const GroupLogs: FC<TableLogsProps> = ({ logs, settingsRef }) => {
return groupByMultipleKeys(logs, [groupBy]).map((item) => {
const streamValue = item.values[0]?.[groupBy] || "";
const pairs = getStreamPairs(streamValue);
// sort values by time, newest first
const values = item.values.sort((a,b) => new Date(b._time).getTime() - new Date(a._time).getTime());
return {
...item,
keys: item.keys,
keysString: item.keys.join(""),
values,
pairs,
};
});
}).sort((a, b) => a.keysString.localeCompare(b.keysString)); // sort groups alphabetically by keys
}, [logs, groupBy]);
const handleClickByPair = (value: string) => async (e: MouseEvent<HTMLDivElement>) => {
@ -117,7 +121,7 @@ const GroupLogs: FC<TableLogsProps> = ({ logs, settingsRef }) => {
{groupData.map((item, i) => (
<div
className="vm-group-logs-section"
key={item.keys.join("")}
key={item.keysString}
>
<Accordion
key={String(expandGroups[i])}
@ -129,7 +133,7 @@ const GroupLogs: FC<TableLogsProps> = ({ logs, settingsRef }) => {
{item.pairs.map((pair) => (
<Tooltip
title={copied === pair ? "Copied" : "Copy to clipboard"}
key={`${item.keys.join("")}_${pair}`}
key={`${item.keysString}_${pair}`}
placement={"top-center"}
>
<div

View file

@ -1,4 +1,5 @@
import router, { routerOptions } from "./index";
import { getTenantIdFromUrl } from "../utils/tenants";
export enum NavigationItemType {
internalLink,
@ -24,10 +25,12 @@ interface NavigationConfig {
* Special case for alert link
*/
const getAlertLink = (url: string, showAlertLink: boolean) => {
// see more https://docs.victoriametrics.com/cluster-victoriametrics/?highlight=vmalertproxyurl#vmalert
// see more https://docs.victoriametrics.com/cluster-victoriametrics/#vmalert
const isCluster = !!getTenantIdFromUrl(url);
const value = isCluster ? `${url}/vmalert` : url.replace(/\/prometheus$/, "/vmalert");
return {
label: "Alerts",
value: `${url}/vmalert`,
value,
type: NavigationItemType.externalLink,
hide: !showAlertLink,
};

47
apptest/README.md Normal file
View file

@ -0,0 +1,47 @@
# App Integration Tests
The `apptest` package contains the integration tests for the VictoriaMetrics
applications (such as vmstorage, vminsert, and vmselect).
An integration test aims at verifying the behavior of an application as a whole,
as opposed to a unit test that verifies the behavior of a building block of an
application.
To achieve that, an integration test starts an application in a separate process
and then issues HTTP requests to it and verifies the responses, examines the
metrics the app exposes and/or the files it creates, etc.
Note that the object under test may be not just a single app, but several apps
working together. A good example is VictoriaMetrics cluster. An integration test
may reproduce an arbitrary cluster configuration and verify how the components
work together as a system.
The package provides a collection of helpers to start applications and make
queries to them:
- `app.go` - contains the generic code for starting an application and should
not be used by integration tests directly.
- `{vmstorage,vminsert,etc}.go` - build on top of `app.go` and provide the
code for starting a specific application.
- `client.go` - provides helper functions for sending HTTP requests to
applications.
The integration tests themselves reside in `tests/*_test.go` files. Apart from having
the `_test` suffix, there are no strict rules for naming a file, but the
name should reflect the prevailing purpose of the tests located in that file.
For example, `sharding_test.go` aims at testing data sharding.
Since integration tests start applications in a separate process, they require
the application binary files to be built and put into the `bin` directory. The
build rule used for running integration tests, `make integration-test`,
accounts for that: it builds all application binaries before running the tests.
But if you want to run the tests without `make`, e.g. by executing
`go test ./apptest/...`, you will need to build the binaries first (for example,
by executing `make all`).
Not all binaries can be built from the `master` branch: cluster binaries can be
built only from the `cluster` branch. Hence, not all test cases are suitable to
run in both branches:
- If a test uses binaries from the `cluster` branch, its name should be prefixed
with `TestCluster`.
- If a test uses binaries from the `master` branch, its name should be prefixed
with `TestVmsingle`.
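For orientation, here is a minimal sketch of what such a test could look like.
Note that `MustStartVmsingle` and `HealthURL` are hypothetical names used for
illustration only; see the actual starter helpers in the
`{vmstorage,vminsert,etc}.go` files:

```go
package tests

import (
	"net/http"
	"testing"

	"github.com/VictoriaMetrics/VictoriaMetrics/apptest"
)

func TestVmsingleIsHealthy(t *testing.T) {
	// MustStartVmsingle is a hypothetical starter helper.
	app := apptest.MustStartVmsingle(t)
	defer app.Stop()

	cli := apptest.NewClient()
	defer cli.CloseConnections()

	// Client.Get fails the test unless the response code matches the expected one.
	cli.Get(t, app.HealthURL(), http.StatusOK) // HealthURL is assumed as well
}
```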

249
apptest/app.go Normal file
View file

@ -0,0 +1,249 @@
package apptest
import (
"bufio"
"fmt"
"io"
"log"
"os"
"os/exec"
"reflect"
"regexp"
"strings"
"time"
)
// Regular expressions for runtime information to extract from the app logs.
var (
storageDataPathRE = regexp.MustCompile(`successfully opened storage "(.*)"`)
httpListenAddrRE = regexp.MustCompile(`started server at http://(.*:\d{1,5})/`)
vminsertAddrRE = regexp.MustCompile(`accepting vminsert conns at (.*:\d{1,5})$`)
vmselectAddrRE = regexp.MustCompile(`accepting vmselect conns at (.*:\d{1,5})$`)
)
// app represents an instance of some VictoriaMetrics server (such as vmstorage,
// vminsert, or vmselect).
type app struct {
instance string
binary string
flags []string
process *os.Process
}
// appOptions holds the optional configuration of an app, such as default flags
// to set and things to extract from the app's log.
type appOptions struct {
defaultFlags map[string]string
extractREs []*regexp.Regexp
}
// startApp starts an instance of an app using the app binary file path and
// flags. When the opts are set, it also sets the default flag values and
// extracts runtime information from the app's log.
//
// If the app has started successfully and all the requested items have been
// extracted from the logs, the function returns the instance of the app and the
// extracted items. The extracted items are returned in the same order as the
// corresponding extract regular expressions were provided in the opts.
//
// The function returns an error if the application has failed to start or the
// function has timed out extracting items from the log (normally because no log
// records match the regular expression).
func startApp(instance string, binary string, flags []string, opts *appOptions) (*app, []string, error) {
flags = setDefaultFlags(flags, opts.defaultFlags)
cmd := exec.Command(binary, flags...)
stdout, err := cmd.StdoutPipe()
if err != nil {
return nil, nil, err
}
stderr, err := cmd.StderrPipe()
if err != nil {
return nil, nil, err
}
if err := cmd.Start(); err != nil {
return nil, nil, err
}
app := &app{
instance: instance,
binary: binary,
flags: flags,
process: cmd.Process,
}
go app.processOutput("stdout", stdout, app.writeToStderr)
lineProcessors := make([]lineProcessor, len(opts.extractREs))
reExtractors := make([]*reExtractor, len(opts.extractREs))
timeout := time.NewTimer(5 * time.Second).C
for i, re := range opts.extractREs {
reExtractors[i] = newREExtractor(re, timeout)
lineProcessors[i] = reExtractors[i].extractRE
}
go app.processOutput("stderr", stderr, append(lineProcessors, app.writeToStderr)...)
extracts, err := extractREs(reExtractors, timeout)
if err != nil {
app.Stop()
return nil, nil, err
}
return app, extracts, nil
}
// setDefaultFlags adds flags with default values to `flags` if it does not
// initially contain them.
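// For example (the flag names are illustrative only), given flags
// ["-retentionPeriod=1"] and defaults {"-retentionPeriod": "100y",
// "-httpListenAddr": "127.0.0.1:0"}, it returns
// ["-retentionPeriod=1", "-httpListenAddr=127.0.0.1:0"].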
func setDefaultFlags(flags []string, defaultFlags map[string]string) []string {
for _, flag := range flags {
for name := range defaultFlags {
if strings.HasPrefix(flag, name) {
delete(defaultFlags, name)
continue
}
}
}
for name, value := range defaultFlags {
flags = append(flags, name+"="+value)
}
return flags
}
// Stop sends the app process a SIGINT signal and waits until it terminates
// gracefully.
func (app *app) Stop() {
if err := app.process.Signal(os.Interrupt); err != nil {
log.Fatalf("Could not send SIGINT signal to %s process: %v", app.instance, err)
}
if _, err := app.process.Wait(); err != nil {
log.Fatalf("Could not wait for %s process completion: %v", app.instance, err)
}
}
// String returns the string representation of the app state.
func (app *app) String() string {
return fmt.Sprintf("{instance: %q binary: %q flags: %q}", app.instance, app.binary, app.flags)
}
// lineProcessor is a function that is applied to each line of the app
// output (stdout or stderr). The function returns true to indicate to the caller
// that it has completed its work and should not be called again.
type lineProcessor func(line string) (done bool)
// processOutput invokes a set of processors on each line of app output (stdout
// or stderr). Once a line processor is done (returns true) it is never invoked
// again.
//
// A simple use case for this is to pipe the output of the child process to the
// output of the parent process. A more sophisticated one is to retrieve some
// runtime information from the child process logs, such as the server's
// host:port.
func (app *app) processOutput(outputName string, output io.Reader, lps ...lineProcessor) {
activeLPs := map[int]lineProcessor{}
for i, lp := range lps {
activeLPs[i] = lp
}
scanner := bufio.NewScanner(output)
for scanner.Scan() {
line := scanner.Text()
for i, process := range activeLPs {
if process(line) {
delete(activeLPs, i)
}
}
}
if err := scanner.Err(); err != nil {
log.Printf("could not scan %s %s: %v", app.instance, outputName, err)
}
}
// writeToStderr is a line processor that writes the line to the stderr.
// The function always returns false to indicate to its caller that each line
// must be written to the stderr.
func (app *app) writeToStderr(line string) bool {
fmt.Fprintf(os.Stderr, "%s %s\n", app.instance, line)
return false
}
// extractREs waits until all reExtractors return the result and then returns
// the combined result with items ordered the same way as reExtractors.
//
// The function returns an error if the timeout occurs before all reExtractors
// finish their work.
func extractREs(reExtractors []*reExtractor, timeout <-chan time.Time) ([]string, error) {
n := len(reExtractors)
notFoundREs := make(map[int]string)
extracts := make([]string, n)
cases := make([]reflect.SelectCase, n+1)
for i, x := range reExtractors {
cases[i] = x.selectCase
notFoundREs[i] = x.re.String()
}
cases[n] = reflect.SelectCase{
Dir: reflect.SelectRecv,
Chan: reflect.ValueOf(timeout),
}
for notFound := n; notFound > 0; {
i, value, _ := reflect.Select(cases)
if i == n {
// n-th select case means timeout.
values := func(m map[int]string) []string {
s := []string{}
for _, v := range m {
s = append(s, v)
}
return s
}
return nil, fmt.Errorf("could not extract some or all regexps from stderr: %q", values(notFoundREs))
}
extracts[i] = value.String()
delete(notFoundREs, i)
notFound--
}
return extracts, nil
}
// reExtractor extracts some information based on a regular expression from the
// app output within a timeout.
type reExtractor struct {
re *regexp.Regexp
result chan string
timeout <-chan time.Time
selectCase reflect.SelectCase
}
// newREExtractor creates a new reExtractor based on a regexp and a timeout.
func newREExtractor(re *regexp.Regexp, timeout <-chan time.Time) *reExtractor {
result := make(chan string)
return &reExtractor{
re: re,
result: result,
timeout: timeout,
selectCase: reflect.SelectCase{
Dir: reflect.SelectRecv,
Chan: reflect.ValueOf(result),
},
}
}
// extractRE is a line processor that extracts some information from a line
// based on a regular expression. The function returns true (to request that the
// caller does not invoke it again) either when the match is found or due to
// the timeout. The found match is written to the x.result channel, and it is
// important that this channel is monitored by a separate goroutine, otherwise
// the function will block.
func (x *reExtractor) extractRE(line string) bool {
submatch := x.re.FindSubmatch([]byte(line))
if len(submatch) == 2 {
select {
case x.result <- string(submatch[1]):
case <-x.timeout:
}
return true
}
return false
}
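// A sketch of how these pieces are assumed to fit together (the regexp and
// the stderrPipe name are hypothetical): extract a host:port from the app
// logs within a timeout:
//
// timeout := time.After(5 * time.Second)
// x := newREExtractor(regexp.MustCompile(`listening at (\S+)`), timeout)
// go app.processOutput("stderr", stderrPipe, x.extractRE)
// addrs, err := extractREs([]*reExtractor{x}, timeout)
// // addrs[0] holds the extracted host:port; err is non-nil on timeout.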

158
apptest/client.go Normal file
View file

@ -0,0 +1,158 @@
package apptest
import (
"io"
"net/http"
"net/url"
"strconv"
"strings"
"testing"
)
// Client is used for interacting with the apps over the network.
//
// At the moment it only supports the HTTP protocol but may be extended to support
// RPCs, etc.
type Client struct {
httpCli *http.Client
}
// NewClient creates a new client.
func NewClient() *Client {
return &Client{
httpCli: &http.Client{
Transport: &http.Transport{},
},
}
}
// CloseConnections closes client connections.
func (c *Client) CloseConnections() {
c.httpCli.CloseIdleConnections()
}
// Get sends an HTTP GET request. Once the function receives a response, it
// checks whether the response status code matches the expected one and returns
// the response body to the caller.
func (c *Client) Get(t *testing.T, url string, wantStatusCode int) string {
t.Helper()
return c.do(t, http.MethodGet, url, "", "", wantStatusCode)
}
// Post sends an HTTP POST request. Once the function receives a response, it
// checks whether the response status code matches the expected one and returns
// the response body to the caller.
func (c *Client) Post(t *testing.T, url, contentType, data string, wantStatusCode int) string {
t.Helper()
return c.do(t, http.MethodPost, url, contentType, data, wantStatusCode)
}
// PostForm sends an HTTP POST request containing the POST-form data. Once the
// function receives a response, it checks whether the response status code
// matches the expected one and returns the response body to the caller.
func (c *Client) PostForm(t *testing.T, url string, data url.Values, wantStatusCode int) string {
t.Helper()
return c.Post(t, url, "application/x-www-form-urlencoded", data.Encode(), wantStatusCode)
}
// do prepares an HTTP request, sends it to the server, receives the response
// from the server, ensures the response code matches the expected one, reads
// the entire response body and returns it to the caller.
func (c *Client) do(t *testing.T, method, url, contentType, data string, wantStatusCode int) string {
t.Helper()
req, err := http.NewRequest(method, url, strings.NewReader(data))
if err != nil {
t.Fatalf("could not create a HTTP request: %v", err)
}
if len(contentType) > 0 {
req.Header.Add("Content-Type", contentType)
}
res, err := c.httpCli.Do(req)
if err != nil {
t.Fatalf("could not send HTTP request: %v", err)
}
body := readAllAndClose(t, res.Body)
if got, want := res.StatusCode, wantStatusCode; got != want {
t.Fatalf("unexpected response code: got %d, want %d (body: %s)", got, want, body)
}
return body
}
// readAllAndClose reads everything from the response body and then closes it.
func readAllAndClose(t *testing.T, responseBody io.ReadCloser) string {
t.Helper()
defer responseBody.Close()
b, err := io.ReadAll(responseBody)
if err != nil {
t.Fatalf("could not read response body: %d", err)
}
return string(b)
}
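// A usage sketch (the URL is hypothetical): a test that fetches an endpoint
// and fails unless the server responds with 200 OK:
//
// cli := NewClient()
// defer cli.CloseConnections()
// body := cli.Get(t, "http://127.0.0.1:8428/health", http.StatusOK)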
// ServesMetrics is used to retrieve the app's metrics.
//
// This type is expected to be embedded by the apps that serve metrics.
type ServesMetrics struct {
metricsURL string
cli *Client
}
// GetIntMetric retrieves the value of a metric served by an app at /metrics URL.
// The value is then converted to int.
func (app *ServesMetrics) GetIntMetric(t *testing.T, metricName string) int {
return int(app.GetMetric(t, metricName))
}
// GetMetric retrieves the value of a metric served by an app at /metrics URL.
func (app *ServesMetrics) GetMetric(t *testing.T, metricName string) float64 {
t.Helper()
metrics := app.cli.Get(t, app.metricsURL, http.StatusOK)
for _, metric := range strings.Split(metrics, "\n") {
value, found := strings.CutPrefix(metric, metricName)
if found {
value = strings.Trim(value, " ")
res, err := strconv.ParseFloat(value, 64)
if err != nil {
t.Fatalf("could not parse metric value %s: %v", metric, err)
}
return res
}
}
t.Fatalf("metic not found: %s", metricName)
return 0
}
// GetMetricsByPrefix retrieves the values of all metrics that start with the
// given prefix.
func (app *ServesMetrics) GetMetricsByPrefix(t *testing.T, prefix string) []float64 {
t.Helper()
values := []float64{}
metrics := app.cli.Get(t, app.metricsURL, http.StatusOK)
for _, metric := range strings.Split(metrics, "\n") {
if !strings.HasPrefix(metric, prefix) {
continue
}
parts := strings.Split(metric, " ")
if len(parts) < 2 {
t.Fatalf("unexpected record format: got %q, want metric name and value separated by a space", metric)
}
value, err := strconv.ParseFloat(parts[len(parts)-1], 64)
if err != nil {
t.Fatalf("could not parse metric value %s: %v", metric, err)
}
values = append(values, value)
}
return values
}
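// A usage sketch (the metric names are hypothetical): given a /metrics
// payload containing the line `vm_rows_inserted_total 42`:
//
// n := app.GetIntMetric(t, "vm_rows_inserted_total") // n == 42
// perStorage := app.GetMetricsByPrefix(t, "vm_rpc_rows_sent_total")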

153
apptest/model.go Normal file
View file

@ -0,0 +1,153 @@
package apptest
import (
"encoding/json"
"fmt"
"slices"
"strconv"
"strings"
"testing"
"time"
)
// PrometheusQuerier contains methods available in the Prometheus-like HTTP API for querying
type PrometheusQuerier interface {
PrometheusAPIV1Query(t *testing.T, query, time, step string, opts QueryOpts) *PrometheusAPIV1QueryResponse
PrometheusAPIV1QueryRange(t *testing.T, query, start, end, step string, opts QueryOpts) *PrometheusAPIV1QueryResponse
PrometheusAPIV1Series(t *testing.T, matchQuery string, opts QueryOpts) *PrometheusAPIV1SeriesResponse
}
// PrometheusWriter contains methods available in the Prometheus-like HTTP API for writing new data
type PrometheusWriter interface {
PrometheusAPIV1ImportPrometheus(t *testing.T, records []string, opts QueryOpts)
}
// StorageFlusher defines a method that forces the flushing of data inserted
// into the storage, so it becomes available for searching immediately.
type StorageFlusher interface {
ForceFlush(t *testing.T)
}
// PrometheusWriteQuerier encompasses the methods for writing, flushing and
// querying the data.
type PrometheusWriteQuerier interface {
PrometheusWriter
PrometheusQuerier
StorageFlusher
}
// QueryOpts contains various params used for querying or ingesting data
type QueryOpts struct {
Tenant string
Timeout string
}
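// For example, the tests in this diff construct it as:
//
// opts := QueryOpts{Timeout: "5s", Tenant: "0"}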
// PrometheusAPIV1QueryResponse is an in-memory representation of the
// /prometheus/api/v1/query or /prometheus/api/v1/query_range response.
type PrometheusAPIV1QueryResponse struct {
Status string
Data *QueryData
}
// NewPrometheusAPIV1QueryResponse is a test helper function that creates a new
// instance of PrometheusAPIV1QueryResponse by unmarshalling a JSON string.
func NewPrometheusAPIV1QueryResponse(t *testing.T, s string) *PrometheusAPIV1QueryResponse {
t.Helper()
res := &PrometheusAPIV1QueryResponse{}
if err := json.Unmarshal([]byte(s), res); err != nil {
t.Fatalf("could not unmarshal query response: %v", err)
}
return res
}
// QueryData holds the query result along with its type.
type QueryData struct {
ResultType string
Result []*QueryResult
}
// QueryResult holds the metric name (in the form of label name-value
// collection) and its samples.
//
// Sample or Samples field is set for /prometheus/api/v1/query or
// /prometheus/api/v1/query_range response respectively.
type QueryResult struct {
Metric map[string]string
Sample *Sample `json:"value"`
Samples []*Sample `json:"values"`
}
// Sample is a time series value at a given timestamp.
type Sample struct {
Timestamp int64
Value float64
}
// NewSample is a test helper function that creates a new sample from a time
// in RFC3339 format and a value.
func NewSample(t *testing.T, timeStr string, value float64) *Sample {
t.Helper()
parsedTime, err := time.Parse(time.RFC3339, timeStr)
if err != nil {
t.Fatalf("could not parse RFC3339 time %q: %v", timeStr, err)
}
return &Sample{parsedTime.Unix(), value}
}
// UnmarshalJSON populates the sample fields from a JSON string.
func (s *Sample) UnmarshalJSON(b []byte) error {
var (
ts int64
v string
)
raw := []any{&ts, &v}
if err := json.Unmarshal(b, &raw); err != nil {
return err
}
if got, want := len(raw), 2; got != want {
return fmt.Errorf("unexpected number of fields: got %d, want %d (raw sample: %s)", got, want, string(b))
}
s.Timestamp = ts
var err error
s.Value, err = strconv.ParseFloat(v, 64)
if err != nil {
return fmt.Errorf("could not parse sample value %q: %w", v, err)
}
return nil
}
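// For example, unmarshalling the two-element JSON array `[1652169600, "1"]`
// (the sample encoding used by /prometheus/api/v1/query responses) yields
// Sample{Timestamp: 1652169600, Value: 1}:
//
// var s Sample
// err := json.Unmarshal([]byte(`[1652169600, "1"]`), &s)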
// PrometheusAPIV1SeriesResponse is an in-memory representation of the
// /prometheus/api/v1/series response.
type PrometheusAPIV1SeriesResponse struct {
Status string
IsPartial bool
Data []map[string]string
}
// NewPrometheusAPIV1SeriesResponse is a test helper function that creates a new
// instance of PrometheusAPIV1SeriesResponse by unmarshalling a JSON string.
func NewPrometheusAPIV1SeriesResponse(t *testing.T, s string) *PrometheusAPIV1SeriesResponse {
t.Helper()
res := &PrometheusAPIV1SeriesResponse{}
if err := json.Unmarshal([]byte(s), res); err != nil {
t.Fatalf("could not unmarshal series response: %v", err)
}
return res
}
// Sort sorts the response data.
func (r *PrometheusAPIV1SeriesResponse) Sort() {
str := func(m map[string]string) string {
s := []string{}
for k, v := range m {
s = append(s, k+v)
}
slices.Sort(s)
return strings.Join(s, "")
}
slices.SortFunc(r.Data, func(a, b map[string]string) int {
return strings.Compare(str(a), str(b))
})
}

229
apptest/testcase.go Normal file
View file

@ -0,0 +1,229 @@
package apptest
import (
"fmt"
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/google/go-cmp/cmp"
)
// TestCase holds the state and defines the clean-up procedure common to all test
// cases.
type TestCase struct {
t *testing.T
cli *Client
startedApps []Stopper
}
// Stopper is an interface of objects that need to be stopped via a Stop() call.
type Stopper interface {
Stop()
}
// NewTestCase creates a new test case.
func NewTestCase(t *testing.T) *TestCase {
return &TestCase{t, NewClient(), nil}
}
// Dir returns the directory name that should be used as the -storageDataPath.
func (tc *TestCase) Dir() string {
return tc.t.Name()
}
// Client returns an instance of the client that can be used for interacting with
// the app(s) under test.
func (tc *TestCase) Client() *Client {
return tc.cli
}
// Stop performs the test case clean up, such as closing all client connections
// and removing the -storageDataDir directory.
//
// Note that the -storageDataPath directory is not removed in case of test
// case failure to allow for further manual debugging.
func (tc *TestCase) Stop() {
tc.cli.CloseConnections()
for _, app := range tc.startedApps {
app.Stop()
}
if !tc.t.Failed() {
fs.MustRemoveAll(tc.Dir())
}
}
// MustStartVmsingle is a test helper function that starts an instance of
// vmsingle and fails the test if the app fails to start.
func (tc *TestCase) MustStartVmsingle(instance string, flags []string) *Vmsingle {
tc.t.Helper()
app, err := StartVmsingle(instance, flags, tc.cli)
if err != nil {
tc.t.Fatalf("Could not start %s: %v", instance, err)
}
tc.addApp(app)
return app
}
// MustStartVmstorage is a test helper function that starts an instance of
// vmstorage and fails the test if the app fails to start.
func (tc *TestCase) MustStartVmstorage(instance string, flags []string) *Vmstorage {
tc.t.Helper()
app, err := StartVmstorage(instance, flags, tc.cli)
if err != nil {
tc.t.Fatalf("Could not start %s: %v", instance, err)
}
tc.addApp(app)
return app
}
// MustStartVmselect is a test helper function that starts an instance of
// vmselect and fails the test if the app fails to start.
func (tc *TestCase) MustStartVmselect(instance string, flags []string) *Vmselect {
tc.t.Helper()
app, err := StartVmselect(instance, flags, tc.cli)
if err != nil {
tc.t.Fatalf("Could not start %s: %v", instance, err)
}
tc.addApp(app)
return app
}
// MustStartVminsert is a test helper function that starts an instance of
// vminsert and fails the test if the app fails to start.
func (tc *TestCase) MustStartVminsert(instance string, flags []string) *Vminsert {
tc.t.Helper()
app, err := StartVminsert(instance, flags, tc.cli)
if err != nil {
tc.t.Fatalf("Could not start %s: %v", instance, err)
}
tc.addApp(app)
return app
}
type vmcluster struct {
*Vminsert
*Vmselect
vmstorages []*Vmstorage
}
func (c *vmcluster) ForceFlush(t *testing.T) {
for _, s := range c.vmstorages {
s.ForceFlush(t)
}
}
// MustStartCluster starts a typical cluster configuration.
//
// The cluster consists of two vmstorages, one vminsert and one vmselect, no
// data replication.
//
// Such a configuration is suitable for tests that don't verify the
// cluster-specific behavior (such as sharding, replication, or multilevel
// vmselect) but instead just need a typical cluster configuration to verify
// some business logic (such as API surface, or MetricsQL). Such cluster
// tests usually come paired with corresponding vmsingle tests.
func (tc *TestCase) MustStartCluster() PrometheusWriteQuerier {
tc.t.Helper()
vmstorage1 := tc.MustStartVmstorage("vmstorage-1", []string{
"-storageDataPath=" + tc.Dir() + "/vmstorage-1",
"-retentionPeriod=100y",
})
vmstorage2 := tc.MustStartVmstorage("vmstorage-2", []string{
"-storageDataPath=" + tc.Dir() + "/vmstorage-2",
"-retentionPeriod=100y",
})
vminsert := tc.MustStartVminsert("vminsert", []string{
"-storageNode=" + vmstorage1.VminsertAddr() + "," + vmstorage2.VminsertAddr(),
})
vmselect := tc.MustStartVmselect("vmselect", []string{
"-storageNode=" + vmstorage1.VmselectAddr() + "," + vmstorage2.VmselectAddr(),
})
return &vmcluster{vminsert, vmselect, []*Vmstorage{vmstorage1, vmstorage2}}
}
func (tc *TestCase) addApp(app Stopper) {
tc.startedApps = append(tc.startedApps, app)
}
// AssertOptions hold the assertion params, such as got and wanted values as
// well as the message that should be included into the assertion error message
// in case of failure.
//
// In VictoriaMetrics (especially the cluster version) the inserted data does
// not become visible for querying right away. Therefore, the first comparisons
// may fail. AssertOptions allows configuring how many times the actual result
// must be retrieved and compared with the expected one and how long to wait
// between the retries. If these two params (`Retries` and `Period`) are not
// set, the default values will be used.
//
// If it is known that the data is available, then the retry functionality can
// be disabled by setting the `DoNotRetry` field.
//
// AssertOptions are used by the TestCase.Assert() method, and this method uses
// cmp.Diff() from go-cmp package for comparing got and wanted values.
// AssertOptions, therefore, allows passing cmp.Options to cmp.Diff() via the
// `CmpOpts` field.
//
// Finally, the `FailNow` field controls whether the assertion should fail using
// `testing.T.Errorf()` or `testing.T.Fatalf()`.
type AssertOptions struct {
Msg string
Got func() any
Want any
CmpOpts []cmp.Option
DoNotRetry bool
Retries int
Period time.Duration
FailNow bool
}
// Assert compares the actual result with the expected one possibly multiple
// times in order to account for the fact that the inserted data does not become
// available for querying right away (especially in cluster version of
// VictoriaMetrics).
func (tc *TestCase) Assert(opts *AssertOptions) {
tc.t.Helper()
const (
defaultRetries = 20
defaultPeriod = 100 * time.Millisecond
)
if opts.DoNotRetry {
opts.Retries = 1
opts.Period = 0
} else {
if opts.Retries <= 0 {
opts.Retries = defaultRetries
}
if opts.Period <= 0 {
opts.Period = defaultPeriod
}
}
var diff string
for range opts.Retries {
diff = cmp.Diff(opts.Want, opts.Got(), opts.CmpOpts...)
if diff == "" {
return
}
time.Sleep(opts.Period)
}
msg := fmt.Sprintf("%s (-want, +got):\n%s", opts.Msg, diff)
if opts.FailNow {
tc.t.Fatal(msg)
} else {
tc.t.Error(msg)
}
}
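// A usage sketch mirroring the tests later in this diff (vmselect, opts, and
// want are assumed to be defined by the test): retry the series query with
// the default retries/period until it matches the expected response:
//
// tc.Assert(&AssertOptions{
// 	Msg:  "unexpected /api/v1/series response",
// 	Got:  func() any { return vmselect.PrometheusAPIV1Series(t, `{__name__=~".*"}`, opts) },
// 	Want: want,
// })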

View file

@ -0,0 +1,207 @@
package tests
import (
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/apptest"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
)
// Data used in examples in
// https://docs.victoriametrics.com/keyconcepts/#instant-query and
// https://docs.victoriametrics.com/keyconcepts/#range-query
var docData = []string{
"foo_bar 1.00 1652169600000", // 2022-05-10T08:00:00Z
"foo_bar 2.00 1652169660000", // 2022-05-10T08:01:00Z
"foo_bar 3.00 1652169720000", // 2022-05-10T08:02:00Z
"foo_bar 5.00 1652169840000", // 2022-05-10T08:04:00Z, one point missed
"foo_bar 5.50 1652169960000", // 2022-05-10T08:06:00Z, one point missed
"foo_bar 5.50 1652170020000", // 2022-05-10T08:07:00Z
"foo_bar 4.00 1652170080000", // 2022-05-10T08:08:00Z
"foo_bar 3.50 1652170260000", // 2022-05-10T08:11:00Z, two points missed
"foo_bar 3.25 1652170320000", // 2022-05-10T08:12:00Z
"foo_bar 3.00 1652170380000", // 2022-05-10T08:13:00Z
"foo_bar 2.00 1652170440000", // 2022-05-10T08:14:00Z
"foo_bar 1.00 1652170500000", // 2022-05-10T08:15:00Z
"foo_bar 4.00 1652170560000", // 2022-05-10T08:16:00Z
}
// TestSingleKeyConceptsQuery verifies cases from https://docs.victoriametrics.com/keyconcepts/#query-data
// for vm-single.
func TestSingleKeyConceptsQuery(t *testing.T) {
tc := apptest.NewTestCase(t)
defer tc.Stop()
sut := tc.MustStartVmsingle("vmsingle", []string{
"-storageDataPath=" + tc.Dir() + "/vmstorage",
"-retentionPeriod=100y",
})
testKeyConceptsQueryData(t, sut)
}
// TestClusterKeyConceptsQueryData verifies cases from https://docs.victoriametrics.com/keyconcepts/#query-data
// for vm-cluster.
func TestClusterKeyConceptsQueryData(t *testing.T) {
tc := apptest.NewTestCase(t)
defer tc.Stop()
sut := tc.MustStartCluster()
testKeyConceptsQueryData(t, sut)
}
// testKeyConceptsQueryData verifies cases from https://docs.victoriametrics.com/keyconcepts/#query-data
func testKeyConceptsQueryData(t *testing.T, sut apptest.PrometheusWriteQuerier) {
opts := apptest.QueryOpts{Timeout: "5s", Tenant: "0"}
// Insert example data from documentation.
sut.PrometheusAPIV1ImportPrometheus(t, docData, opts)
sut.ForceFlush(t)
testInstantQuery(t, sut, opts)
testRangeQuery(t, sut, opts)
testRangeQueryIsEquivalentToManyInstantQueries(t, sut, opts)
}
// testInstantQuery verifies the statements made in the `Instant query` section
// of the VictoriaMetrics documentation. See:
// https://docs.victoriametrics.com/keyconcepts/#instant-query
func testInstantQuery(t *testing.T, q apptest.PrometheusQuerier, opts apptest.QueryOpts) {
// Get the value of the foo_bar time series at 2022-05-10T08:03:00Z with the
// step of 5m and timeout 5s. There is no sample at exactly this timestamp.
// Therefore, VictoriaMetrics will search for the nearest sample within the
// [time-5m..time] interval.
got := q.PrometheusAPIV1Query(t, "foo_bar", "2022-05-10T08:03:00.000Z", "5m", opts)
want := apptest.NewPrometheusAPIV1QueryResponse(t, `{"data":{"result":[{"metric":{"__name__":"foo_bar"},"value":[1652169780,"3"]}]}}`)
opt := cmpopts.IgnoreFields(apptest.PrometheusAPIV1QueryResponse{}, "Status", "Data.ResultType")
if diff := cmp.Diff(want, got, opt); diff != "" {
t.Errorf("unexpected response (-want, +got):\n%s", diff)
}
// Get the value of the foo_bar time series at 2022-05-10T08:18:00Z with the
// step of 1m and timeout 5s. There is no sample at this timestamp.
// Therefore, VictoriaMetrics will search for the nearest sample within the
// [time-1m..time] interval. Since the nearest sample is 2m away and the
// step is 1m, VictoriaMetrics must return an empty response.
got = q.PrometheusAPIV1Query(t, "foo_bar", "2022-05-10T08:18:00.000Z", "1m", opts)
if len(got.Data.Result) > 0 {
t.Errorf("unexpected response: got non-empty result, want empty result:\n%v", got)
}
}
// testRangeQuery verifies the statements made in the `Range query` section of
// the VictoriaMetrics documentation. See:
// https://docs.victoriametrics.com/keyconcepts/#range-query
func testRangeQuery(t *testing.T, q apptest.PrometheusQuerier, opts apptest.QueryOpts) {
f := func(start, end, step string, wantSamples []*apptest.Sample) {
t.Helper()
got := q.PrometheusAPIV1QueryRange(t, "foo_bar", start, end, step, opts)
want := apptest.NewPrometheusAPIV1QueryResponse(t, `{"data": {"result": [{"metric": {"__name__": "foo_bar"}, "values": []}]}}`)
want.Data.Result[0].Samples = wantSamples
opt := cmpopts.IgnoreFields(apptest.PrometheusAPIV1QueryResponse{}, "Status", "Data.ResultType")
if diff := cmp.Diff(want, got, opt); diff != "" {
t.Errorf("unexpected response (-want, +got):\n%s", diff)
}
}
// Verify the statement that the query result for
// [2022-05-10T07:59:00Z..2022-05-10T08:17:00Z] time range and 1m step will
// contain 17 points.
f("2022-05-10T07:59:00.000Z", "2022-05-10T08:17:00.000Z", "1m", []*apptest.Sample{
// Sample for 2022-05-10T07:59:00Z is missing because the time series has
// samples only starting from 8:00.
apptest.NewSample(t, "2022-05-10T08:00:00Z", 1),
apptest.NewSample(t, "2022-05-10T08:01:00Z", 2),
apptest.NewSample(t, "2022-05-10T08:02:00Z", 3),
apptest.NewSample(t, "2022-05-10T08:03:00Z", 3),
apptest.NewSample(t, "2022-05-10T08:04:00Z", 5),
apptest.NewSample(t, "2022-05-10T08:05:00Z", 5),
apptest.NewSample(t, "2022-05-10T08:06:00Z", 5.5),
apptest.NewSample(t, "2022-05-10T08:07:00Z", 5.5),
apptest.NewSample(t, "2022-05-10T08:08:00Z", 4),
apptest.NewSample(t, "2022-05-10T08:09:00Z", 4),
// Sample for 2022-05-10T08:10:00Z is missing because there is no sample
// within the [8:10 - 1m .. 8:10] interval.
apptest.NewSample(t, "2022-05-10T08:11:00Z", 3.5),
apptest.NewSample(t, "2022-05-10T08:12:00Z", 3.25),
apptest.NewSample(t, "2022-05-10T08:13:00Z", 3),
apptest.NewSample(t, "2022-05-10T08:14:00Z", 2),
apptest.NewSample(t, "2022-05-10T08:15:00Z", 1),
apptest.NewSample(t, "2022-05-10T08:16:00Z", 4),
apptest.NewSample(t, "2022-05-10T08:17:00Z", 4),
})
// Verify the statement that a query is executed at start, start+step,
// start+2*step, …, start+N*step timestamps, where N is the whole number
// of steps that fit between start and end.
f("2022-05-10T08:00:01.000Z", "2022-05-10T08:02:00.000Z", "1m", []*apptest.Sample{
apptest.NewSample(t, "2022-05-10T08:00:01Z", 1),
apptest.NewSample(t, "2022-05-10T08:01:01Z", 2),
})
// Verify the statement that a query is executed at start, start+step,
// start+2*step, …, end timestamps, when end = start + N*step.
f("2022-05-10T08:00:00.000Z", "2022-05-10T08:02:00.000Z", "1m", []*apptest.Sample{
apptest.NewSample(t, "2022-05-10T08:00:00Z", 1),
apptest.NewSample(t, "2022-05-10T08:01:00Z", 2),
apptest.NewSample(t, "2022-05-10T08:02:00Z", 3),
})
// If the step isn't set, then it defaults to 5m (5 minutes).
f("2022-05-10T07:59:00.000Z", "2022-05-10T08:17:00.000Z", "", []*apptest.Sample{
// Sample for 2022-05-10T07:59:00Z is missing because the time series has
// samples only starting from 8:00.
apptest.NewSample(t, "2022-05-10T08:04:00Z", 5),
apptest.NewSample(t, "2022-05-10T08:09:00Z", 4),
apptest.NewSample(t, "2022-05-10T08:14:00Z", 2),
})
}
// testRangeQueryIsEquivalentToManyInstantQueries verifies the statement made in
// the `Range query` section of the VictoriaMetrics documentation that a range
// query is actually an instant query executed 1 + (end-start)/step times on the
// time range from start to end. See:
// https://docs.victoriametrics.com/keyconcepts/#range-query
func testRangeQueryIsEquivalentToManyInstantQueries(t *testing.T, q apptest.PrometheusQuerier, opts apptest.QueryOpts) {
f := func(timestamp string, want *apptest.Sample) {
t.Helper()
gotInstant := q.PrometheusAPIV1Query(t, "foo_bar", timestamp, "1m", opts)
if want == nil {
if got, want := len(gotInstant.Data.Result), 0; got != want {
t.Errorf("unexpected instant result size: got %d, want %d", got, want)
}
} else {
got := gotInstant.Data.Result[0].Sample
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("unexpected instant sample (-want, +got):\n%s", diff)
}
}
}
rangeRes := q.PrometheusAPIV1QueryRange(t, "foo_bar", "2022-05-10T07:59:00.000Z", "2022-05-10T08:17:00.000Z", "1m", opts)
rangeSamples := rangeRes.Data.Result[0].Samples
f("2022-05-10T07:59:00.000Z", nil)
f("2022-05-10T08:00:00.000Z", rangeSamples[0])
f("2022-05-10T08:01:00.000Z", rangeSamples[1])
f("2022-05-10T08:02:00.000Z", rangeSamples[2])
f("2022-05-10T08:03:00.000Z", rangeSamples[3])
f("2022-05-10T08:04:00.000Z", rangeSamples[4])
f("2022-05-10T08:05:00.000Z", rangeSamples[5])
f("2022-05-10T08:06:00.000Z", rangeSamples[6])
f("2022-05-10T08:07:00.000Z", rangeSamples[7])
f("2022-05-10T08:08:00.000Z", rangeSamples[8])
f("2022-05-10T08:09:00.000Z", rangeSamples[9])
f("2022-05-10T08:10:00.000Z", nil)
f("2022-05-10T08:11:00.000Z", rangeSamples[10])
f("2022-05-10T08:12:00.000Z", rangeSamples[11])
f("2022-05-10T08:13:00.000Z", rangeSamples[12])
f("2022-05-10T08:14:00.000Z", rangeSamples[13])
f("2022-05-10T08:15:00.000Z", rangeSamples[14])
f("2022-05-10T08:16:00.000Z", rangeSamples[15])
f("2022-05-10T08:17:00.000Z", rangeSamples[16])
}

View file

@ -0,0 +1,72 @@
package tests
import (
"fmt"
"math/rand/v2"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/apptest"
)
func TestClusterMultilevelSelect(t *testing.T) {
tc := apptest.NewTestCase(t)
defer tc.Stop()
// Set up the following multi-level cluster configuration:
//
// vmselect (L2) -> vmselect (L1) -> vmstorage <- vminsert
//
// vminsert writes data into vmstorage.
// vmselect (L2) reads that data via vmselect (L1).
vmstorage := tc.MustStartVmstorage("vmstorage", []string{
"-storageDataPath=" + tc.Dir() + "/vmstorage",
})
vminsert := tc.MustStartVminsert("vminsert", []string{
"-storageNode=" + vmstorage.VminsertAddr(),
})
vmselectL1 := tc.MustStartVmselect("vmselect-level1", []string{
"-storageNode=" + vmstorage.VmselectAddr(),
})
vmselectL2 := tc.MustStartVmselect("vmselect-level2", []string{
"-storageNode=" + vmselectL1.ClusternativeListenAddr(),
})
// Insert 1000 unique time series.
const numMetrics = 1000
records := make([]string, numMetrics)
want := &apptest.PrometheusAPIV1SeriesResponse{
Status: "success",
IsPartial: false,
Data: make([]map[string]string, numMetrics),
}
for i := range numMetrics {
name := fmt.Sprintf("metric_%d", i)
records[i] = fmt.Sprintf("%s %d", name, rand.IntN(1000))
want.Data[i] = map[string]string{"__name__": name}
}
want.Sort()
qopts := apptest.QueryOpts{Tenant: "0"}
vminsert.PrometheusAPIV1ImportPrometheus(t, records, qopts)
vmstorage.ForceFlush(t)
// Retrieve all time series and verify that both vmselect (L1) and
// vmselect (L2) serve the complete set of time series.
got := func(app *apptest.Vmselect) any {
res := app.PrometheusAPIV1Series(t, `{__name__=~".*"}`, qopts)
res.Sort()
return res
}
tc.Assert(&apptest.AssertOptions{
Msg: "unexpected level-1 series count",
Got: func() any { return got(vmselectL1) },
Want: want,
})
tc.Assert(&apptest.AssertOptions{
Msg: "unexpected level-2 series count",
Got: func() any { return got(vmselectL2) },
Want: want,
})
}

View file

@ -0,0 +1,84 @@
package tests
import (
"fmt"
"math/rand/v2"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/apptest"
)
func TestClusterVminsertShardsDataVmselectBuildsFullResultFromShards(t *testing.T) {
tc := apptest.NewTestCase(t)
defer tc.Stop()
// Set up the following cluster configuration:
//
// - two vmstorage instances
// - vminsert points to the two vmstorages, its replication setting
// is off which means it will only shard the incoming data across the two
// vmstorages.
// - vmselect points to the two vmstorages and is expected to query both
// vmstorages and build the full result out of the two partial results.
vmstorage1 := tc.MustStartVmstorage("vmstorage-1", []string{
"-storageDataPath=" + tc.Dir() + "/vmstorage-1",
})
vmstorage2 := tc.MustStartVmstorage("vmstorage-2", []string{
"-storageDataPath=" + tc.Dir() + "/vmstorage-2",
})
vminsert := tc.MustStartVminsert("vminsert", []string{
"-storageNode=" + vmstorage1.VminsertAddr() + "," + vmstorage2.VminsertAddr(),
})
vmselect := tc.MustStartVmselect("vmselect", []string{
"-storageNode=" + vmstorage1.VmselectAddr() + "," + vmstorage2.VmselectAddr(),
})
// Insert 1000 unique time series.
const numMetrics = 1000
records := make([]string, numMetrics)
want := &apptest.PrometheusAPIV1SeriesResponse{
Status: "success",
IsPartial: false,
Data: make([]map[string]string, numMetrics),
}
for i := range numMetrics {
name := fmt.Sprintf("metric_%d", i)
records[i] = fmt.Sprintf("%s %d", name, rand.IntN(1000))
want.Data[i] = map[string]string{"__name__": name}
}
want.Sort()
qopts := apptest.QueryOpts{Tenant: "0"}
vminsert.PrometheusAPIV1ImportPrometheus(t, records, qopts)
vmstorage1.ForceFlush(t)
vmstorage2.ForceFlush(t)
// Verify that the inserted data has indeed been sharded by checking metrics
// exposed by vmstorage.
numMetrics1 := vmstorage1.GetIntMetric(t, "vm_vminsert_metrics_read_total")
if numMetrics1 == 0 {
t.Fatalf("storage-1 has no time series")
}
numMetrics2 := vmstorage2.GetIntMetric(t, "vm_vminsert_metrics_read_total")
if numMetrics2 == 0 {
t.Fatalf("storage-2 has no time series")
}
if numMetrics1+numMetrics2 != numMetrics {
t.Fatalf("unxepected total number of metrics: vmstorage-1 (%d) + vmstorage-2 (%d) != %d", numMetrics1, numMetrics2, numMetrics)
}
// Retrieve all time series and verify that vmselect serves the complete set
// of time series.
tc.Assert(&apptest.AssertOptions{
Msg: "unexpected /api/v1/series response",
Got: func() any {
res := vmselect.PrometheusAPIV1Series(t, `{__name__=~".*"}`, qopts)
res.Sort()
return res
},
Want: want,
})
}

102
apptest/vminsert.go Normal file
View file

@ -0,0 +1,102 @@
package apptest
import (
"fmt"
"net/http"
"regexp"
"strings"
"testing"
"time"
)
// Vminsert holds the state of a vminsert app and provides vminsert-specific
// functions.
type Vminsert struct {
*app
*ServesMetrics
httpListenAddr string
cli *Client
}
// StartVminsert starts an instance of vminsert with the given flags. It also
// sets the default flags and populates the app instance state with runtime
// values extracted from the application log (such as httpListenAddr).
func StartVminsert(instance string, flags []string, cli *Client) (*Vminsert, error) {
app, stderrExtracts, err := startApp(instance, "../../bin/vminsert", flags, &appOptions{
defaultFlags: map[string]string{
"-httpListenAddr": "127.0.0.1:0",
},
extractREs: []*regexp.Regexp{
httpListenAddrRE,
},
})
if err != nil {
return nil, err
}
return &Vminsert{
app: app,
ServesMetrics: &ServesMetrics{
metricsURL: fmt.Sprintf("http://%s/metrics", stderrExtracts[0]),
cli: cli,
},
httpListenAddr: stderrExtracts[0],
cli: cli,
}, nil
}
// PrometheusAPIV1ImportPrometheus is a test helper function that inserts a
// collection of records in Prometheus text exposition format for the given
// tenant by sending an HTTP POST request to
// /prometheus/api/v1/import/prometheus vminsert endpoint.
//
// See https://docs.victoriametrics.com/url-examples/#apiv1importprometheus
func (app *Vminsert) PrometheusAPIV1ImportPrometheus(t *testing.T, records []string, opts QueryOpts) {
t.Helper()
url := fmt.Sprintf("http://%s/insert/%s/prometheus/api/v1/import/prometheus", app.httpListenAddr, opts.Tenant)
wantRowsSentCount := app.rpcRowsSentTotal(t) + len(records)
app.cli.Post(t, url, "text/plain", strings.Join(records, "\n"), http.StatusNoContent)
app.waitUntilSent(t, wantRowsSentCount)
}
// String returns the string representation of the vminsert app state.
func (app *Vminsert) String() string {
return fmt.Sprintf("{app: %s httpListenAddr: %q}", app.app, app.httpListenAddr)
}
// waitUntilSent waits until vminsert sends buffered data to vmstorage.
//
// Waiting is implemented as retrieving the value of the `vm_rpc_rows_sent_total`
// metric and checking whether it is equal to or greater than the wanted value.
// If it is, then the data has been sent to vmstorage.
//
// Unreliable if the records are inserted concurrently.
func (app *Vminsert) waitUntilSent(t *testing.T, wantRowsSentCount int) {
t.Helper()
const (
retries = 20
period = 100 * time.Millisecond
)
for range retries {
if app.rpcRowsSentTotal(t) >= wantRowsSentCount {
return
}
time.Sleep(period)
}
t.Fatalf("timed out while waiting for inserted rows to be sent to vmstorage")
}
// rpcRowsSentTotal retrieves the values of all vminsert
// `vm_rpc_rows_sent_total` metrics (there will be one for each vmstorage) and
// returns their integer sum.
func (app *Vminsert) rpcRowsSentTotal(t *testing.T) int {
total := 0.0
for _, v := range app.GetMetricsByPrefix(t, "vm_rpc_rows_sent_total") {
total += v
}
return int(total)
}

112
apptest/vmselect.go Normal file
View file

@ -0,0 +1,112 @@
package apptest
import (
"fmt"
"net/http"
"net/url"
"regexp"
"testing"
)
// Vmselect holds the state of a vmselect app and provides vmselect-specific
// functions.
type Vmselect struct {
*app
*ServesMetrics
httpListenAddr string
clusternativeListenAddr string
cli *Client
}
// StartVmselect starts an instance of vmselect with the given flags. It also
// sets the default flags and populates the app instance state with runtime
// values extracted from the application log (such as httpListenAddr).
func StartVmselect(instance string, flags []string, cli *Client) (*Vmselect, error) {
app, stderrExtracts, err := startApp(instance, "../../bin/vmselect", flags, &appOptions{
defaultFlags: map[string]string{
"-httpListenAddr": "127.0.0.1:0",
"-clusternativeListenAddr": "127.0.0.1:0",
},
extractREs: []*regexp.Regexp{
httpListenAddrRE,
vmselectAddrRE,
},
})
if err != nil {
return nil, err
}
return &Vmselect{
app: app,
ServesMetrics: &ServesMetrics{
metricsURL: fmt.Sprintf("http://%s/metrics", stderrExtracts[0]),
cli: cli,
},
httpListenAddr: stderrExtracts[0],
clusternativeListenAddr: stderrExtracts[1],
cli: cli,
}, nil
}
// ClusternativeListenAddr returns the address at which the vmselect process is
// listening for connections from other vmselect apps.
func (app *Vmselect) ClusternativeListenAddr() string {
return app.clusternativeListenAddr
}
// PrometheusAPIV1Query is a test helper function that performs a
// PromQL/MetricsQL instant query by sending an HTTP POST request to the
// /prometheus/api/v1/query vmselect endpoint.
//
// See https://docs.victoriametrics.com/url-examples/#apiv1query
func (app *Vmselect) PrometheusAPIV1Query(t *testing.T, query, time, step string, opts QueryOpts) *PrometheusAPIV1QueryResponse {
t.Helper()
queryURL := fmt.Sprintf("http://%s/select/%s/prometheus/api/v1/query", app.httpListenAddr, opts.Tenant)
values := url.Values{}
values.Add("query", query)
values.Add("time", time)
values.Add("step", step)
values.Add("timeout", opts.Timeout)
res := app.cli.PostForm(t, queryURL, values, http.StatusOK)
return NewPrometheusAPIV1QueryResponse(t, res)
}
// PrometheusAPIV1QueryRange is a test helper function that performs a
// PromQL/MetricsQL range query by sending an HTTP POST request to the
// /prometheus/api/v1/query_range vmselect endpoint.
//
// See https://docs.victoriametrics.com/url-examples/#apiv1query_range
func (app *Vmselect) PrometheusAPIV1QueryRange(t *testing.T, query, start, end, step string, opts QueryOpts) *PrometheusAPIV1QueryResponse {
t.Helper()
queryURL := fmt.Sprintf("http://%s/select/%s/prometheus/api/v1/query_range", app.httpListenAddr, opts.Tenant)
values := url.Values{}
values.Add("query", query)
values.Add("start", start)
values.Add("end", end)
values.Add("step", step)
values.Add("timeout", opts.Timeout)
res := app.cli.PostForm(t, queryURL, values, http.StatusOK)
return NewPrometheusAPIV1QueryResponse(t, res)
}
// PrometheusAPIV1Series sends a query to a /prometheus/api/v1/series endpoint
// and returns the list of time series that match the query.
//
// See https://docs.victoriametrics.com/url-examples/#apiv1series
func (app *Vmselect) PrometheusAPIV1Series(t *testing.T, matchQuery string, opts QueryOpts) *PrometheusAPIV1SeriesResponse {
t.Helper()
seriesURL := fmt.Sprintf("http://%s/select/%s/prometheus/api/v1/series", app.httpListenAddr, opts.Tenant)
values := url.Values{}
values.Add("match[]", matchQuery)
res := app.cli.PostForm(t, seriesURL, values, http.StatusOK)
return NewPrometheusAPIV1SeriesResponse(t, res)
}
// String returns the string representation of the vmselect app state.
func (app *Vmselect) String() string {
return fmt.Sprintf("{app: %s httpListenAddr: %q}", app.app, app.httpListenAddr)
}

136
apptest/vmsingle.go Normal file
View file

@ -0,0 +1,136 @@
package apptest
import (
"fmt"
"net/http"
"net/url"
"os"
"regexp"
"strings"
"testing"
"time"
)
// Vmsingle holds the state of a vmsingle app and provides vmsingle-specific
// functions.
type Vmsingle struct {
*app
*ServesMetrics
storageDataPath string
httpListenAddr string
forceFlushURL string
prometheusAPIV1ImportPrometheusURL string
prometheusAPIV1QueryURL string
prometheusAPIV1QueryRangeURL string
prometheusAPIV1SeriesURL string
}
// StartVmsingle starts an instance of vmsingle with the given flags. It also
// sets the default flags and populates the app instance state with runtime
// values extracted from the application log (such as httpListenAddr).
func StartVmsingle(instance string, flags []string, cli *Client) (*Vmsingle, error) {
app, stderrExtracts, err := startApp(instance, "../../bin/victoria-metrics", flags, &appOptions{
defaultFlags: map[string]string{
"-storageDataPath": fmt.Sprintf("%s/%s-%d", os.TempDir(), instance, time.Now().UnixNano()),
"-httpListenAddr": "127.0.0.1:0",
},
extractREs: []*regexp.Regexp{
storageDataPathRE,
httpListenAddrRE,
},
})
if err != nil {
return nil, err
}
return &Vmsingle{
app: app,
ServesMetrics: &ServesMetrics{
metricsURL: fmt.Sprintf("http://%s/metrics", stderrExtracts[1]),
cli: cli,
},
storageDataPath: stderrExtracts[0],
httpListenAddr: stderrExtracts[1],
forceFlushURL: fmt.Sprintf("http://%s/internal/force_flush", stderrExtracts[1]),
prometheusAPIV1ImportPrometheusURL: fmt.Sprintf("http://%s/prometheus/api/v1/import/prometheus", stderrExtracts[1]),
prometheusAPIV1QueryURL: fmt.Sprintf("http://%s/prometheus/api/v1/query", stderrExtracts[1]),
prometheusAPIV1QueryRangeURL: fmt.Sprintf("http://%s/prometheus/api/v1/query_range", stderrExtracts[1]),
prometheusAPIV1SeriesURL: fmt.Sprintf("http://%s/prometheus/api/v1/series", stderrExtracts[1]),
}, nil
}
// ForceFlush is a test helper function that forces the flushing of inserted
// data, so it becomes available for searching immediately.
func (app *Vmsingle) ForceFlush(t *testing.T) {
t.Helper()
app.cli.Get(t, app.forceFlushURL, http.StatusOK)
}
// PrometheusAPIV1ImportPrometheus is a test helper function that inserts a
// collection of records in Prometheus text exposition format by sending an
// HTTP POST request to the /prometheus/api/v1/import/prometheus vmsingle
// endpoint.
//
// See https://docs.victoriametrics.com/url-examples/#apiv1importprometheus
func (app *Vmsingle) PrometheusAPIV1ImportPrometheus(t *testing.T, records []string, _ QueryOpts) {
t.Helper()
app.cli.Post(t, app.prometheusAPIV1ImportPrometheusURL, "text/plain", strings.Join(records, "\n"), http.StatusNoContent)
}
// PrometheusAPIV1Query is a test helper function that performs a
// PromQL/MetricsQL instant query by sending an HTTP POST request to the
// /prometheus/api/v1/query vmsingle endpoint.
//
// See https://docs.victoriametrics.com/url-examples/#apiv1query
func (app *Vmsingle) PrometheusAPIV1Query(t *testing.T, query, time, step string, opts QueryOpts) *PrometheusAPIV1QueryResponse {
t.Helper()
values := url.Values{}
values.Add("query", query)
values.Add("time", time)
values.Add("step", step)
values.Add("timeout", opts.Timeout)
res := app.cli.PostForm(t, app.prometheusAPIV1QueryURL, values, http.StatusOK)
return NewPrometheusAPIV1QueryResponse(t, res)
}
// PrometheusAPIV1QueryRange is a test helper function that performs a
// PromQL/MetricsQL range query by sending an HTTP POST request to the
// /prometheus/api/v1/query_range vmsingle endpoint.
//
// See https://docs.victoriametrics.com/url-examples/#apiv1query_range
func (app *Vmsingle) PrometheusAPIV1QueryRange(t *testing.T, query, start, end, step string, opts QueryOpts) *PrometheusAPIV1QueryResponse {
t.Helper()
values := url.Values{}
values.Add("query", query)
values.Add("start", start)
values.Add("end", end)
values.Add("step", step)
values.Add("timeout", opts.Timeout)
res := app.cli.PostForm(t, app.prometheusAPIV1QueryRangeURL, values, http.StatusOK)
return NewPrometheusAPIV1QueryResponse(t, res)
}
// PrometheusAPIV1Series sends a query to a /prometheus/api/v1/series endpoint
// and returns the list of time series that match the query.
//
// See https://docs.victoriametrics.com/url-examples/#apiv1series
func (app *Vmsingle) PrometheusAPIV1Series(t *testing.T, matchQuery string, _ QueryOpts) *PrometheusAPIV1SeriesResponse {
t.Helper()
values := url.Values{}
values.Add("match[]", matchQuery)
res := app.cli.PostForm(t, app.prometheusAPIV1SeriesURL, values, http.StatusOK)
return NewPrometheusAPIV1SeriesResponse(t, res)
}
// String returns the string representation of the vmsingle app state.
func (app *Vmsingle) String() string {
return fmt.Sprintf("{app: %s storageDataPath: %q httpListenAddr: %q}", []any{
app.app, app.storageDataPath, app.httpListenAddr}...)
}

87
apptest/vmstorage.go Normal file
View file

@ -0,0 +1,87 @@
package apptest
import (
"fmt"
"net/http"
"os"
"regexp"
"testing"
"time"
)
// Vmstorage holds the state of a vmstorage app and provides vmstorage-specific
// functions.
type Vmstorage struct {
*app
*ServesMetrics
storageDataPath string
httpListenAddr string
vminsertAddr string
vmselectAddr string
forceFlushURL string
}
// StartVmstorage starts an instance of vmstorage with the given flags. It also
// sets the default flags and populates the app instance state with runtime
// values extracted from the application log (such as httpListenAddr).
func StartVmstorage(instance string, flags []string, cli *Client) (*Vmstorage, error) {
app, stderrExtracts, err := startApp(instance, "../../bin/vmstorage", flags, &appOptions{
defaultFlags: map[string]string{
"-storageDataPath": fmt.Sprintf("%s/%s-%d", os.TempDir(), instance, time.Now().UnixNano()),
"-httpListenAddr": "127.0.0.1:0",
"-vminsertAddr": "127.0.0.1:0",
"-vmselectAddr": "127.0.0.1:0",
},
extractREs: []*regexp.Regexp{
storageDataPathRE,
httpListenAddrRE,
vminsertAddrRE,
vmselectAddrRE,
},
})
if err != nil {
return nil, err
}
return &Vmstorage{
app: app,
ServesMetrics: &ServesMetrics{
metricsURL: fmt.Sprintf("http://%s/metrics", stderrExtracts[1]),
cli: cli,
},
storageDataPath: stderrExtracts[0],
httpListenAddr: stderrExtracts[1],
vminsertAddr: stderrExtracts[2],
vmselectAddr: stderrExtracts[3],
forceFlushURL: fmt.Sprintf("http://%s/internal/force_flush", stderrExtracts[1]),
}, nil
}
// VminsertAddr returns the address at which the vmstorage process is listening
// for vminsert connections.
func (app *Vmstorage) VminsertAddr() string {
return app.vminsertAddr
}
// VmselectAddr returns the address at which the vmstorage process is listening
// for vmselect connections.
func (app *Vmstorage) VmselectAddr() string {
return app.vmselectAddr
}
// ForceFlush is a test helper function that forces the flushing of inserted
// data, so it becomes available for searching immediately.
func (app *Vmstorage) ForceFlush(t *testing.T) {
t.Helper()
app.cli.Get(t, app.forceFlushURL, http.StatusOK)
}
// String returns the string representation of the vmstorage app state.
func (app *Vmstorage) String() string {
return fmt.Sprintf("{app: %s storageDataPath: %q httpListenAddr: %q vminsertAddr: %q vmselectAddr: %q}", []any{
app.app, app.storageDataPath, app.httpListenAddr, app.vminsertAddr, app.vmselectAddr}...)
}

View file

@ -16,3 +16,6 @@ dashboards-sync:
SRC=vmalert.json D_UID=LzldHAVnz TITLE="VictoriaMetrics - vmalert" $(MAKE) dashboard-copy
SRC=vmauth.json D_UID=nbuo5Mr4k TITLE="VictoriaMetrics - vmauth" $(MAKE) dashboard-copy
SRC=operator.json D_UID=1H179hunk TITLE="VictoriaMetrics - operator" $(MAKE) dashboard-copy
SRC=backupmanager.json D_UID=gF-lxRdVz TITLE="VictoriaMetrics - backupmanager" $(MAKE) dashboard-copy
SRC=clusterbytenant.json D_UID=IZFqd3lMz TITLE="VictoriaMetrics Cluster Per Tenant Statistic" $(MAKE) dashboard-copy
SRC=victorialogs.json D_UID=OqPIZTX4z TITLE="VictoriaLogs" $(MAKE) dashboard-copy

View file

@ -158,6 +158,7 @@
"color": {
"mode": "thresholds"
},
"min": 0,
"mappings": [
{
"options": {
@ -233,6 +234,7 @@
"mode": "thresholds"
},
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
@ -297,6 +299,7 @@
"mode": "thresholds"
},
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
@ -361,6 +364,7 @@
"color": {
"mode": "thresholds"
},
"min": 0,
"mappings": [
{
"options": {
@ -450,6 +454,7 @@
"type": "value"
}
],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
@ -523,6 +528,7 @@
"type": "value"
}
],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [

Some files were not shown because too many files have changed in this diff.