Merge tag 'v1.106.0' into pmm-6401-read-prometheus-data-files

This commit is contained in:
Zakhar Bessarab 2024-11-04 13:56:36 -03:00
commit cd513b9758
No known key found for this signature in database
GPG key ID: 932B34D6FE062023
247 changed files with 9099 additions and 2439 deletions

View file

@ -60,8 +60,8 @@ body:
For VictoriaMetrics health-state issues please provide full-length screenshots
of Grafana dashboards if possible:
* [Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229/)
* [Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176/)
* [Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229)
* [Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176)
See how to setup monitoring here:
* [monitoring for single-node VictoriaMetrics](https://docs.victoriametrics.com/#monitoring)

View file

@ -501,10 +501,12 @@ pprof-cpu:
fmt:
gofmt -l -w -s ./lib
gofmt -l -w -s ./app
gofmt -l -w -s ./apptest
vet:
go vet ./lib/...
go vet ./app/...
go vet ./apptest/...
check-all: fmt vet golangci-lint govulncheck
@ -525,6 +527,9 @@ test-full:
test-full-386:
DISABLE_FSYNC_FOR_TESTING=1 GOARCH=386 go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
integration-test: all
go test ./apptest/...
benchmark:
go test -bench=. ./lib/...
go test -bench=. ./app/...

View file

@ -23,7 +23,7 @@ Here are some resources and information about VictoriaMetrics:
- Available: [Binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest), [Docker images](https://hub.docker.com/r/victoriametrics/victoria-metrics/), [Source code](https://github.com/VictoriaMetrics/VictoriaMetrics)
- Deployment types: [Single-node version](https://docs.victoriametrics.com/), [Cluster version](https://docs.victoriametrics.com/cluster-victoriametrics/), and [Enterprise version](https://docs.victoriametrics.com/enterprise/)
- Changelog: [CHANGELOG](https://docs.victoriametrics.com/changelog/), and [How to upgrade](https://docs.victoriametrics.com/#how-to-upgrade-victoriametrics)
- Community: [Slack](https://slack.victoriametrics.com/), [Twitter](https://twitter.com/VictoriaMetrics), [LinkedIn](https://www.linkedin.com/company/victoriametrics/), [YouTube](https://www.youtube.com/@VictoriaMetrics)
- Community: [Slack](https://slack.victoriametrics.com/), [X (Twitter)](https://x.com/VictoriaMetrics), [LinkedIn](https://www.linkedin.com/company/victoriametrics/), [YouTube](https://www.youtube.com/@VictoriaMetrics)
Yes, we open-source both the single-node VictoriaMetrics and the cluster version.
@ -84,7 +84,7 @@ Some good benchmarks VictoriaMetrics achieved:
Feel free asking any questions regarding VictoriaMetrics:
* [Slack Inviter](https://slack.victoriametrics.com/) and [Slack channel](https://victoriametrics.slack.com/)
* [Twitter](https://twitter.com/VictoriaMetrics/)
* [X (Twitter)](https://x.com/VictoriaMetrics/)
* [Linkedin](https://www.linkedin.com/company/victoriametrics/)
* [Reddit](https://www.reddit.com/r/VictoriaMetrics/)
* [Telegram-en](https://t.me/VictoriaMetrics_en)

View file

@ -2,10 +2,10 @@
"name": "subquery-aggregation",
"issue": "https://github.com/VictoriaMetrics/VictoriaMetrics/issues/184",
"data": [
"forms_daily_count;item=x 1 {TIME_S-1m}",
"forms_daily_count;item=x 2 {TIME_S-2m}",
"forms_daily_count;item=y 3 {TIME_S-1m}",
"forms_daily_count;item=y 4 {TIME_S-2m}"],
"forms_daily_count;item=x 1 {TIME_S-59s}",
"forms_daily_count;item=x 2 {TIME_S-1m59s}",
"forms_daily_count;item=y 3 {TIME_S-59s}",
"forms_daily_count;item=y 4 {TIME_S-1m59s}"],
"query": ["/api/v1/query?query=min%20by%20(item)%20(min_over_time(forms_daily_count[10m:1m]))&time={TIME_S-1m}&latency_offset=1ms"],
"result_query": {
"status":"success",

View file

@ -103,7 +103,7 @@ func RequestHandler(path string, w http.ResponseWriter, r *http.Request) bool {
}
lmp := cp.NewLogMessageProcessor()
isGzip := r.Header.Get("Content-Encoding") == "gzip"
n, err := readBulkRequest(r.Body, isGzip, cp.TimeField, cp.MsgField, lmp)
n, err := readBulkRequest(r.Body, isGzip, cp.TimeField, cp.MsgFields, lmp)
lmp.MustClose()
if err != nil {
logger.Warnf("cannot decode log message #%d in /_bulk request: %s, stream fields: %s", n, err, cp.StreamFields)
@ -133,7 +133,7 @@ var (
bulkRequestDuration = metrics.NewHistogram(`vl_http_request_duration_seconds{path="/insert/elasticsearch/_bulk"}`)
)
func readBulkRequest(r io.Reader, isGzip bool, timeField, msgField string, lmp insertutils.LogMessageProcessor) (int, error) {
func readBulkRequest(r io.Reader, isGzip bool, timeField string, msgFields []string, lmp insertutils.LogMessageProcessor) (int, error) {
// See https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html
if isGzip {
@ -158,7 +158,7 @@ func readBulkRequest(r io.Reader, isGzip bool, timeField, msgField string, lmp i
n := 0
nCheckpoint := 0
for {
ok, err := readBulkLine(sc, timeField, msgField, lmp)
ok, err := readBulkLine(sc, timeField, msgFields, lmp)
wcr.DecConcurrency()
if err != nil || !ok {
rowsIngestedTotal.Add(n - nCheckpoint)
@ -174,7 +174,7 @@ func readBulkRequest(r io.Reader, isGzip bool, timeField, msgField string, lmp i
var lineBufferPool bytesutil.ByteBufferPool
func readBulkLine(sc *bufio.Scanner, timeField, msgField string, lmp insertutils.LogMessageProcessor) (bool, error) {
func readBulkLine(sc *bufio.Scanner, timeField string, msgFields []string, lmp insertutils.LogMessageProcessor) (bool, error) {
var line []byte
// Read the command, must be "create" or "index"
@ -219,7 +219,7 @@ func readBulkLine(sc *bufio.Scanner, timeField, msgField string, lmp insertutils
if ts == 0 {
ts = time.Now().UnixNano()
}
logstorage.RenameField(p.Fields, msgField, "_msg")
logstorage.RenameField(p.Fields, msgFields, "_msg")
lmp.AddRow(ts, p.Fields)
logstorage.PutJSONParser(p)

View file

@ -15,7 +15,7 @@ func TestReadBulkRequest_Failure(t *testing.T) {
tlp := &insertutils.TestLogMessageProcessor{}
r := bytes.NewBufferString(data)
rows, err := readBulkRequest(r, false, "_time", "_msg", tlp)
rows, err := readBulkRequest(r, false, "_time", []string{"_msg"}, tlp)
if err == nil {
t.Fatalf("expecting non-empty error")
}
@ -36,11 +36,12 @@ func TestReadBulkRequest_Success(t *testing.T) {
f := func(data, timeField, msgField string, rowsExpected int, timestampsExpected []int64, resultExpected string) {
t.Helper()
msgFields := []string{"non_existing_foo", msgField, "non_exiting_bar"}
tlp := &insertutils.TestLogMessageProcessor{}
// Read the request without compression
r := bytes.NewBufferString(data)
rows, err := readBulkRequest(r, false, timeField, msgField, tlp)
rows, err := readBulkRequest(r, false, timeField, msgFields, tlp)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
@ -55,7 +56,7 @@ func TestReadBulkRequest_Success(t *testing.T) {
tlp = &insertutils.TestLogMessageProcessor{}
compressedData := compressData(data)
r = bytes.NewBufferString(compressedData)
rows, err = readBulkRequest(r, true, timeField, msgField, tlp)
rows, err = readBulkRequest(r, true, timeField, msgFields, tlp)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}

View file

@ -32,7 +32,7 @@ func benchmarkReadBulkRequest(b *testing.B, isGzip bool) {
dataBytes := bytesutil.ToUnsafeBytes(data)
timeField := "@timestamp"
msgField := "message"
msgFields := []string{"message"}
blp := &insertutils.BenchmarkLogMessageProcessor{}
b.ReportAllocs()
@ -41,7 +41,7 @@ func benchmarkReadBulkRequest(b *testing.B, isGzip bool) {
r := &bytes.Reader{}
for pb.Next() {
r.Reset(dataBytes)
_, err := readBulkRequest(r, isGzip, timeField, msgField, blp)
_, err := readBulkRequest(r, isGzip, timeField, msgFields, blp)
if err != nil {
panic(fmt.Errorf("unexpected error: %w", err))
}

View file

@ -1,7 +1,10 @@
package insertutils
import (
"flag"
"fmt"
"net/http"
"strconv"
"strings"
"sync"
"time"
@ -16,15 +19,21 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
)
var (
defaultMsgValue = flag.String("defaultMsgValue", "missing _msg field; see https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field",
"Default value for _msg field if the ingested log entry doesn't contain it; see https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field")
)
// CommonParams contains common HTTP parameters used by log ingestion APIs.
//
// See https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters
type CommonParams struct {
TenantID logstorage.TenantID
TimeField string
MsgField string
MsgFields []string
StreamFields []string
IgnoreFields []string
ExtraFields []logstorage.Field
Debug bool
DebugRequestURI string
@ -39,44 +48,25 @@ func GetCommonParams(r *http.Request) (*CommonParams, error) {
return nil, err
}
// Extract time field name from _time_field query arg or header
timeField := "_time"
if tf := r.FormValue("_time_field"); tf != "" {
timeField = tf
} else if tf = r.Header.Get("VL-Time-Field"); tf != "" {
if tf := httputils.GetRequestValue(r, "_time_field", "VL-Time-Field"); tf != "" {
timeField = tf
}
// Extract message field name from _msg_field query arg or header
msgField := ""
if msgf := r.FormValue("_msg_field"); msgf != "" {
msgField = msgf
} else if msgf = r.Header.Get("VL-Msg-Field"); msgf != "" {
msgField = msgf
msgFields := httputils.GetArray(r, "_msg_field", "VL-Msg-Field")
streamFields := httputils.GetArray(r, "_stream_fields", "VL-Stream-Fields")
ignoreFields := httputils.GetArray(r, "ignore_fields", "VL-Ignore-Fields")
extraFields, err := getExtraFields(r)
if err != nil {
return nil, err
}
streamFields := httputils.GetArray(r, "_stream_fields")
if len(streamFields) == 0 {
if sf := r.Header.Get("VL-Stream-Fields"); len(sf) > 0 {
streamFields = strings.Split(sf, ",")
}
}
ignoreFields := httputils.GetArray(r, "ignore_fields")
if len(ignoreFields) == 0 {
if f := r.Header.Get("VL-Ignore-Fields"); len(f) > 0 {
ignoreFields = strings.Split(f, ",")
}
}
debug := httputils.GetBool(r, "debug")
if !debug {
if dh := r.Header.Get("VL-Debug"); len(dh) > 0 {
hv := strings.ToLower(dh)
switch hv {
case "", "0", "f", "false", "no":
default:
debug = true
}
debug := false
if dv := httputils.GetRequestValue(r, "debug", "VL-Debug"); dv != "" {
debug, err = strconv.ParseBool(dv)
if err != nil {
return nil, fmt.Errorf("cannot parse debug=%q: %w", dv, err)
}
}
debugRequestURI := ""
@ -89,9 +79,10 @@ func GetCommonParams(r *http.Request) (*CommonParams, error) {
cp := &CommonParams{
TenantID: tenantID,
TimeField: timeField,
MsgField: msgField,
MsgFields: msgFields,
StreamFields: streamFields,
IgnoreFields: ignoreFields,
ExtraFields: extraFields,
Debug: debug,
DebugRequestURI: debugRequestURI,
DebugRemoteAddr: debugRemoteAddr,
@ -100,13 +91,35 @@ func GetCommonParams(r *http.Request) (*CommonParams, error) {
return cp, nil
}
func getExtraFields(r *http.Request) ([]logstorage.Field, error) {
efs := httputils.GetArray(r, "extra_fields", "VL-Extra-Fields")
if len(efs) == 0 {
return nil, nil
}
extraFields := make([]logstorage.Field, len(efs))
for i, ef := range efs {
n := strings.Index(ef, "=")
if n <= 0 || n == len(ef)-1 {
return nil, fmt.Errorf(`invalid extra_field format: %q; must be in the form "field=value"`, ef)
}
extraFields[i] = logstorage.Field{
Name: ef[:n],
Value: ef[n+1:],
}
}
return extraFields, nil
}
// GetCommonParamsForSyslog returns common params needed for parsing syslog messages and storing them to the given tenantID.
func GetCommonParamsForSyslog(tenantID logstorage.TenantID) *CommonParams {
// See https://docs.victoriametrics.com/victorialogs/logsql/#unpack_syslog-pipe
cp := &CommonParams{
TenantID: tenantID,
TimeField: "timestamp",
MsgField: "message",
MsgFields: []string{
"message",
},
StreamFields: []string{
"hostname",
"app_name",
@ -176,22 +189,6 @@ func (lmp *logMessageProcessor) AddRow(timestamp int64, fields []logstorage.Fiel
return
}
// _msg field must be non-empty according to VictoriaLogs data model.
// See https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field
msgExist := false
for i := range fields {
if fields[i].Name == "_msg" {
msgExist = len(fields[i].Value) > 0
break
}
}
if !msgExist {
rf := logstorage.RowFormatter(fields)
logger.Warnf("dropping log line without _msg field; %s", rf)
rowsDroppedTotalMsgNotValid.Inc()
return
}
lmp.lr.MustAdd(lmp.cp.TenantID, timestamp, fields)
if lmp.cp.Debug {
s := lmp.lr.GetRowString(0)
@ -226,7 +223,7 @@ func (lmp *logMessageProcessor) MustClose() {
//
// MustClose() must be called on the returned LogMessageProcessor when it is no longer needed.
func (cp *CommonParams) NewLogMessageProcessor() LogMessageProcessor {
lr := logstorage.GetLogRows(cp.StreamFields, cp.IgnoreFields)
lr := logstorage.GetLogRows(cp.StreamFields, cp.IgnoreFields, cp.ExtraFields, *defaultMsgValue)
lmp := &logMessageProcessor{
cp: cp,
lr: lr,
@ -241,5 +238,4 @@ func (cp *CommonParams) NewLogMessageProcessor() LogMessageProcessor {
var (
rowsDroppedTotalDebug = metrics.NewCounter(`vl_rows_dropped_total{reason="debug"}`)
rowsDroppedTotalTooManyFields = metrics.NewCounter(`vl_rows_dropped_total{reason="too_many_fields"}`)
rowsDroppedTotalMsgNotValid = metrics.NewCounter(`vl_rows_dropped_total{reason="msg_not_exist"}`)
)

View file

@ -0,0 +1,256 @@
package journald
import (
"bytes"
"encoding/binary"
"flag"
"fmt"
"io"
"net/http"
"regexp"
"slices"
"strconv"
"strings"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/insertutils"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
"github.com/VictoriaMetrics/metrics"
)
const (
journaldEntryMaxNameLen = 64
)
var (
bodyBufferPool bytesutil.ByteBufferPool
allowedJournaldEntryNameChars = regexp.MustCompile(`^[A-Z_][A-Z0-9_]+`)
)
var (
journaldStreamFields = flagutil.NewArrayString("journald.streamFields", "Journal fields to be used as stream fields. "+
"See the list of allowed fields at https://www.freedesktop.org/software/systemd/man/latest/systemd.journal-fields.html.")
journaldIgnoreFields = flagutil.NewArrayString("journald.ignoreFields", "Journal fields to ignore. "+
"See the list of allowed fields at https://www.freedesktop.org/software/systemd/man/latest/systemd.journal-fields.html.")
journaldTimeField = flag.String("journald.timeField", "__REALTIME_TIMESTAMP", "Journal field to be used as time field. "+
"See the list of allowed fields at https://www.freedesktop.org/software/systemd/man/latest/systemd.journal-fields.html.")
journaldTenantID = flag.String("journald.tenantID", "0:0", "TenantID for logs ingested via the Journald endpoint.")
journaldIncludeEntryMetadata = flag.Bool("journald.includeEntryMetadata", false, "Include journal entry fields, which with double underscores.")
)
func getCommonParams(r *http.Request) (*insertutils.CommonParams, error) {
cp, err := insertutils.GetCommonParams(r)
if err != nil {
return nil, err
}
if cp.TenantID.AccountID == 0 && cp.TenantID.ProjectID == 0 {
tenantID, err := logstorage.ParseTenantID(*journaldTenantID)
if err != nil {
return nil, fmt.Errorf("cannot parse -journald.tenantID=%q for journald: %w", *journaldTenantID, err)
}
cp.TenantID = tenantID
}
if cp.TimeField != "" {
cp.TimeField = *journaldTimeField
}
if len(cp.StreamFields) == 0 {
cp.StreamFields = *journaldStreamFields
}
if len(cp.IgnoreFields) == 0 {
cp.IgnoreFields = *journaldIgnoreFields
}
cp.MsgFields = []string{"MESSAGE"}
return cp, nil
}
// RequestHandler processes Journald Export insert requests
func RequestHandler(path string, w http.ResponseWriter, r *http.Request) bool {
switch path {
case "/upload":
if r.Header.Get("Content-Type") != "application/vnd.fdo.journal" {
httpserver.Errorf(w, r, "only application/vnd.fdo.journal encoding is supported for Journald")
return true
}
handleJournald(r, w)
return true
default:
return false
}
}
// handleJournald parses Journal binary entries
func handleJournald(r *http.Request, w http.ResponseWriter) {
startTime := time.Now()
requestsJournaldTotal.Inc()
if err := vlstorage.CanWriteData(); err != nil {
httpserver.Errorf(w, r, "%s", err)
return
}
reader := r.Body
var err error
wcr := writeconcurrencylimiter.GetReader(reader)
data, err := io.ReadAll(wcr)
if err != nil {
httpserver.Errorf(w, r, "cannot read request body: %s", err)
return
}
writeconcurrencylimiter.PutReader(wcr)
bb := bodyBufferPool.Get()
defer bodyBufferPool.Put(bb)
if r.Header.Get("Content-Encoding") == "zstd" {
bb.B, err = zstd.Decompress(bb.B[:0], data)
if err != nil {
httpserver.Errorf(w, r, "cannot decompress zstd-encoded request with length %d: %s", len(data), err)
return
}
data = bb.B
}
cp, err := getCommonParams(r)
if err != nil {
httpserver.Errorf(w, r, "cannot parse common params from request: %s", err)
return
}
lmp := cp.NewLogMessageProcessor()
n, err := parseJournaldRequest(data, lmp, cp)
lmp.MustClose()
if err != nil {
errorsTotal.Inc()
httpserver.Errorf(w, r, "cannot parse Journald protobuf request: %s", err)
return
}
rowsIngestedJournaldTotal.Add(n)
// update requestJournaldDuration only for successfully parsed requests
// There is no need in updating requestJournaldDuration for request errors,
// since their timings are usually much smaller than the timing for successful request parsing.
requestJournaldDuration.UpdateDuration(startTime)
}
var (
rowsIngestedJournaldTotal = metrics.NewCounter(`vl_rows_ingested_total{type="journald", format="journald"}`)
requestsJournaldTotal = metrics.NewCounter(`vl_http_requests_total{path="/insert/journald/upload",format="journald"}`)
errorsTotal = metrics.NewCounter(`vl_http_errors_total{path="/insert/journald/upload",format="journald"}`)
requestJournaldDuration = metrics.NewHistogram(`vl_http_request_duration_seconds{path="/insert/journald/upload",format="journald"}`)
)
// See https://systemd.io/JOURNAL_EXPORT_FORMATS/#journal-export-format
func parseJournaldRequest(data []byte, lmp insertutils.LogMessageProcessor, cp *insertutils.CommonParams) (rowsIngested int, err error) {
var fields []logstorage.Field
var ts int64
var size uint64
var name, value string
var line []byte
currentTimestamp := time.Now().UnixNano()
for len(data) > 0 {
idx := bytes.IndexByte(data, '\n')
switch {
case idx > 0:
// process fields
line = data[:idx]
data = data[idx+1:]
case idx == 0:
// next message or end of file
// double new line is a separator for the next message
if len(fields) > 0 {
if ts == 0 {
ts = currentTimestamp
}
lmp.AddRow(ts, fields)
rowsIngested++
fields = fields[:0]
}
// skip newline separator
data = data[1:]
continue
case idx < 0:
return rowsIngested, fmt.Errorf("missing new line separator, unread data left=%d", len(data))
}
idx = bytes.IndexByte(line, '=')
// could b either e key=value\n pair
// or just key\n
// with binary data at the buffer
if idx > 0 {
name = bytesutil.ToUnsafeString(line[:idx])
value = bytesutil.ToUnsafeString(line[idx+1:])
} else {
name = bytesutil.ToUnsafeString(line)
if len(data) == 0 {
return rowsIngested, fmt.Errorf("unexpected zero data for binary field value of key=%s", name)
}
// size of binary data encoded as le i64 at the begging
idx, err := binary.Decode(data, binary.LittleEndian, &size)
if err != nil {
return rowsIngested, fmt.Errorf("failed to extract binary field %q value size: %w", name, err)
}
// skip binary data sise
data = data[idx:]
if size == 0 {
return rowsIngested, fmt.Errorf("unexpected zero binary data size decoded %d", size)
}
if int(size) > len(data) {
return rowsIngested, fmt.Errorf("binary data size=%d cannot exceed size of the data at buffer=%d", size, len(data))
}
value = bytesutil.ToUnsafeString(data[:size])
data = data[int(size):]
// binary data must has new line separator for the new line or next field
if len(data) == 0 {
return rowsIngested, fmt.Errorf("unexpected empty buffer after binary field=%s read", name)
}
lastB := data[0]
if lastB != '\n' {
return rowsIngested, fmt.Errorf("expected new line separator after binary field=%s, got=%s", name, string(lastB))
}
data = data[1:]
}
// https://github.com/systemd/systemd/blob/main/src/libsystemd/sd-journal/journal-file.c#L1703
if len(name) > journaldEntryMaxNameLen {
return rowsIngested, fmt.Errorf("journald entry name should not exceed %d symbols, got: %q", journaldEntryMaxNameLen, name)
}
if !allowedJournaldEntryNameChars.MatchString(name) {
return rowsIngested, fmt.Errorf("journald entry name should consist of `A-Z0-9_` characters and must start from non-digit symbol")
}
if name == cp.TimeField {
ts, err = strconv.ParseInt(value, 10, 64)
if err != nil {
return 0, fmt.Errorf("failed to parse Journald timestamp, %w", err)
}
ts *= 1e3
continue
}
if slices.Contains(cp.MsgFields, name) {
name = "_msg"
}
if *journaldIncludeEntryMetadata || !strings.HasPrefix(name, "__") {
fields = append(fields, logstorage.Field{
Name: name,
Value: value,
})
}
}
if len(fields) > 0 {
if ts == 0 {
ts = currentTimestamp
}
lmp.AddRow(ts, fields)
rowsIngested++
}
return rowsIngested, nil
}

View file

@ -0,0 +1,70 @@
package journald
import (
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/insertutils"
)
func TestPushJournaldOk(t *testing.T) {
f := func(src string, timestampsExpected []int64, resultExpected string) {
t.Helper()
tlp := &insertutils.TestLogMessageProcessor{}
cp := &insertutils.CommonParams{
TimeField: "__REALTIME_TIMESTAMP",
MsgFields: []string{"MESSAGE"},
}
n, err := parseJournaldRequest([]byte(src), tlp, cp)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
if err := tlp.Verify(n, timestampsExpected, resultExpected); err != nil {
t.Fatal(err)
}
}
// Single event
f("__REALTIME_TIMESTAMP=91723819283\nMESSAGE=Test message\n",
[]int64{91723819283000},
"{\"_msg\":\"Test message\"}",
)
// Multiple events
f("__REALTIME_TIMESTAMP=91723819283\nMESSAGE=Test message\n\n__REALTIME_TIMESTAMP=91723819284\nMESSAGE=Test message2\n",
[]int64{91723819283000, 91723819284000},
"{\"_msg\":\"Test message\"}\n{\"_msg\":\"Test message2\"}",
)
// Parse binary data
f("__CURSOR=s=e0afe8412a6a49d2bfcf66aa7927b588;i=1f06;b=f778b6e2f7584a77b991a2366612a7b5;m=300bdfd420;t=62526e1182354;x=930dc44b370963b7\n__REALTIME_TIMESTAMP=1729698775704404\n__MONOTONIC_TIMESTAMP=206357648416\n__SEQNUM=7942\n__SEQNUM_ID=e0afe8412a6a49d2bfcf66aa7927b588\n_BOOT_ID=f778b6e2f7584a77b991a2366612a7b5\n_UID=0\n_GID=0\n_MACHINE_ID=a4a970370c30a925df02a13c67167847\n_HOSTNAME=ecd5e4555787\n_RUNTIME_SCOPE=system\n_TRANSPORT=journal\n_CAP_EFFECTIVE=1ffffffffff\n_SYSTEMD_CGROUP=/init.scope\n_SYSTEMD_UNIT=init.scope\n_SYSTEMD_SLICE=-.slice\nCODE_FILE=<stdin>\nCODE_LINE=1\nCODE_FUNC=<module>\nSYSLOG_IDENTIFIER=python3\n_COMM=python3\n_EXE=/usr/bin/python3.12\n_CMDLINE=python3\nMESSAGE\n\x13\x00\x00\x00\x00\x00\x00\x00foo\nbar\n\n\nasda\nasda\n_PID=2763\n_SOURCE_REALTIME_TIMESTAMP=1729698775704375\n\n",
[]int64{1729698775704404000},
"{\"_BOOT_ID\":\"f778b6e2f7584a77b991a2366612a7b5\",\"_UID\":\"0\",\"_GID\":\"0\",\"_MACHINE_ID\":\"a4a970370c30a925df02a13c67167847\",\"_HOSTNAME\":\"ecd5e4555787\",\"_RUNTIME_SCOPE\":\"system\",\"_TRANSPORT\":\"journal\",\"_CAP_EFFECTIVE\":\"1ffffffffff\",\"_SYSTEMD_CGROUP\":\"/init.scope\",\"_SYSTEMD_UNIT\":\"init.scope\",\"_SYSTEMD_SLICE\":\"-.slice\",\"CODE_FILE\":\"\\u003cstdin>\",\"CODE_LINE\":\"1\",\"CODE_FUNC\":\"\\u003cmodule>\",\"SYSLOG_IDENTIFIER\":\"python3\",\"_COMM\":\"python3\",\"_EXE\":\"/usr/bin/python3.12\",\"_CMDLINE\":\"python3\",\"_msg\":\"foo\\nbar\\n\\n\\nasda\\nasda\",\"_PID\":\"2763\",\"_SOURCE_REALTIME_TIMESTAMP\":\"1729698775704375\"}",
)
}
func TestPushJournald_Failure(t *testing.T) {
f := func(data string) {
t.Helper()
tlp := &insertutils.TestLogMessageProcessor{}
cp := &insertutils.CommonParams{
TimeField: "__REALTIME_TIMESTAMP",
MsgFields: []string{"MESSAGE"},
}
_, err := parseJournaldRequest([]byte(data), tlp, cp)
if err == nil {
t.Fatalf("expected non nil error")
}
}
// missing new line terminator for binary encoded message
f("__CURSOR=s=e0afe8412a6a49d2bfcf66aa7927b588;i=1f06;b=f778b6e2f7584a77b991a2366612a7b5;m=300bdfd420;t=62526e1182354;x=930dc44b370963b7\n__REALTIME_TIMESTAMP=1729698775704404\nMESSAGE\n\x13\x00\x00\x00\x00\x00\x00\x00foo\nbar\n\n\nasdaasda2")
// missing new line terminator
f("__REALTIME_TIMESTAMP=91723819283\n=Test message")
// empty field name
f("__REALTIME_TIMESTAMP=91723819283\n=Test message\n")
// field name starting with number
f("__REALTIME_TIMESTAMP=91723819283\n1incorrect=Test message\n")
// field name exceeds 64 limit
f("__REALTIME_TIMESTAMP=91723819283\ntoolooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooongcorrecooooooooooooong=Test message\n")
// Only allow A-Z0-9 and '_'
f("__REALTIME_TIMESTAMP=91723819283\nbadC!@$!@$as=Test message\n")
}

View file

@ -53,7 +53,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) {
}
lmp := cp.NewLogMessageProcessor()
err = processStreamInternal(reader, cp.TimeField, cp.MsgField, lmp)
err = processStreamInternal(reader, cp.TimeField, cp.MsgFields, lmp)
lmp.MustClose()
if err != nil {
@ -66,7 +66,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) {
}
}
func processStreamInternal(r io.Reader, timeField, msgField string, lmp insertutils.LogMessageProcessor) error {
func processStreamInternal(r io.Reader, timeField string, msgFields []string, lmp insertutils.LogMessageProcessor) error {
wcr := writeconcurrencylimiter.GetReader(r)
defer writeconcurrencylimiter.PutReader(wcr)
@ -79,7 +79,7 @@ func processStreamInternal(r io.Reader, timeField, msgField string, lmp insertut
n := 0
for {
ok, err := readLine(sc, timeField, msgField, lmp)
ok, err := readLine(sc, timeField, msgFields, lmp)
wcr.DecConcurrency()
if err != nil {
errorsTotal.Inc()
@ -93,7 +93,7 @@ func processStreamInternal(r io.Reader, timeField, msgField string, lmp insertut
}
}
func readLine(sc *bufio.Scanner, timeField, msgField string, lmp insertutils.LogMessageProcessor) (bool, error) {
func readLine(sc *bufio.Scanner, timeField string, msgFields []string, lmp insertutils.LogMessageProcessor) (bool, error) {
var line []byte
for len(line) == 0 {
if !sc.Scan() {
@ -116,7 +116,7 @@ func readLine(sc *bufio.Scanner, timeField, msgField string, lmp insertutils.Log
if err != nil {
return false, fmt.Errorf("cannot get timestamp: %w", err)
}
logstorage.RenameField(p.Fields, msgField, "_msg")
logstorage.RenameField(p.Fields, msgFields, "_msg")
lmp.AddRow(ts, p.Fields)
logstorage.PutJSONParser(p)

View file

@ -11,9 +11,10 @@ func TestProcessStreamInternal_Success(t *testing.T) {
f := func(data, timeField, msgField string, rowsExpected int, timestampsExpected []int64, resultExpected string) {
t.Helper()
msgFields := []string{msgField}
tlp := &insertutils.TestLogMessageProcessor{}
r := bytes.NewBufferString(data)
if err := processStreamInternal(r, timeField, msgField, tlp); err != nil {
if err := processStreamInternal(r, timeField, msgFields, tlp); err != nil {
t.Fatalf("unexpected error: %s", err)
}
@ -34,6 +35,18 @@ func TestProcessStreamInternal_Success(t *testing.T) {
{"_msg":"baz"}
{"_msg":"xyz","x":"y"}`
f(data, timeField, msgField, rowsExpected, timestampsExpected, resultExpected)
// Non-existing msgField
data = `{"@timestamp":"2023-06-06T04:48:11.735Z","log":{"offset":71770,"file":{"path":"/var/log/auth.log"}},"message":"foobar"}
{"@timestamp":"2023-06-06T04:48:12.735+01:00","message":"baz"}
`
timeField = "@timestamp"
msgField = "foobar"
rowsExpected = 2
timestampsExpected = []int64{1686026891735000000, 1686023292735000000}
resultExpected = `{"log.offset":"71770","log.file.path":"/var/log/auth.log","message":"foobar"}
{"message":"baz"}`
f(data, timeField, msgField, rowsExpected, timestampsExpected, resultExpected)
}
func TestProcessStreamInternal_Failure(t *testing.T) {
@ -42,7 +55,7 @@ func TestProcessStreamInternal_Failure(t *testing.T) {
tlp := &insertutils.TestLogMessageProcessor{}
r := bytes.NewBufferString(data)
if err := processStreamInternal(r, "time", "", tlp); err == nil {
if err := processStreamInternal(r, "time", nil, tlp); err == nil {
t.Fatalf("expecting non-nil error")
}
}

View file

@ -5,6 +5,7 @@ import (
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/elasticsearch"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/journald"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/jsonline"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/loki"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/opentelemetry"
@ -45,6 +46,9 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
case strings.HasPrefix(path, "/opentelemetry/"):
path = strings.TrimPrefix(path, "/opentelemetry")
return opentelemetry.RequestHandler(path, w, r)
case strings.HasPrefix(path, "/journald/"):
path = strings.TrimPrefix(path, "/journald")
return journald.RequestHandler(path, w, r)
default:
return false
}

View file

@ -514,13 +514,15 @@ func processLine(line []byte, currentYear int, timezone *time.Location, useLocal
}
ts = nsecs
}
logstorage.RenameField(p.Fields, "message", "_msg")
logstorage.RenameField(p.Fields, msgFields, "_msg")
lmp.AddRow(ts, p.Fields)
logstorage.PutSyslogParser(p)
return nil
}
var msgFields = []string{"message"}
var (
rowsIngestedTotal = metrics.NewCounter(`vl_rows_ingested_total{type="syslog"}`)

View file

@ -69,8 +69,8 @@ func main() {
fatalf("cannot initialize readline: %s", err)
}
fmt.Fprintf(rl, "sending queries to %s\n", *datasourceURL)
fmt.Fprintf(rl, "sending queries to -datasource.url=%s\n", *datasourceURL)
fmt.Fprintf(rl, `type ? and press enter to see available commands`+"\n")
runReadlineLoop(rl, &incompleteLine)
if err := rl.Close(); err != nil {
@ -252,7 +252,7 @@ func isHelpCommand(s string) bool {
}
func printCommandsHelp(w io.Writer) {
fmt.Fprintf(w, "%s", `List of available commands:
fmt.Fprintf(w, "%s", `Available commands:
\q - quit
\h - show this help
\s - singleline json output mode
@ -260,6 +260,8 @@ func printCommandsHelp(w io.Writer) {
\c - compact output
\logfmt - logfmt output mode
\tail <query> - live tail <query> results
See https://docs.victoriametrics.com/victorialogs/querying/vlogscli/ for more details
`)
}

View file

@ -1017,6 +1017,20 @@ func parseCommonArgs(r *http.Request) (*logstorage.Query, []logstorage.TenantID,
q.AddTimeFilter(start, end)
}
// Parse optional extra_filters
extraFilters, err := getExtraFilters(r, "extra_filters")
if err != nil {
return nil, nil, err
}
q.AddExtraFilters(extraFilters)
// Parse optional extra_stream_filters
extraStreamFilters, err := getExtraFilters(r, "extra_stream_filters")
if err != nil {
return nil, nil, err
}
q.AddExtraStreamFilters(extraStreamFilters)
return q, tenantIDs, nil
}
@ -1032,3 +1046,16 @@ func getTimeNsec(r *http.Request, argName string) (int64, bool, error) {
}
return nsecs, true, nil
}
func getExtraFilters(r *http.Request, argName string) ([]logstorage.Field, error) {
s := r.FormValue(argName)
if s == "" {
return nil, nil
}
var p logstorage.JSONParser
if err := p.ParseLogMessage([]byte(s)); err != nil {
return nil, fmt.Errorf("cannot parse %s: %w", argName, err)
}
return p.Fields, nil
}

View file

@ -23,7 +23,7 @@ var (
"See also -search.maxQueueDuration")
maxQueueDuration = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the search request waits for execution when -search.maxConcurrentRequests "+
"limit is reached; see also -search.maxQueryDuration")
maxQueryDuration = flag.Duration("search.maxQueryDuration", time.Second*30, "The maximum duration for query execution. It can be overridden on a per-query basis via 'timeout' query arg")
maxQueryDuration = flag.Duration("search.maxQueryDuration", time.Second*30, "The maximum duration for query execution. It can be overridden to a smaller value on a per-query basis via 'timeout' query arg")
)
func getDefaultMaxConcurrentRequests() int {

View file

@ -1,13 +1,13 @@
{
"files": {
"main.css": "./static/css/main.faf86aa5.css",
"main.js": "./static/js/main.2810cc52.js",
"main.js": "./static/js/main.aaafe439.js",
"static/js/685.f772060c.chunk.js": "./static/js/685.f772060c.chunk.js",
"static/media/MetricsQL.md": "./static/media/MetricsQL.a00044c91d9781cf8557.md",
"index.html": "./index.html"
},
"entrypoints": [
"static/css/main.faf86aa5.css",
"static/js/main.2810cc52.js"
"static/js/main.aaafe439.js"
]
}

View file

@ -1 +1 @@
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><link rel="apple-touch-icon" href="./favicon.svg"/><link rel="mask-icon" href="./favicon.svg" color="#000000"><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=5"/><meta name="theme-color" content="#000000"/><meta name="description" content="Explore your log data with VictoriaLogs UI"/><link rel="manifest" href="./manifest.json"/><title>UI for VictoriaLogs</title><meta name="twitter:card" content="summary"><meta name="twitter:title" content="UI for VictoriaLogs"><meta name="twitter:site" content="@https://victoriametrics.com/products/victorialogs/"><meta name="twitter:description" content="Explore your log data with VictoriaLogs UI"><meta name="twitter:image" content="./preview.jpg"><meta property="og:type" content="website"><meta property="og:title" content="UI for VictoriaLogs"><meta property="og:url" content="https://victoriametrics.com/products/victorialogs/"><meta property="og:description" content="Explore your log data with VictoriaLogs UI"><script defer="defer" src="./static/js/main.2810cc52.js"></script><link href="./static/css/main.faf86aa5.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><link rel="apple-touch-icon" href="./favicon.svg"/><link rel="mask-icon" href="./favicon.svg" color="#000000"><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=5"/><meta name="theme-color" content="#000000"/><meta name="description" content="Explore your log data with VictoriaLogs UI"/><link rel="manifest" href="./manifest.json"/><title>UI for VictoriaLogs</title><meta name="twitter:card" content="summary"><meta name="twitter:title" content="UI for VictoriaLogs"><meta name="twitter:site" content="@https://victoriametrics.com/products/victorialogs/"><meta name="twitter:description" content="Explore your log data with VictoriaLogs UI"><meta name="twitter:image" content="./preview.jpg"><meta property="og:type" content="website"><meta property="og:title" content="UI for VictoriaLogs"><meta property="og:url" content="https://victoriametrics.com/products/victorialogs/"><meta property="og:description" content="Explore your log data with VictoriaLogs UI"><script defer="defer" src="./static/js/main.aaafe439.js"></script><link href="./static/css/main.faf86aa5.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>

View file

@ -270,6 +270,9 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
// tear down vmstorage and clean the data dir
defer tearDown()
if tg.Interval == nil {
tg.Interval = promutils.NewDuration(evalInterval)
}
err := writeInputSeries(tg.InputSeries, tg.Interval, testStartTime, testPromWriteHTTPPath)
if err != nil {
return []error{err}

View file

@ -3,6 +3,7 @@ package config
import (
"bytes"
"crypto/md5"
"flag"
"fmt"
"hash/fnv"
"io"
@ -17,6 +18,10 @@ import (
"gopkg.in/yaml.v2"
)
var (
defaultRuleType = flag.String("rule.defaultRuleType", "prometheus", `Default type for rule expressions, can be overridden via "type" parameter on the group level, see https://docs.victoriametrics.com/vmalert/#groups. Supported values: "graphite", "prometheus" and "vlogs".`)
)
// Group contains list of Rules grouped into
// entity with one name and evaluation interval
type Group struct {
@ -59,11 +64,9 @@ func (g *Group) UnmarshalYAML(unmarshal func(any) error) error {
if err != nil {
return fmt.Errorf("failed to marshal group configuration for checksum: %w", err)
}
// change default value to prometheus datasource.
if g.Type.Get() == "" {
g.Type.Set(NewPrometheusType())
g.Type = NewRawType(*defaultRuleType)
}
h := md5.New()
h.Write(b)
g.Checksum = fmt.Sprintf("%x", h.Sum(nil))

View file

@ -122,6 +122,7 @@ func TestParse_Failure(t *testing.T) {
f([]string{"testdata/dir/rules3-bad.rules"}, "either `record` or `alert` must be set")
f([]string{"testdata/dir/rules4-bad.rules"}, "either `record` or `alert` must be set")
f([]string{"testdata/rules/rules1-bad.rules"}, "bad graphite expr")
f([]string{"testdata/rules/vlog-rules0-bad.rules"}, "bad LogsQL expr")
f([]string{"testdata/dir/rules6-bad.rules"}, "missing ':' in header")
f([]string{"testdata/rules/rules-multi-doc-bad.rules"}, "unknown fields")
f([]string{"testdata/rules/rules-multi-doc-duplicates-bad.rules"}, "duplicate")
@ -240,7 +241,7 @@ func TestGroupValidate_Failure(t *testing.T) {
}, false, "duplicate")
f(&Group{
Name: "test graphite prometheus bad expr",
Name: "test graphite with prometheus expr",
Type: NewGraphiteType(),
Rules: []Rule{
{
@ -267,6 +268,20 @@ func TestGroupValidate_Failure(t *testing.T) {
},
}, false, "either `record` or `alert` must be set")
f(&Group{
Name: "test vlogs with prometheus expr",
Type: NewVLogsType(),
Rules: []Rule{
{
Expr: "sum(up == 0 ) by (host)",
For: promutils.NewDuration(10 * time.Millisecond),
},
{
Expr: "sumSeries(time('foo.bar',10))",
},
},
}, false, "invalid rule")
// validate expressions
f(&Group{
Name: "test",
@ -297,6 +312,16 @@ func TestGroupValidate_Failure(t *testing.T) {
}},
},
}, true, "bad graphite expr")
f(&Group{
Name: "test vlogs",
Type: NewVLogsType(),
Rules: []Rule{
{Alert: "alert", Expr: "stats count(*) as requests", Labels: map[string]string{
"description": "some-description",
}},
},
}, true, "bad LogsQL expr")
}
func TestGroupValidate_Success(t *testing.T) {
@ -336,7 +361,7 @@ func TestGroupValidate_Success(t *testing.T) {
},
}, false, false)
// validate annotiations
// validate annotations
f(&Group{
Name: "test",
Rules: []Rule{
@ -363,6 +388,15 @@ func TestGroupValidate_Success(t *testing.T) {
}},
},
}, false, true)
f(&Group{
Name: "test victorialogs",
Type: NewVLogsType(),
Rules: []Rule{
{Alert: "alert", Expr: " _time: 1m | stats count(*) as requests", Labels: map[string]string{
"description": "{{ value|query }}",
}},
},
}, false, true)
}
func TestHashRule_NotEqual(t *testing.T) {

View file

@ -0,0 +1,10 @@
groups:
- name: InvalidStatsLogsql
type: vlogs
interval: 5m
rules:
- record: MissingFilter
expr: 'stats count(*) as requests'
- record: MissingStatsPipe
expr: 'service: "nginx"'

View file

@ -0,0 +1,29 @@
groups:
- name: RequestCount
type: vlogs
interval: 5m
rules:
- record: nginxRequestCount
expr: 'env: "test" AND service: "nginx" | stats count(*) as requests'
annotations:
description: "Service nginx on env test accepted {{$labels.requests}} requests in the last 5 minutes"
- record: prodRequestCount
expr: 'env: "prod" | stats by (service) count(*) as requests'
annotations:
description: "Service {{$labels.service}} on env prod accepted {{$labels.requests}} requests in the last 5 minutes"
- name: ServiceLog
type: vlogs
interval: 5m
rules:
- alert: HasErrorLog
expr: 'env: "prod" AND status:~"error|warn" | stats by (service) count(*) as errorLog | filter errorLog:>0'
annotations:
description: "Service {{$labels.service}} generated {{$labels.errorLog}} error logs in the last 5 minutes"
- name: ServiceRequest
type: vlogs
interval: 10m
rules:
- alert: TooManyFailedRequest
expr: '* | extract "ip=<ip> " | extract "status_code=<code>;" | stats by (ip) count() if (code:!~200) as failed, count() as total| math failed / total as failed_percentage| filter failed_percentage :> 0.01 | fields ip,failed_percentage'
annotations:
description: "Connection from address {{$labels.ip}} has {{$value}} failed requests ratio in last 10 minutes"

View file

@ -5,6 +5,7 @@ import (
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/graphiteql"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
"github.com/VictoriaMetrics/metricsql"
)
@ -27,6 +28,13 @@ func NewGraphiteType() Type {
}
}
// NewVLogsType returns victorialogs datasource type
func NewVLogsType() Type {
return Type{
Name: "vlogs",
}
}
// NewRawType returns datasource type from raw string
// without validation.
func NewRawType(d string) Type {
@ -62,6 +70,10 @@ func (t *Type) ValidateExpr(expr string) error {
if _, err := metricsql.Parse(expr); err != nil {
return fmt.Errorf("bad prometheus expr: %q, err: %w", expr, err)
}
case "vlogs":
if _, err := logstorage.ParseStatsQuery(expr); err != nil {
return fmt.Errorf("bad LogsQL expr: %q, err: %w", expr, err)
}
default:
return fmt.Errorf("unknown datasource type=%q", t.Name)
}
@ -74,13 +86,10 @@ func (t *Type) UnmarshalYAML(unmarshal func(any) error) error {
if err := unmarshal(&s); err != nil {
return err
}
if s == "" {
s = "prometheus"
}
switch s {
case "graphite", "prometheus":
case "graphite", "prometheus", "vlogs":
default:
return fmt.Errorf("unknown datasource type=%q, want %q or %q", s, "prometheus", "graphite")
return fmt.Errorf("unknown datasource type=%q, want prometheus, graphite or vlogs", s)
}
t.Name = s
return nil

View file

@ -0,0 +1,333 @@
package datasource
import (
"context"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
)
type datasourceType string
const (
datasourcePrometheus datasourceType = "prometheus"
datasourceGraphite datasourceType = "graphite"
datasourceVLogs datasourceType = "vlogs"
)
func toDatasourceType(s string) datasourceType {
switch s {
case string(datasourcePrometheus):
return datasourcePrometheus
case string(datasourceGraphite):
return datasourceGraphite
case string(datasourceVLogs):
return datasourceVLogs
default:
logger.Panicf("BUG: unknown datasource type %q", s)
}
return ""
}
// Client is a datasource entity for reading data,
// supported clients are enumerated in datasourceType.
// WARN: when adding a new field, remember to check if Clone() method needs to be updated.
type Client struct {
c *http.Client
authCfg *promauth.Config
datasourceURL string
appendTypePrefix bool
queryStep time.Duration
dataSourceType datasourceType
// ApplyIntervalAsTimeFilter is only valid for vlogs datasource.
// Set to true if there is no [timeFilter](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter) in the rule expression,
// and we will add evaluation interval as an additional timeFilter when querying.
applyIntervalAsTimeFilter bool
// evaluationInterval will help setting request's `step` param,
// or adding time filter for LogsQL expression.
evaluationInterval time.Duration
// extraParams contains params to be attached to each HTTP request
extraParams url.Values
// extraHeaders are headers to be attached to each HTTP request
extraHeaders []keyValue
// whether to print additional log messages
// for each sent request
debug bool
}
type keyValue struct {
key string
value string
}
// Clone clones shared http client and other configuration to the new client.
func (c *Client) Clone() *Client {
ns := &Client{
c: c.c,
authCfg: c.authCfg,
datasourceURL: c.datasourceURL,
appendTypePrefix: c.appendTypePrefix,
queryStep: c.queryStep,
dataSourceType: c.dataSourceType,
evaluationInterval: c.evaluationInterval,
// init map so it can be populated below
extraParams: url.Values{},
debug: c.debug,
}
if len(c.extraHeaders) > 0 {
ns.extraHeaders = make([]keyValue, len(c.extraHeaders))
copy(ns.extraHeaders, c.extraHeaders)
}
for k, v := range c.extraParams {
ns.extraParams[k] = v
}
return ns
}
// ApplyParams - changes given querier params.
func (c *Client) ApplyParams(params QuerierParams) *Client {
if params.DataSourceType != "" {
c.dataSourceType = toDatasourceType(params.DataSourceType)
}
c.evaluationInterval = params.EvaluationInterval
c.applyIntervalAsTimeFilter = params.ApplyIntervalAsTimeFilter
if params.QueryParams != nil {
if c.extraParams == nil {
c.extraParams = url.Values{}
}
for k, vl := range params.QueryParams {
// custom query params are prior to default ones
if c.extraParams.Has(k) {
c.extraParams.Del(k)
}
for _, v := range vl {
// don't use .Set() instead of Del/Add since it is allowed
// for GET params to be duplicated
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4908
c.extraParams.Add(k, v)
}
}
}
if params.Headers != nil {
for key, value := range params.Headers {
kv := keyValue{key: key, value: value}
c.extraHeaders = append(c.extraHeaders, kv)
}
}
c.debug = params.Debug
return c
}
// BuildWithParams - implements interface.
func (c *Client) BuildWithParams(params QuerierParams) Querier {
return c.Clone().ApplyParams(params)
}
// NewPrometheusClient returns a new prometheus datasource client.
func NewPrometheusClient(baseURL string, authCfg *promauth.Config, appendTypePrefix bool, c *http.Client) *Client {
return &Client{
c: c,
authCfg: authCfg,
datasourceURL: strings.TrimSuffix(baseURL, "/"),
appendTypePrefix: appendTypePrefix,
queryStep: *queryStep,
dataSourceType: datasourcePrometheus,
extraParams: url.Values{},
}
}
// Query executes the given query and returns parsed response
func (c *Client) Query(ctx context.Context, query string, ts time.Time) (Result, *http.Request, error) {
req, err := c.newQueryRequest(ctx, query, ts)
if err != nil {
return Result{}, nil, err
}
resp, err := c.do(req)
if err != nil {
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) && !netutil.IsTrivialNetworkError(err) {
// Return unexpected error to the caller.
return Result{}, nil, err
}
// Something in the middle between client and datasource might be closing
// the connection. So we do a one more attempt in hope request will succeed.
req, err = c.newQueryRequest(ctx, query, ts)
if err != nil {
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
}
resp, err = c.do(req)
if err != nil {
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
}
}
// Process the received response.
var parseFn func(req *http.Request, resp *http.Response) (Result, error)
switch c.dataSourceType {
case datasourcePrometheus:
parseFn = parsePrometheusResponse
case datasourceGraphite:
parseFn = parseGraphiteResponse
case datasourceVLogs:
parseFn = parseVLogsResponse
default:
logger.Panicf("BUG: unsupported datasource type %q to parse query response", c.dataSourceType)
}
result, err := parseFn(req, resp)
_ = resp.Body.Close()
return result, req, err
}
// QueryRange executes the given query on the given time range.
// For Prometheus type see https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries
// Graphite type isn't supported.
func (c *Client) QueryRange(ctx context.Context, query string, start, end time.Time) (res Result, err error) {
if c.dataSourceType == datasourceGraphite {
return res, fmt.Errorf("%q is not supported for QueryRange", c.dataSourceType)
}
// TODO: disable range query LogsQL with time filter now
if c.dataSourceType == datasourceVLogs && !c.applyIntervalAsTimeFilter {
return res, fmt.Errorf("range query is not supported for LogsQL expression %q because it contains time filter. Remove time filter from the expression and try again", query)
}
if start.IsZero() {
return res, fmt.Errorf("start param is missing")
}
if end.IsZero() {
return res, fmt.Errorf("end param is missing")
}
req, err := c.newQueryRangeRequest(ctx, query, start, end)
if err != nil {
return res, err
}
resp, err := c.do(req)
if err != nil {
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) && !netutil.IsTrivialNetworkError(err) {
// Return unexpected error to the caller.
return res, err
}
// Something in the middle between client and datasource might be closing
// the connection. So we do a one more attempt in hope request will succeed.
req, err = c.newQueryRangeRequest(ctx, query, start, end)
if err != nil {
return res, fmt.Errorf("second attempt: %w", err)
}
resp, err = c.do(req)
if err != nil {
return res, fmt.Errorf("second attempt: %w", err)
}
}
// Process the received response.
var parseFn func(req *http.Request, resp *http.Response) (Result, error)
switch c.dataSourceType {
case datasourcePrometheus:
parseFn = parsePrometheusResponse
case datasourceVLogs:
parseFn = parseVLogsResponse
default:
logger.Panicf("BUG: unsupported datasource type %q to parse query range response", c.dataSourceType)
}
res, err = parseFn(req, resp)
_ = resp.Body.Close()
return res, err
}
func (c *Client) do(req *http.Request) (*http.Response, error) {
ru := req.URL.Redacted()
if *showDatasourceURL {
ru = req.URL.String()
}
if c.debug {
logger.Infof("DEBUG datasource request: executing %s request with params %q", req.Method, ru)
}
resp, err := c.c.Do(req)
if err != nil {
return nil, fmt.Errorf("error getting response from %s: %w", ru, err)
}
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
_ = resp.Body.Close()
return nil, fmt.Errorf("unexpected response code %d for %s. Response body %s", resp.StatusCode, ru, body)
}
return resp, nil
}
func (c *Client) newQueryRangeRequest(ctx context.Context, query string, start, end time.Time) (*http.Request, error) {
req, err := c.newRequest(ctx)
if err != nil {
return nil, fmt.Errorf("cannot create query_range request to datasource %q: %w", c.datasourceURL, err)
}
switch c.dataSourceType {
case datasourcePrometheus:
c.setPrometheusRangeReqParams(req, query, start, end)
case datasourceVLogs:
c.setVLogsRangeReqParams(req, query, start, end)
default:
logger.Panicf("BUG: unsupported datasource type %q to create range query request", c.dataSourceType)
}
return req, nil
}
func (c *Client) newQueryRequest(ctx context.Context, query string, ts time.Time) (*http.Request, error) {
req, err := c.newRequest(ctx)
if err != nil {
return nil, fmt.Errorf("cannot create query request to datasource %q: %w", c.datasourceURL, err)
}
switch c.dataSourceType {
case datasourcePrometheus:
c.setPrometheusInstantReqParams(req, query, ts)
case datasourceGraphite:
c.setGraphiteReqParams(req, query)
case datasourceVLogs:
c.setVLogsInstantReqParams(req, query, ts)
default:
logger.Panicf("BUG: unsupported datasource type %q to create query request", c.dataSourceType)
}
return req, nil
}
func (c *Client) newRequest(ctx context.Context) (*http.Request, error) {
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.datasourceURL, nil)
if err != nil {
logger.Panicf("BUG: unexpected error from http.NewRequest(%q): %s", c.datasourceURL, err)
}
req.Header.Set("Content-Type", "application/json")
if c.authCfg != nil {
err = c.authCfg.SetHeaders(req, true)
if err != nil {
return nil, err
}
}
for _, h := range c.extraHeaders {
req.Header.Set(h.key, h.value)
}
return req, nil
}
// setReqParams adds query and other extra params for the request.
func (c *Client) setReqParams(r *http.Request, query string) {
q := r.URL.Query()
for k, vs := range c.extraParams {
if q.Has(k) { // extraParams are prior to params in URL
q.Del(k)
}
for _, v := range vs {
q.Add(k, v)
}
}
q.Set("query", query)
r.URL.RawQuery = q.Encode()
}

View file

@ -46,7 +46,7 @@ const (
graphitePrefix = "/graphite"
)
func (s *VMStorage) setGraphiteReqParams(r *http.Request, query string) {
func (s *Client) setGraphiteReqParams(r *http.Request, query string) {
if s.appendTypePrefix {
r.URL.Path += graphitePrefix
}

View file

@ -14,7 +14,7 @@ import (
)
var (
disablePathAppend = flag.Bool("remoteRead.disablePathAppend", false, "Whether to disable automatic appending of '/api/v1/query' path "+
disablePathAppend = flag.Bool("remoteRead.disablePathAppend", false, "Whether to disable automatic appending of '/api/v1/query' or '/select/logsql/stats_query' path "+
"to the configured -datasource.url and -remoteRead.url")
disableStepParam = flag.Bool("datasource.disableStepParam", false, "Whether to disable adding 'step' param to the issued instant queries. "+
"This might be useful when using vmalert with datasources that do not support 'step' param for instant queries, like Google Managed Prometheus. "+
@ -171,7 +171,7 @@ const (
func parsePrometheusResponse(req *http.Request, resp *http.Response) (res Result, err error) {
r := &promResponse{}
if err = json.NewDecoder(resp.Body).Decode(r); err != nil {
return res, fmt.Errorf("error parsing prometheus metrics for %s: %w", req.URL.Redacted(), err)
return res, fmt.Errorf("error parsing response from %s: %w", req.URL.Redacted(), err)
}
if r.Status == statusError {
return res, fmt.Errorf("response error, query: %s, errorType: %s, error: %s", req.URL.Redacted(), r.ErrorType, r.Error)
@ -218,7 +218,7 @@ func parsePrometheusResponse(req *http.Request, resp *http.Response) (res Result
return res, nil
}
func (s *VMStorage) setPrometheusInstantReqParams(r *http.Request, query string, timestamp time.Time) {
func (s *Client) setPrometheusInstantReqParams(r *http.Request, query string, timestamp time.Time) {
if s.appendTypePrefix {
r.URL.Path += "/prometheus"
}
@ -238,10 +238,10 @@ func (s *VMStorage) setPrometheusInstantReqParams(r *http.Request, query string,
q.Set("step", fmt.Sprintf("%ds", int(s.queryStep.Seconds())))
}
r.URL.RawQuery = q.Encode()
s.setPrometheusReqParams(r, query)
s.setReqParams(r, query)
}
func (s *VMStorage) setPrometheusRangeReqParams(r *http.Request, query string, start, end time.Time) {
func (s *Client) setPrometheusRangeReqParams(r *http.Request, query string, start, end time.Time) {
if s.appendTypePrefix {
r.URL.Path += "/prometheus"
}
@ -257,19 +257,5 @@ func (s *VMStorage) setPrometheusRangeReqParams(r *http.Request, query string, s
q.Set("step", fmt.Sprintf("%ds", int(s.evaluationInterval.Seconds())))
}
r.URL.RawQuery = q.Encode()
s.setPrometheusReqParams(r, query)
}
func (s *VMStorage) setPrometheusReqParams(r *http.Request, query string) {
q := r.URL.Query()
for k, vs := range s.extraParams {
if q.Has(k) { // extraParams are prior to params in URL
q.Del(k)
}
for _, v := range vs {
q.Add(k, v)
}
}
q.Set("query", query)
r.URL.RawQuery = q.Encode()
s.setReqParams(r, query)
}

View file

@ -24,8 +24,10 @@ var (
Username: basicAuthName,
Password: promauth.NewSecret(basicAuthPass),
}
query = "vm_rows"
vmQuery = "vm_rows"
queryRender = "constantLine(10)"
vlogsQuery = "_time: 5m | stats by (foo) count() total"
vlogsRangeQuery = "* | stats by (foo) count() total"
)
func TestVMInstantQuery(t *testing.T) {
@ -42,8 +44,8 @@ func TestVMInstantQuery(t *testing.T) {
if name, pass, _ := r.BasicAuth(); name != basicAuthName || pass != basicAuthPass {
t.Fatalf("expected %s:%s as basic auth got %s:%s", basicAuthName, basicAuthPass, name, pass)
}
if r.URL.Query().Get("query") != query {
t.Fatalf("expected %s in query param, got %s", query, r.URL.Query().Get("query"))
if r.URL.Query().Get("query") != vmQuery {
t.Fatalf("expected %s in query param, got %s", vmQuery, r.URL.Query().Get("query"))
}
timeParam := r.URL.Query().Get("time")
if timeParam == "" {
@ -78,6 +80,31 @@ func TestVMInstantQuery(t *testing.T) {
w.Write([]byte(`[{"target":"constantLine(10)","tags":{"name":"constantLine(10)"},"datapoints":[[10,1611758343],[10,1611758373],[10,1611758403]]}]`))
}
})
mux.HandleFunc("/select/logsql/stats_query", func(w http.ResponseWriter, r *http.Request) {
c++
if r.Method != http.MethodPost {
t.Fatalf("expected POST method got %s", r.Method)
}
if name, pass, _ := r.BasicAuth(); name != basicAuthName || pass != basicAuthPass {
t.Fatalf("expected %s:%s as basic auth got %s:%s", basicAuthName, basicAuthPass, name, pass)
}
if r.URL.Query().Get("query") != vlogsQuery {
t.Fatalf("expected %s in query param, got %s", vlogsQuery, r.URL.Query().Get("query"))
}
timeParam := r.URL.Query().Get("time")
if timeParam == "" {
t.Fatalf("expected 'time' in query param, got nil instead")
}
if _, err := time.Parse(time.RFC3339, timeParam); err != nil {
t.Fatalf("failed to parse 'time' query param %q: %s", timeParam, err)
}
switch c {
case 9:
w.Write([]byte("[]"))
case 10:
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"total","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"total","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
}
})
srv := httptest.NewServer(mux)
defer srv.Close()
@ -86,13 +113,13 @@ func TestVMInstantQuery(t *testing.T) {
if err != nil {
t.Fatalf("unexpected: %s", err)
}
s := NewVMStorage(srv.URL, authCfg, 0, false, srv.Client())
s := NewPrometheusClient(srv.URL, authCfg, false, srv.Client())
p := datasourcePrometheus
pq := s.BuildWithParams(QuerierParams{DataSourceType: string(p), EvaluationInterval: 15 * time.Second})
ts := time.Now()
expErr := func(err string) {
expErr := func(query, err string) {
_, _, gotErr := pq.Query(ctx, query, ts)
if gotErr == nil {
t.Fatalf("expected %q got nil", err)
@ -102,13 +129,13 @@ func TestVMInstantQuery(t *testing.T) {
}
}
expErr("500") // 0
expErr("error parsing prometheus metrics") // 1
expErr("response error") // 2
expErr("unknown status") // 3
expErr("unexpected end of JSON input") // 4
expErr(vmQuery, "500") // 0
expErr(vmQuery, "error parsing response") // 1
expErr(vmQuery, "response error") // 2
expErr(vmQuery, "unknown status") // 3
expErr(vmQuery, "unexpected end of JSON input") // 4
res, _, err := pq.Query(ctx, query, ts) // 5 - vector
res, _, err := pq.Query(ctx, vmQuery, ts) // 5 - vector
if err != nil {
t.Fatalf("unexpected %s", err)
}
@ -129,7 +156,7 @@ func TestVMInstantQuery(t *testing.T) {
}
metricsEqual(t, res.Data, expected)
res, req, err := pq.Query(ctx, query, ts) // 6 - scalar
res, req, err := pq.Query(ctx, vmQuery, ts) // 6 - scalar
if err != nil {
t.Fatalf("unexpected %s", err)
}
@ -154,7 +181,7 @@ func TestVMInstantQuery(t *testing.T) {
res.SeriesFetched)
}
res, _, err = pq.Query(ctx, query, ts) // 7 - scalar with stats
res, _, err = pq.Query(ctx, vmQuery, ts) // 7 - scalar with stats
if err != nil {
t.Fatalf("unexpected %s", err)
}
@ -175,6 +202,7 @@ func TestVMInstantQuery(t *testing.T) {
*res.SeriesFetched)
}
// test graphite
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
res, _, err = gq.Query(ctx, queryRender, ts) // 8 - graphite
@ -192,6 +220,33 @@ func TestVMInstantQuery(t *testing.T) {
},
}
metricsEqual(t, res.Data, exp)
// test victorialogs
vlogs := datasourceVLogs
pq = s.BuildWithParams(QuerierParams{DataSourceType: string(vlogs), EvaluationInterval: 15 * time.Second})
expErr(vlogsQuery, "error parsing response") // 9
res, _, err = pq.Query(ctx, vlogsQuery, ts) // 10
if err != nil {
t.Fatalf("unexpected %s", err)
}
if len(res.Data) != 2 {
t.Fatalf("expected 2 metrics got %d in %+v", len(res.Data), res.Data)
}
expected = []Metric{
{
Labels: []Label{{Value: "total", Name: "stats_result"}, {Value: "bar", Name: "foo"}},
Timestamps: []int64{1583786142},
Values: []float64{13763},
},
{
Labels: []Label{{Value: "total", Name: "stats_result"}, {Value: "baz", Name: "foo"}},
Timestamps: []int64{1583786140},
Values: []float64{2000},
},
}
metricsEqual(t, res.Data, expected)
}
func TestVMInstantQueryWithRetry(t *testing.T) {
@ -202,8 +257,8 @@ func TestVMInstantQueryWithRetry(t *testing.T) {
c := -1
mux.HandleFunc("/api/v1/query", func(w http.ResponseWriter, r *http.Request) {
c++
if r.URL.Query().Get("query") != query {
t.Fatalf("expected %s in query param, got %s", query, r.URL.Query().Get("query"))
if r.URL.Query().Get("query") != vmQuery {
t.Fatalf("expected %s in query param, got %s", vmQuery, r.URL.Query().Get("query"))
}
switch c {
case 0:
@ -225,11 +280,11 @@ func TestVMInstantQueryWithRetry(t *testing.T) {
srv := httptest.NewServer(mux)
defer srv.Close()
s := NewVMStorage(srv.URL, nil, 0, false, srv.Client())
s := NewPrometheusClient(srv.URL, nil, false, srv.Client())
pq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourcePrometheus)})
expErr := func(err string) {
_, _, gotErr := pq.Query(ctx, query, time.Now())
_, _, gotErr := pq.Query(ctx, vmQuery, time.Now())
if gotErr == nil {
t.Fatalf("expected %q got nil", err)
}
@ -239,7 +294,7 @@ func TestVMInstantQueryWithRetry(t *testing.T) {
}
expValue := func(v float64) {
res, _, err := pq.Query(ctx, query, time.Now())
res, _, err := pq.Query(ctx, vmQuery, time.Now())
if err != nil {
t.Fatalf("unexpected %s", err)
}
@ -300,8 +355,8 @@ func TestVMRangeQuery(t *testing.T) {
if name, pass, _ := r.BasicAuth(); name != basicAuthName || pass != basicAuthPass {
t.Fatalf("expected %s:%s as basic auth got %s:%s", basicAuthName, basicAuthPass, name, pass)
}
if r.URL.Query().Get("query") != query {
t.Fatalf("expected %s in query param, got %s", query, r.URL.Query().Get("query"))
if r.URL.Query().Get("query") != vmQuery {
t.Fatalf("expected %s in query param, got %s", vmQuery, r.URL.Query().Get("query"))
}
startTS := r.URL.Query().Get("start")
if startTS == "" {
@ -326,6 +381,40 @@ func TestVMRangeQuery(t *testing.T) {
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`))
}
})
mux.HandleFunc("/select/logsql/stats_query_range", func(w http.ResponseWriter, r *http.Request) {
c++
if r.Method != http.MethodPost {
t.Fatalf("expected POST method got %s", r.Method)
}
if name, pass, _ := r.BasicAuth(); name != basicAuthName || pass != basicAuthPass {
t.Fatalf("expected %s:%s as basic auth got %s:%s", basicAuthName, basicAuthPass, name, pass)
}
if r.URL.Query().Get("query") != vlogsRangeQuery {
t.Fatalf("expected %s in query param, got %s", vmQuery, r.URL.Query().Get("query"))
}
startTS := r.URL.Query().Get("start")
if startTS == "" {
t.Fatalf("expected 'start' in query param, got nil instead")
}
if _, err := time.Parse(time.RFC3339, startTS); err != nil {
t.Fatalf("failed to parse 'start' query param: %s", err)
}
endTS := r.URL.Query().Get("end")
if endTS == "" {
t.Fatalf("expected 'end' in query param, got nil instead")
}
if _, err := time.Parse(time.RFC3339, endTS); err != nil {
t.Fatalf("failed to parse 'end' query param: %s", err)
}
step := r.URL.Query().Get("step")
if step != "60s" {
t.Fatalf("expected 'step' query param to be 60s; got %q instead", step)
}
switch c {
case 1:
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"total"},"values":[[1583786142,"10"]]}]}}`))
}
})
srv := httptest.NewServer(mux)
defer srv.Close()
@ -334,19 +423,19 @@ func TestVMRangeQuery(t *testing.T) {
if err != nil {
t.Fatalf("unexpected: %s", err)
}
s := NewVMStorage(srv.URL, authCfg, *queryStep, false, srv.Client())
s := NewPrometheusClient(srv.URL, authCfg, false, srv.Client())
pq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourcePrometheus), EvaluationInterval: 15 * time.Second})
_, err = pq.QueryRange(ctx, query, time.Now(), time.Time{})
_, err = pq.QueryRange(ctx, vmQuery, time.Now(), time.Time{})
expectError(t, err, "is missing")
_, err = pq.QueryRange(ctx, query, time.Time{}, time.Now())
_, err = pq.QueryRange(ctx, vmQuery, time.Time{}, time.Now())
expectError(t, err, "is missing")
start, end := time.Now().Add(-time.Minute), time.Now()
res, err := pq.QueryRange(ctx, query, start, end)
res, err := pq.QueryRange(ctx, vmQuery, start, end)
if err != nil {
t.Fatalf("unexpected %s", err)
}
@ -363,33 +452,66 @@ func TestVMRangeQuery(t *testing.T) {
t.Fatalf("unexpected metric %+v want %+v", m[0], expected)
}
// test unsupported graphite
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
_, err = gq.QueryRange(ctx, queryRender, start, end)
expectError(t, err, "is not supported")
// unsupported logsql
gq = s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceVLogs), EvaluationInterval: 60 * time.Second})
res, err = gq.QueryRange(ctx, vlogsRangeQuery, start, end)
expectError(t, err, "is not supported")
// supported logsql
gq = s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceVLogs), EvaluationInterval: 60 * time.Second, ApplyIntervalAsTimeFilter: true})
res, err = gq.QueryRange(ctx, vlogsRangeQuery, start, end)
if err != nil {
t.Fatalf("unexpected %s", err)
}
m = res.Data
if len(m) != 1 {
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
}
expected = Metric{
Labels: []Label{{Value: "total", Name: "stats_result"}},
Timestamps: []int64{1583786142},
Values: []float64{10},
}
if !reflect.DeepEqual(m[0], expected) {
t.Fatalf("unexpected metric %+v want %+v", m[0], expected)
}
}
func TestRequestParams(t *testing.T) {
query := "up"
vlogsQuery := "_time: 5m | stats count() total"
timestamp := time.Date(2001, 2, 3, 4, 5, 6, 0, time.UTC)
f := func(isQueryRange bool, vm *VMStorage, checkFn func(t *testing.T, r *http.Request)) {
f := func(isQueryRange bool, c *Client, checkFn func(t *testing.T, r *http.Request)) {
t.Helper()
req, err := vm.newRequest(ctx)
req, err := c.newRequest(ctx)
if err != nil {
t.Fatalf("error in newRequest: %s", err)
}
switch vm.dataSourceType {
case "", datasourcePrometheus:
switch c.dataSourceType {
case datasourcePrometheus:
if isQueryRange {
vm.setPrometheusRangeReqParams(req, query, timestamp, timestamp)
c.setPrometheusRangeReqParams(req, query, timestamp, timestamp)
} else {
vm.setPrometheusInstantReqParams(req, query, timestamp)
c.setPrometheusInstantReqParams(req, query, timestamp)
}
case datasourceGraphite:
vm.setGraphiteReqParams(req, query)
c.setGraphiteReqParams(req, query)
case datasourceVLogs:
if isQueryRange {
c.setVLogsRangeReqParams(req, vlogsQuery, timestamp, timestamp)
} else {
c.setVLogsInstantReqParams(req, vlogsQuery, timestamp)
}
}
checkFn(t, req)
@ -399,19 +521,19 @@ func TestRequestParams(t *testing.T) {
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
storage := VMStorage{
storage := Client{
extraParams: url.Values{"round_digits": {"10"}},
}
// prometheus path
f(false, &VMStorage{
f(false, &Client{
dataSourceType: datasourcePrometheus,
}, func(t *testing.T, r *http.Request) {
checkEqualString(t, "/api/v1/query", r.URL.Path)
})
// prometheus prefix
f(false, &VMStorage{
f(false, &Client{
dataSourceType: datasourcePrometheus,
appendTypePrefix: true,
}, func(t *testing.T, r *http.Request) {
@ -419,14 +541,14 @@ func TestRequestParams(t *testing.T) {
})
// prometheus range path
f(true, &VMStorage{
f(true, &Client{
dataSourceType: datasourcePrometheus,
}, func(t *testing.T, r *http.Request) {
checkEqualString(t, "/api/v1/query_range", r.URL.Path)
})
// prometheus range prefix
f(true, &VMStorage{
f(true, &Client{
dataSourceType: datasourcePrometheus,
appendTypePrefix: true,
}, func(t *testing.T, r *http.Request) {
@ -434,14 +556,14 @@ func TestRequestParams(t *testing.T) {
})
// graphite path
f(false, &VMStorage{
f(false, &Client{
dataSourceType: datasourceGraphite,
}, func(t *testing.T, r *http.Request) {
checkEqualString(t, graphitePath, r.URL.Path)
})
// graphite prefix
f(false, &VMStorage{
f(false, &Client{
dataSourceType: datasourceGraphite,
appendTypePrefix: true,
}, func(t *testing.T, r *http.Request) {
@ -449,20 +571,26 @@ func TestRequestParams(t *testing.T) {
})
// default params
f(false, &VMStorage{}, func(t *testing.T, r *http.Request) {
f(false, &Client{dataSourceType: datasourcePrometheus}, func(t *testing.T, r *http.Request) {
exp := url.Values{"query": {query}, "time": {timestamp.Format(time.RFC3339)}}
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
})
f(false, &Client{dataSourceType: datasourcePrometheus, applyIntervalAsTimeFilter: true}, func(t *testing.T, r *http.Request) {
exp := url.Values{"query": {query}, "time": {timestamp.Format(time.RFC3339)}}
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
})
// default range params
f(true, &VMStorage{}, func(t *testing.T, r *http.Request) {
f(true, &Client{dataSourceType: datasourcePrometheus}, func(t *testing.T, r *http.Request) {
ts := timestamp.Format(time.RFC3339)
exp := url.Values{"query": {query}, "start": {ts}, "end": {ts}}
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
})
// basic auth
f(false, &VMStorage{
f(false, &Client{
dataSourceType: datasourcePrometheus,
authCfg: authCfg,
}, func(t *testing.T, r *http.Request) {
u, p, _ := r.BasicAuth()
@ -471,7 +599,8 @@ func TestRequestParams(t *testing.T) {
})
// basic auth range
f(true, &VMStorage{
f(true, &Client{
dataSourceType: datasourcePrometheus,
authCfg: authCfg,
}, func(t *testing.T, r *http.Request) {
u, p, _ := r.BasicAuth()
@ -480,7 +609,8 @@ func TestRequestParams(t *testing.T) {
})
// evaluation interval
f(false, &VMStorage{
f(false, &Client{
dataSourceType: datasourcePrometheus,
evaluationInterval: 15 * time.Second,
}, func(t *testing.T, r *http.Request) {
evalInterval := 15 * time.Second
@ -489,7 +619,8 @@ func TestRequestParams(t *testing.T) {
})
// step override
f(false, &VMStorage{
f(false, &Client{
dataSourceType: datasourcePrometheus,
queryStep: time.Minute,
}, func(t *testing.T, r *http.Request) {
exp := url.Values{
@ -501,7 +632,8 @@ func TestRequestParams(t *testing.T) {
})
// step to seconds
f(false, &VMStorage{
f(false, &Client{
dataSourceType: datasourcePrometheus,
evaluationInterval: 3 * time.Hour,
}, func(t *testing.T, r *http.Request) {
evalInterval := 3 * time.Hour
@ -510,7 +642,8 @@ func TestRequestParams(t *testing.T) {
})
// prometheus extra params
f(false, &VMStorage{
f(false, &Client{
dataSourceType: datasourcePrometheus,
extraParams: url.Values{"round_digits": {"10"}},
}, func(t *testing.T, r *http.Request) {
exp := url.Values{"query": {query}, "round_digits": {"10"}, "time": {timestamp.Format(time.RFC3339)}}
@ -518,7 +651,8 @@ func TestRequestParams(t *testing.T) {
})
// prometheus extra params range
f(true, &VMStorage{
f(true, &Client{
dataSourceType: datasourcePrometheus,
extraParams: url.Values{
"nocache": {"1"},
"max_lookback": {"1h"},
@ -536,6 +670,7 @@ func TestRequestParams(t *testing.T) {
// custom params overrides the original params
f(false, storage.Clone().ApplyParams(QuerierParams{
DataSourceType: string(datasourcePrometheus),
QueryParams: url.Values{"round_digits": {"2"}},
}), func(t *testing.T, r *http.Request) {
exp := url.Values{"query": {query}, "round_digits": {"2"}, "time": {timestamp.Format(time.RFC3339)}}
@ -544,6 +679,7 @@ func TestRequestParams(t *testing.T) {
// allow duplicates in query params
f(false, storage.Clone().ApplyParams(QuerierParams{
DataSourceType: string(datasourcePrometheus),
QueryParams: url.Values{"extra_labels": {"env=dev", "foo=bar"}},
}), func(t *testing.T, r *http.Request) {
exp := url.Values{"query": {query}, "round_digits": {"10"}, "extra_labels": {"env=dev", "foo=bar"}, "time": {timestamp.Format(time.RFC3339)}}
@ -551,7 +687,7 @@ func TestRequestParams(t *testing.T) {
})
// graphite extra params
f(false, &VMStorage{
f(false, &Client{
dataSourceType: datasourceGraphite,
extraParams: url.Values{
"nocache": {"1"},
@ -563,7 +699,7 @@ func TestRequestParams(t *testing.T) {
})
// graphite extra params allows to override from
f(false, &VMStorage{
f(false, &Client{
dataSourceType: datasourceGraphite,
extraParams: url.Values{
"from": {"-10m"},
@ -572,10 +708,38 @@ func TestRequestParams(t *testing.T) {
exp := fmt.Sprintf("format=json&from=-10m&target=%s&until=now", query)
checkEqualString(t, exp, r.URL.RawQuery)
})
// test vlogs
f(false, &Client{
dataSourceType: datasourceVLogs,
evaluationInterval: time.Minute,
}, func(t *testing.T, r *http.Request) {
exp := url.Values{"query": {vlogsQuery}, "time": {timestamp.Format(time.RFC3339)}}
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
})
f(false, &Client{
dataSourceType: datasourceVLogs,
evaluationInterval: time.Minute,
applyIntervalAsTimeFilter: true,
}, func(t *testing.T, r *http.Request) {
ts := timestamp.Format(time.RFC3339)
exp := url.Values{"query": {vlogsQuery}, "time": {ts}, "start": {timestamp.Add(-time.Minute).Format(time.RFC3339)}, "end": {ts}}
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
})
f(true, &Client{
dataSourceType: datasourceVLogs,
evaluationInterval: time.Minute,
}, func(t *testing.T, r *http.Request) {
ts := timestamp.Format(time.RFC3339)
exp := url.Values{"query": {vlogsQuery}, "start": {ts}, "end": {ts}, "step": {"60s"}}
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
})
}
func TestHeaders(t *testing.T) {
f := func(vmFn func() *VMStorage, checkFn func(t *testing.T, r *http.Request)) {
f := func(vmFn func() *Client, checkFn func(t *testing.T, r *http.Request)) {
t.Helper()
vm := vmFn()
@ -587,12 +751,12 @@ func TestHeaders(t *testing.T) {
}
// basic auth
f(func() *VMStorage {
f(func() *Client {
cfg, err := utils.AuthConfig(utils.WithBasicAuth("foo", "bar", ""))
if err != nil {
t.Fatalf("Error get auth config: %s", err)
}
return &VMStorage{authCfg: cfg}
return NewPrometheusClient("", cfg, false, nil)
}, func(t *testing.T, r *http.Request) {
u, p, _ := r.BasicAuth()
checkEqualString(t, "foo", u)
@ -600,12 +764,12 @@ func TestHeaders(t *testing.T) {
})
// bearer auth
f(func() *VMStorage {
f(func() *Client {
cfg, err := utils.AuthConfig(utils.WithBearer("foo", ""))
if err != nil {
t.Fatalf("Error get auth config: %s", err)
}
return &VMStorage{authCfg: cfg}
return NewPrometheusClient("", cfg, false, nil)
}, func(t *testing.T, r *http.Request) {
reqToken := r.Header.Get("Authorization")
splitToken := strings.Split(reqToken, "Bearer ")
@ -617,11 +781,13 @@ func TestHeaders(t *testing.T) {
})
// custom extraHeaders
f(func() *VMStorage {
return &VMStorage{extraHeaders: []keyValue{
f(func() *Client {
c := NewPrometheusClient("", nil, false, nil)
c.extraHeaders = []keyValue{
{key: "Foo", value: "bar"},
{key: "Baz", value: "qux"},
}}
}
return c
}, func(t *testing.T, r *http.Request) {
h1 := r.Header.Get("Foo")
checkEqualString(t, "bar", h1)
@ -630,17 +796,16 @@ func TestHeaders(t *testing.T) {
})
// custom header overrides basic auth
f(func() *VMStorage {
f(func() *Client {
cfg, err := utils.AuthConfig(utils.WithBasicAuth("foo", "bar", ""))
if err != nil {
t.Fatalf("Error get auth config: %s", err)
}
return &VMStorage{
authCfg: cfg,
extraHeaders: []keyValue{
c := NewPrometheusClient("", cfg, false, nil)
c.extraHeaders = []keyValue{
{key: "Authorization", value: "Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ=="},
},
}
return c
}, func(t *testing.T, r *http.Request) {
u, p, _ := r.BasicAuth()
checkEqualString(t, "Aladdin", u)

View file

@ -0,0 +1,61 @@
package datasource
import (
"fmt"
"net/http"
"time"
)
func (s *Client) setVLogsInstantReqParams(r *http.Request, query string, timestamp time.Time) {
// there is no type path prefix in victorialogs APIs right now, ignore appendTypePrefix.
if !*disablePathAppend {
r.URL.Path += "/select/logsql/stats_query"
}
q := r.URL.Query()
// set `time` param explicitly, it will be used as the timestamp of query results.
q.Set("time", timestamp.Format(time.RFC3339))
// set the `start` and `end` params if applyIntervalAsTimeFilter is enabled(time filter is missing in the rule expr),
// so the query will be executed in time range [timestamp - evaluationInterval, timestamp].
if s.applyIntervalAsTimeFilter && s.evaluationInterval > 0 {
q.Set("start", timestamp.Add(-s.evaluationInterval).Format(time.RFC3339))
q.Set("end", timestamp.Format(time.RFC3339))
}
r.URL.RawQuery = q.Encode()
s.setReqParams(r, query)
}
func (s *Client) setVLogsRangeReqParams(r *http.Request, query string, start, end time.Time) {
// there is no type path prefix in victorialogs APIs right now, ignore appendTypePrefix.
if !*disablePathAppend {
r.URL.Path += "/select/logsql/stats_query_range"
}
q := r.URL.Query()
q.Add("start", start.Format(time.RFC3339))
q.Add("end", end.Format(time.RFC3339))
// set step as evaluationInterval by default
if s.evaluationInterval > 0 {
q.Set("step", fmt.Sprintf("%ds", int(s.evaluationInterval.Seconds())))
}
r.URL.RawQuery = q.Encode()
s.setReqParams(r, query)
}
func parseVLogsResponse(req *http.Request, resp *http.Response) (res Result, err error) {
res, err = parsePrometheusResponse(req, resp)
if err != nil {
return Result{}, err
}
for i := range res.Data {
m := &res.Data[i]
for j := range m.Labels {
// reserve the stats func result name with a new label `stats_result` instead of dropping it,
// since there could be multiple stats results in a single query, for instance:
// _time:5m | stats quantile(0.5, request_duration_seconds) p50, quantile(0.9, request_duration_seconds) p90
if m.Labels[j].Name == "__name__" {
m.Labels[j].Name = "stats_result"
break
}
}
}
return
}

View file

@ -43,6 +43,10 @@ type QuerierBuilder interface {
// QuerierParams params for Querier.
type QuerierParams struct {
DataSourceType string
// ApplyIntervalAsTimeFilter is only valid for vlogs datasource.
// Set to true if there is no [timeFilter](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter) in the rule expression,
// and we will add evaluation interval as an additional timeFilter when querying.
ApplyIntervalAsTimeFilter bool
EvaluationInterval time.Duration
QueryParams url.Values
Headers map[string]string

View file

@ -133,13 +133,12 @@ func Init(extraParams url.Values) (QuerierBuilder, error) {
return nil, fmt.Errorf("failed to set request auth header to datasource %q: %w", *addr, err)
}
return &VMStorage{
return &Client{
c: &http.Client{Transport: tr},
authCfg: authCfg,
datasourceURL: strings.TrimSuffix(*addr, "/"),
appendTypePrefix: *appendTypePrefix,
queryStep: *queryStep,
dataSourceType: datasourcePrometheus,
extraParams: extraParams,
}, nil
}

View file

@ -1,272 +0,0 @@
package datasource
import (
"context"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
)
type datasourceType string
const (
datasourcePrometheus datasourceType = "prometheus"
datasourceGraphite datasourceType = "graphite"
)
func toDatasourceType(s string) datasourceType {
if s == string(datasourceGraphite) {
return datasourceGraphite
}
return datasourcePrometheus
}
// VMStorage represents vmstorage entity with ability to read and write metrics
// WARN: when adding a new field, remember to update Clone() method.
type VMStorage struct {
c *http.Client
authCfg *promauth.Config
datasourceURL string
appendTypePrefix bool
queryStep time.Duration
dataSourceType datasourceType
// evaluationInterval will help setting request's `step` param.
evaluationInterval time.Duration
// extraParams contains params to be attached to each HTTP request
extraParams url.Values
// extraHeaders are headers to be attached to each HTTP request
extraHeaders []keyValue
// whether to print additional log messages
// for each sent request
debug bool
}
type keyValue struct {
key string
value string
}
// Clone makes clone of VMStorage, shares http client.
func (s *VMStorage) Clone() *VMStorage {
ns := &VMStorage{
c: s.c,
authCfg: s.authCfg,
datasourceURL: s.datasourceURL,
appendTypePrefix: s.appendTypePrefix,
queryStep: s.queryStep,
dataSourceType: s.dataSourceType,
evaluationInterval: s.evaluationInterval,
// init map so it can be populated below
extraParams: url.Values{},
debug: s.debug,
}
if len(s.extraHeaders) > 0 {
ns.extraHeaders = make([]keyValue, len(s.extraHeaders))
copy(ns.extraHeaders, s.extraHeaders)
}
for k, v := range s.extraParams {
ns.extraParams[k] = v
}
return ns
}
// ApplyParams - changes given querier params.
func (s *VMStorage) ApplyParams(params QuerierParams) *VMStorage {
s.dataSourceType = toDatasourceType(params.DataSourceType)
s.evaluationInterval = params.EvaluationInterval
if params.QueryParams != nil {
if s.extraParams == nil {
s.extraParams = url.Values{}
}
for k, vl := range params.QueryParams {
// custom query params are prior to default ones
if s.extraParams.Has(k) {
s.extraParams.Del(k)
}
for _, v := range vl {
// don't use .Set() instead of Del/Add since it is allowed
// for GET params to be duplicated
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4908
s.extraParams.Add(k, v)
}
}
}
if params.Headers != nil {
for key, value := range params.Headers {
kv := keyValue{key: key, value: value}
s.extraHeaders = append(s.extraHeaders, kv)
}
}
s.debug = params.Debug
return s
}
// BuildWithParams - implements interface.
func (s *VMStorage) BuildWithParams(params QuerierParams) Querier {
return s.Clone().ApplyParams(params)
}
// NewVMStorage is a constructor for VMStorage
func NewVMStorage(baseURL string, authCfg *promauth.Config, queryStep time.Duration, appendTypePrefix bool, c *http.Client) *VMStorage {
return &VMStorage{
c: c,
authCfg: authCfg,
datasourceURL: strings.TrimSuffix(baseURL, "/"),
appendTypePrefix: appendTypePrefix,
queryStep: queryStep,
dataSourceType: datasourcePrometheus,
extraParams: url.Values{},
}
}
// Query executes the given query and returns parsed response
func (s *VMStorage) Query(ctx context.Context, query string, ts time.Time) (Result, *http.Request, error) {
req, err := s.newQueryRequest(ctx, query, ts)
if err != nil {
return Result{}, nil, err
}
resp, err := s.do(req)
if err != nil {
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) && !netutil.IsTrivialNetworkError(err) {
// Return unexpected error to the caller.
return Result{}, nil, err
}
// Something in the middle between client and datasource might be closing
// the connection. So we do a one more attempt in hope request will succeed.
req, err = s.newQueryRequest(ctx, query, ts)
if err != nil {
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
}
resp, err = s.do(req)
if err != nil {
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
}
}
// Process the received response.
parseFn := parsePrometheusResponse
if s.dataSourceType != datasourcePrometheus {
parseFn = parseGraphiteResponse
}
result, err := parseFn(req, resp)
_ = resp.Body.Close()
return result, req, err
}
// QueryRange executes the given query on the given time range.
// For Prometheus type see https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries
// Graphite type isn't supported.
func (s *VMStorage) QueryRange(ctx context.Context, query string, start, end time.Time) (res Result, err error) {
if s.dataSourceType != datasourcePrometheus {
return res, fmt.Errorf("%q is not supported for QueryRange", s.dataSourceType)
}
if start.IsZero() {
return res, fmt.Errorf("start param is missing")
}
if end.IsZero() {
return res, fmt.Errorf("end param is missing")
}
req, err := s.newQueryRangeRequest(ctx, query, start, end)
if err != nil {
return res, err
}
resp, err := s.do(req)
if err != nil {
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) && !netutil.IsTrivialNetworkError(err) {
// Return unexpected error to the caller.
return res, err
}
// Something in the middle between client and datasource might be closing
// the connection. So we do a one more attempt in hope request will succeed.
req, err = s.newQueryRangeRequest(ctx, query, start, end)
if err != nil {
return res, fmt.Errorf("second attempt: %w", err)
}
resp, err = s.do(req)
if err != nil {
return res, fmt.Errorf("second attempt: %w", err)
}
}
// Process the received response.
res, err = parsePrometheusResponse(req, resp)
_ = resp.Body.Close()
return res, err
}
func (s *VMStorage) do(req *http.Request) (*http.Response, error) {
ru := req.URL.Redacted()
if *showDatasourceURL {
ru = req.URL.String()
}
if s.debug {
logger.Infof("DEBUG datasource request: executing %s request with params %q", req.Method, ru)
}
resp, err := s.c.Do(req)
if err != nil {
return nil, fmt.Errorf("error getting response from %s: %w", ru, err)
}
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
_ = resp.Body.Close()
return nil, fmt.Errorf("unexpected response code %d for %s. Response body %s", resp.StatusCode, ru, body)
}
return resp, nil
}
func (s *VMStorage) newQueryRangeRequest(ctx context.Context, query string, start, end time.Time) (*http.Request, error) {
req, err := s.newRequest(ctx)
if err != nil {
return nil, fmt.Errorf("cannot create query_range request to datasource %q: %w", s.datasourceURL, err)
}
s.setPrometheusRangeReqParams(req, query, start, end)
return req, nil
}
func (s *VMStorage) newQueryRequest(ctx context.Context, query string, ts time.Time) (*http.Request, error) {
req, err := s.newRequest(ctx)
if err != nil {
return nil, fmt.Errorf("cannot create query request to datasource %q: %w", s.datasourceURL, err)
}
switch s.dataSourceType {
case "", datasourcePrometheus:
s.setPrometheusInstantReqParams(req, query, ts)
case datasourceGraphite:
s.setGraphiteReqParams(req, query)
default:
logger.Panicf("BUG: engine not found: %q", s.dataSourceType)
}
return req, nil
}
func (s *VMStorage) newRequest(ctx context.Context) (*http.Request, error) {
req, err := http.NewRequestWithContext(ctx, http.MethodPost, s.datasourceURL, nil)
if err != nil {
logger.Panicf("BUG: unexpected error from http.NewRequest(%q): %s", s.datasourceURL, err)
}
req.Header.Set("Content-Type", "application/json")
if s.authCfg != nil {
err = s.authCfg.SetHeaders(req, true)
if err != nil {
return nil, err
}
}
for _, h := range s.extraHeaders {
req.Header.Set(h.key, h.value)
}
return req, nil
}

View file

@ -66,7 +66,7 @@ absolute path to all .tpl files in root.
evaluationInterval = flag.Duration("evaluationInterval", time.Minute, "How often to evaluate the rules")
validateTemplates = flag.Bool("rule.validateTemplates", true, "Whether to validate annotation and label templates")
validateExpressions = flag.Bool("rule.validateExpressions", true, "Whether to validate rules expressions via MetricsQL engine")
validateExpressions = flag.Bool("rule.validateExpressions", true, "Whether to validate rules expressions for different types.")
externalURL = flag.String("external.url", "", "External URL is used as alert's source for sent alerts to the notifier. By default, hostname is used as address.")
externalAlertSource = flag.String("external.alert.source", "", `External Alert Source allows to override the Source link for alerts sent to AlertManager `+

View file

@ -86,5 +86,5 @@ func Init() (datasource.QuerierBuilder, error) {
return nil, fmt.Errorf("failed to configure auth: %w", err)
}
c := &http.Client{Transport: tr}
return datasource.NewVMStorage(*addr, authCfg, 0, false, c), nil
return datasource.NewPrometheusClient(*addr, authCfg, false, c), nil
}

View file

@ -15,6 +15,7 @@ import (
"github.com/golang/snappy"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
@ -22,11 +23,12 @@ import (
"github.com/VictoriaMetrics/metrics"
)
var defaultConcurrency = cgroup.AvailableCPUs() * 2
const (
defaultConcurrency = 4
defaultMaxBatchSize = 1e3
defaultMaxQueueSize = 1e5
defaultFlushInterval = 5 * time.Second
defaultMaxBatchSize = 1e4
defaultMaxQueueSize = 1e6
defaultFlushInterval = 2 * time.Second
defaultWriteTimeout = 30 * time.Second
)

View file

@ -34,10 +34,10 @@ var (
idleConnectionTimeout = flag.Duration("remoteWrite.idleConnTimeout", 50*time.Second, `Defines a duration for idle (keep-alive connections) to exist. Consider settings this value less to the value of "-http.idleConnTimeout". It must prevent possible "write: broken pipe" and "read: connection reset by peer" errors.`)
maxQueueSize = flag.Int("remoteWrite.maxQueueSize", 1e6, "Defines the max number of pending datapoints to remote write endpoint")
maxBatchSize = flag.Int("remoteWrite.maxBatchSize", 1e4, "Defines max number of timeseries to be flushed at once")
concurrency = flag.Int("remoteWrite.concurrency", 4, "Defines number of writers for concurrent writing into remote write endpoint")
flushInterval = flag.Duration("remoteWrite.flushInterval", 5*time.Second, "Defines interval of flushes to remote write endpoint")
maxQueueSize = flag.Int("remoteWrite.maxQueueSize", defaultMaxQueueSize, "Defines the max number of pending datapoints to remote write endpoint")
maxBatchSize = flag.Int("remoteWrite.maxBatchSize", defaultMaxBatchSize, "Defines max number of timeseries to be flushed at once")
concurrency = flag.Int("remoteWrite.concurrency", defaultConcurrency, "Defines number of writers for concurrent writing into remote write endpoint")
flushInterval = flag.Duration("remoteWrite.flushInterval", defaultFlushInterval, "Defines interval of flushes to remote write endpoint")
tlsInsecureSkipVerify = flag.Bool("remoteWrite.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -remoteWrite.url")
tlsCertFile = flag.String("remoteWrite.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -remoteWrite.url")

View file

@ -73,6 +73,7 @@ func NewAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
Debug: cfg.Debug,
q: qb.BuildWithParams(datasource.QuerierParams{
DataSourceType: group.Type.String(),
ApplyIntervalAsTimeFilter: setIntervalAsTimeFilter(group.Type.String(), cfg.Expr),
EvaluationInterval: group.Interval,
QueryParams: group.Params,
Headers: group.Headers,

View file

@ -213,7 +213,6 @@ func (g *Group) restore(ctx context.Context, qb datasource.QuerierBuilder, ts ti
continue
}
q := qb.BuildWithParams(datasource.QuerierParams{
DataSourceType: g.Type.String(),
EvaluationInterval: g.Interval,
QueryParams: g.Params,
Headers: g.Headers,

View file

@ -10,6 +10,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)
@ -65,6 +66,7 @@ func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul
metrics: &recordingRuleMetrics{},
q: qb.BuildWithParams(datasource.QuerierParams{
DataSourceType: group.Type.String(),
ApplyIntervalAsTimeFilter: setIntervalAsTimeFilter(group.Type.String(), cfg.Expr),
EvaluationInterval: group.Interval,
QueryParams: group.Params,
Headers: group.Headers,
@ -213,3 +215,12 @@ func (rr *RecordingRule) updateWith(r Rule) error {
rr.q = nr.q
return nil
}
// setIntervalAsTimeFilter returns true if given LogsQL has a time filter.
func setIntervalAsTimeFilter(dType, expr string) bool {
if dType != "vlogs" {
return false
}
q, _ := logstorage.ParseStatsQuery(expr)
return !q.ContainAnyTimeFilter()
}

View file

@ -266,3 +266,25 @@ func TestRecordingRuleExec_Negative(t *testing.T) {
t.Fatalf("cannot execute recroding rule: %s", err)
}
}
func TestSetIntervalAsTimeFilter(t *testing.T) {
f := func(s, dType string, expected bool) {
t.Helper()
if setIntervalAsTimeFilter(dType, s) != expected {
t.Fatalf("unexpected result for hasTimeFilter(%q); want %v", s, expected)
}
}
f(`* | count()`, "prometheus", false)
f(`* | count()`, "vlogs", true)
f(`error OR _time:5m | count()`, "vlogs", true)
f(`(_time: 5m AND error) OR (_time: 5m AND warn) | count()`, "vlogs", true)
f(`* | error OR _time:5m | count()`, "vlogs", true)
f(`_time:5m | count()`, "vlogs", false)
f(`_time:2023-04-25T22:45:59Z | count()`, "vlogs", false)
f(`error AND _time:5m | count()`, "vlogs", false)
f(`* | error AND _time:5m | count()`, "vlogs", false)
}

View file

@ -217,6 +217,8 @@ func recordingToAPI(rr *rule.RecordingRule) apiRule {
// encode as strings to avoid rounding
ID: fmt.Sprintf("%d", rr.ID()),
GroupID: fmt.Sprintf("%d", rr.GroupID),
GroupName: rr.GroupName,
File: rr.File,
}
if lastState.Err != nil {
r.LastError = lastState.Err.Error()

View file

@ -1,9 +1,56 @@
package main
import (
"fmt"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
"reflect"
"testing"
)
func TestRecordingToApi(t *testing.T) {
fq := &datasource.FakeQuerier{}
fq.Add(datasource.Metric{
Values: []float64{1}, Timestamps: []int64{0},
})
g := &rule.Group{
Name: "group",
File: "rules.yaml",
Concurrency: 1,
}
entriesLimit := 44
rr := rule.NewRecordingRule(fq, g, config.Rule{
ID: 1248,
Record: "record_name",
Expr: "up",
Labels: map[string]string{"label": "value"},
UpdateEntriesLimit: &entriesLimit,
})
expectedRes := apiRule{
Name: "record_name",
Query: "up",
Labels: map[string]string{"label": "value"},
Health: "ok",
Type: ruleTypeRecording,
DatasourceType: "prometheus",
ID: "1248",
GroupID: fmt.Sprintf("%d", g.ID()),
GroupName: "group",
File: "rules.yaml",
MaxUpdates: 44,
Updates: make([]rule.StateEntry, 0),
}
res := recordingToAPI(rr)
if !reflect.DeepEqual(res, expectedRes) {
t.Fatalf("expected to have: \n%v;\ngot: \n%v", expectedRes, res)
}
}
func TestUrlValuesToStrings(t *testing.T) {
mapQueryParams := map[string][]string{
"param1": {"param1"},

View file

@ -120,7 +120,7 @@ func (p *vmNativeProcessor) runSingle(ctx context.Context, f native.Filter, srcU
if p.disablePerMetricRequests {
pr := bar.NewProxyReader(reader)
if pr != nil {
reader = bar.NewProxyReader(reader)
reader = pr
fmt.Printf("Continue import process with filter %s:\n", f.String())
}
}
@ -193,7 +193,15 @@ func (p *vmNativeProcessor) runBackfilling(ctx context.Context, tenantID string,
var metrics = map[string][][]time.Time{
"": ranges,
}
format := nativeSingleProcessTpl
barPrefix := "Requests to make"
if p.interCluster {
barPrefix = fmt.Sprintf("Requests to make for tenant %s", tenantID)
}
if !p.disablePerMetricRequests {
format = fmt.Sprintf(nativeWithBackoffTpl, barPrefix)
metrics, err = p.explore(ctx, p.src, tenantID, ranges)
if err != nil {
return fmt.Errorf("failed to explore metric names: %s", err)
@ -223,15 +231,7 @@ func (p *vmNativeProcessor) runBackfilling(ctx context.Context, tenantID string,
log.Print(foundSeriesMsg)
}
barPrefix := "Requests to make"
if p.interCluster {
barPrefix = fmt.Sprintf("Requests to make for tenant %s", tenantID)
}
bar := barpool.NewSingleProgress(fmt.Sprintf(nativeWithBackoffTpl, barPrefix), requestsToMake)
if p.disablePerMetricRequests {
bar = barpool.NewSingleProgress(nativeSingleProcessTpl, 0)
}
bar := barpool.NewSingleProgress(format, requestsToMake)
bar.Start()
defer bar.Finish()
@ -362,16 +362,17 @@ func byteCountSI(b int64) string {
}
func buildMatchWithFilter(filter string, metricName string) (string, error) {
if filter == metricName {
return filter, nil
}
nameFilter := fmt.Sprintf("__name__=%q", metricName)
tfss, err := searchutils.ParseMetricSelector(filter)
if err != nil {
return "", err
}
if filter == metricName || metricName == "" {
return filter, nil
}
nameFilter := fmt.Sprintf("__name__=%q", metricName)
var filters []string
for _, tfs := range tfss {
var a []string

View file

@ -293,6 +293,9 @@ func TestBuildMatchWithFilter_Success(t *testing.T) {
// only label with regexp
f(`{cluster=~".*"}`, "http_request_count_total", `{cluster=~".*",__name__="http_request_count_total"}`)
// only label with regexp, empty metric name
f(`{cluster=~".*"}`, "", `{cluster=~".*"}`)
// many labels in filter with regexp
f(`{cluster=~".*",job!=""}`, "http_request_count_total", `{cluster=~".*",job!="",__name__="http_request_count_total"}`)
@ -307,4 +310,7 @@ func TestBuildMatchWithFilter_Success(t *testing.T) {
// metric name has negative regexp
f(`{__name__!~".*"}`, "http_request_count_total", `{__name__="http_request_count_total"}`)
// metric name has negative regex and metric name is empty
f(`{__name__!~".*"}`, "", `{__name__!~".*"}`)
}

View file

@ -15,7 +15,7 @@ import (
var (
maxExportDuration = flag.Duration("search.maxExportDuration", time.Hour*24*30, "The maximum duration for /api/v1/export call")
maxQueryDuration = flag.Duration("search.maxQueryDuration", time.Second*30, "The maximum duration for query execution. It can be overridden on a per-query basis via 'timeout' query arg")
maxQueryDuration = flag.Duration("search.maxQueryDuration", time.Second*30, "The maximum duration for query execution. It can be overridden to a smaller value on a per-query basis via 'timeout' query arg")
maxStatusRequestDuration = flag.Duration("search.maxStatusRequestDuration", time.Minute*5, "The maximum duration for /api/v1/status/* requests")
maxLabelsAPIDuration = flag.Duration("search.maxLabelsAPIDuration", time.Second*5, "The maximum duration for /api/v1/labels, /api/v1/label/.../values and /api/v1/series requests. "+
"See also -search.maxLabelsAPISeries and -search.ignoreExtraFiltersAtLabelsAPI")

View file

@ -1,13 +1,13 @@
{
"files": {
"main.css": "./static/css/main.d781989c.css",
"main.js": "./static/js/main.68e2aae8.js",
"main.js": "./static/js/main.7ec4e6eb.js",
"static/js/685.f772060c.chunk.js": "./static/js/685.f772060c.chunk.js",
"static/media/MetricsQL.md": "./static/media/MetricsQL.a00044c91d9781cf8557.md",
"index.html": "./index.html"
},
"entrypoints": [
"static/css/main.d781989c.css",
"static/js/main.68e2aae8.js"
"static/js/main.7ec4e6eb.js"
]
}

View file

@ -0,0 +1,5 @@
{
"license": {
"type": "opensource"
}
}

View file

@ -1 +1 @@
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><link rel="apple-touch-icon" href="./favicon.svg"/><link rel="mask-icon" href="./favicon.svg" color="#000000"><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=5"/><meta name="theme-color" content="#000000"/><meta name="description" content="Explore and troubleshoot your VictoriaMetrics data"/><link rel="manifest" href="./manifest.json"/><title>vmui</title><script src="./dashboards/index.js" type="module"></script><meta name="twitter:card" content="summary"><meta name="twitter:title" content="UI for VictoriaMetrics"><meta name="twitter:site" content="@https://victoriametrics.com/"><meta name="twitter:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta name="twitter:image" content="./preview.jpg"><meta property="og:type" content="website"><meta property="og:title" content="UI for VictoriaMetrics"><meta property="og:url" content="https://victoriametrics.com/"><meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data"><script defer="defer" src="./static/js/main.68e2aae8.js"></script><link href="./static/css/main.d781989c.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><link rel="apple-touch-icon" href="./favicon.svg"/><link rel="mask-icon" href="./favicon.svg" color="#000000"><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=5"/><meta name="theme-color" content="#000000"/><meta name="description" content="Explore and troubleshoot your VictoriaMetrics data"/><link rel="manifest" href="./manifest.json"/><title>vmui</title><script src="./dashboards/index.js" type="module"></script><meta name="twitter:card" content="summary"><meta name="twitter:title" content="UI for VictoriaMetrics"><meta name="twitter:site" content="@https://victoriametrics.com/"><meta name="twitter:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta name="twitter:image" content="./preview.jpg"><meta property="og:type" content="website"><meta property="og:title" content="UI for VictoriaMetrics"><meta property="og:url" content="https://victoriametrics.com/"><meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data"><script defer="defer" src="./static/js/main.7ec4e6eb.js"></script><link href="./static/css/main.d781989c.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1,7 +1,6 @@
import { useAppDispatch, useAppState } from "../state/common/StateContext";
import { useEffect, useState } from "preact/compat";
import { ErrorTypes } from "../types";
import { getUrlWithoutTenant } from "../utils/tenants";
const useFetchFlags = () => {
const { serverUrl } = useAppState();
@ -17,7 +16,7 @@ const useFetchFlags = () => {
setIsLoading(true);
try {
const url = getUrlWithoutTenant(serverUrl);
const url = new URL(serverUrl).origin;
const response = await fetch(`${url}/flags`);
const data = await response.text();
const flags = data.split("\n").filter(flag => flag.trim() !== "")

View file

@ -28,7 +28,7 @@ const ExploreLogs: FC = () => {
const [limit, setLimit] = useStateSearchParams(defaultLimit, "limit");
const [query, setQuery] = useStateSearchParams("*", "query");
const [tmpQuery, setTmpQuery] = useState("");
const [isUpdatingQuery, setIsUpdatingQuery] = useState(false);
const [period, setPeriod] = useState<TimeParams>(periodState);
const [queryError, setQueryError] = useState<ErrorTypes | string>("");
@ -70,26 +70,28 @@ const ExploreLogs: FC = () => {
const handleApplyFilter = (val: string) => {
setQuery(prev => `_stream: ${val === "other" ? "{}" : val} AND (${prev})`);
setIsUpdatingQuery(true);
};
const handleUpdateQuery = useCallback(() => {
const handleUpdateQuery = () => {
if (isLoading || dataLogHits.isLoading) {
abortController.abort && abortController.abort();
dataLogHits.abortController.abort && dataLogHits.abortController.abort();
} else {
setQuery(tmpQuery);
handleRunQuery();
}
}, [isLoading, dataLogHits.isLoading]);
};
useEffect(() => {
if (query) handleRunQuery();
if (!query) return;
handleRunQuery();
}, [periodState]);
useEffect(() => {
if (!isUpdatingQuery) return;
handleRunQuery();
setTmpQuery(query);
}, [query]);
setIsUpdatingQuery(false);
}, [query, isUpdatingQuery]);
useEffect(() => {
!hideChart && fetchLogHits(period);
@ -98,10 +100,10 @@ const ExploreLogs: FC = () => {
return (
<div className="vm-explore-logs">
<ExploreLogsHeader
query={tmpQuery}
query={query}
error={queryError}
limit={limit}
onChange={setTmpQuery}
onChange={setQuery}
onChangeLimit={handleChangeLimit}
onRun={handleUpdateQuery}
isLoading={isLoading || dataLogHits.isLoading}

View file

@ -1,4 +1,5 @@
import router, { routerOptions } from "./index";
import { getTenantIdFromUrl } from "../utils/tenants";
export enum NavigationItemType {
internalLink,
@ -24,10 +25,12 @@ interface NavigationConfig {
* Special case for alert link
*/
const getAlertLink = (url: string, showAlertLink: boolean) => {
// see more https://docs.victoriametrics.com/cluster-victoriametrics/?highlight=vmalertproxyurl#vmalert
// see more https://docs.victoriametrics.com/cluster-victoriametrics/#vmalert
const isCluster = !!getTenantIdFromUrl(url);
const value = isCluster ? `${url}/vmalert` : url.replace(/\/prometheus$/, "/vmalert");
return {
label: "Alerts",
value: `${url}/vmalert`,
value,
type: NavigationItemType.externalLink,
hide: !showAlertLink,
};

40
apptest/README.md Normal file
View file

@ -0,0 +1,40 @@
# App Integration Tests
The `apptest` package contains the integration tests for the VictoriaMetrics
applications (such as vmstorage, vminsert, and vmselect).
An integration test aims at verifying the behavior of an application as a whole,
as apposed to a unit test that verifies the behavior of a building block of an
application.
To achieve that an integration test starts an application in a separate process
and then issues HTTP requets to it and verifies the responses, examines the
metrics the app exposes and/or files it creates, etc.
Note that an object of testing may be not just a single app, but several apps
working together. A good example is VictoriaMetrics cluster. An integration test
may reproduce an arbitrary cluster configuration and verify how the components
work together as a system.
The package provides a collection of helpers to start applications and make
queries to them:
- `app.go` - contains the generic code for staring an application and should
not be used by integration tests directly.
- `{vmstorage,vminsert,etc}.go` - build on top of `app.go` and provide the
code for staring a specific application.
- `client.go` - provides helper functions for sending HTTP requests to
applications.
The integration tests themselves reside in `*_test.go` files. Apart from having
the `_test` suffix, there are no strict rules of how to name a file, but the
name should reflect the prevailing purpose of the tests located in that file.
For example, `sharding_test.go` aims at testing data sharding.
Since integration tests start applications in a separate process, they require
the application binary files to be built and put into the `bin` directory. The
build rule used for running integration tests, `make integration-test`,
accounts for that, it builds all application binaries before running the tests.
But if you want to run the tests without `make`, i.e. by executing
`go test ./app/apptest`, you will need to build the binaries first (for example,
by executing `make all`).

249
apptest/app.go Normal file
View file

@ -0,0 +1,249 @@
package apptest
import (
"bufio"
"fmt"
"io"
"log"
"os"
"os/exec"
"reflect"
"regexp"
"strings"
"time"
)
// Regular expressions for runtime information to extract from the app logs.
var (
storageDataPathRE = regexp.MustCompile(`successfully opened storage "(.*)"`)
httpListenAddrRE = regexp.MustCompile(`started server at http://(.*:\d{1,5})/`)
vminsertAddrRE = regexp.MustCompile(`accepting vminsert conns at (.*:\d{1,5})$`)
vmselectAddrRE = regexp.MustCompile(`accepting vmselect conns at (.*:\d{1,5})$`)
)
// app represents an instance of some VictoriaMetrics server (such as vmstorage,
// vminsert, or vmselect).
type app struct {
instance string
binary string
flags []string
process *os.Process
}
// appOptions holds the optional configuration of an app, such as default flags
// to set and things to extract from the app's log.
type appOptions struct {
defaultFlags map[string]string
extractREs []*regexp.Regexp
}
// startApp starts an instance of an app using the app binary file path and
// flags. When the opts are set, it also sets the default flag values and
// extracts runtime information from the app's log.
//
// If the app has started successfully and all the requested items has been
// extracted from logs, the function returns the instance of the app and the
// extracted items. The extracted items are returned in the same order as the
// corresponding extract regular expression have been provided in the opts.
//
// The function returns an error if the application has failed to start or the
// function has timed out extracting items from the log (normally because no log
// records match the regular expression).
func startApp(instance string, binary string, flags []string, opts *appOptions) (*app, []string, error) {
flags = setDefaultFlags(flags, opts.defaultFlags)
cmd := exec.Command(binary, flags...)
stdout, err := cmd.StdoutPipe()
if err != nil {
return nil, nil, err
}
stderr, err := cmd.StderrPipe()
if err != nil {
return nil, nil, err
}
if err := cmd.Start(); err != nil {
return nil, nil, err
}
app := &app{
instance: instance,
binary: binary,
flags: flags,
process: cmd.Process,
}
go app.processOutput("stdout", stdout, app.writeToStderr)
lineProcessors := make([]lineProcessor, len(opts.extractREs))
reExtractors := make([]*reExtractor, len(opts.extractREs))
timeout := time.NewTimer(5 * time.Second).C
for i, re := range opts.extractREs {
reExtractors[i] = newREExtractor(re, timeout)
lineProcessors[i] = reExtractors[i].extractRE
}
go app.processOutput("stderr", stderr, append(lineProcessors, app.writeToStderr)...)
extracts, err := extractREs(reExtractors, timeout)
if err != nil {
app.Stop()
return nil, nil, err
}
return app, extracts, nil
}
// setDefaultFlags adds flags with default values to `flags` if it does not
// initially contain them.
func setDefaultFlags(flags []string, defaultFlags map[string]string) []string {
for _, flag := range flags {
for name := range defaultFlags {
if strings.HasPrefix(flag, name) {
delete(defaultFlags, name)
continue
}
}
}
for name, value := range defaultFlags {
flags = append(flags, name+"="+value)
}
return flags
}
// stop sends the app process a SIGINT signal and waits until it terminates
// gracefully.
func (app *app) Stop() {
if err := app.process.Signal(os.Interrupt); err != nil {
log.Fatalf("Could not send SIGINT signal to %s process: %v", app.instance, err)
}
if _, err := app.process.Wait(); err != nil {
log.Fatalf("Could not wait for %s process completion: %v", app.instance, err)
}
}
// String returns the string representation of the app state.
func (app *app) String() string {
return fmt.Sprintf("{instance: %q binary: %q flags: %q}", app.instance, app.binary, app.flags)
}
// lineProcessor is a function that is applied to the each line of the app
// output (stdout or stderr). The function returns true to indicate the caller
// that it has completed its work and should not be called again.
type lineProcessor func(line string) (done bool)
// processOutput invokes a set of processors on each line of app output (stdout
// or stderr). Once a line processor is done (returns true) it is never invoked
// again.
//
// A simple use case for this is to pipe the output of the child process to the
// output of the parent process. A more sophisticated one is to retrieve some
// runtime information from the child process logs, such as the server's
// host:port.
func (app *app) processOutput(outputName string, output io.Reader, lps ...lineProcessor) {
activeLPs := map[int]lineProcessor{}
for i, lp := range lps {
activeLPs[i] = lp
}
scanner := bufio.NewScanner(output)
for scanner.Scan() {
line := scanner.Text()
for i, process := range activeLPs {
if process(line) {
delete(activeLPs, i)
}
}
}
if err := scanner.Err(); err != nil {
log.Printf("could not scan %s %s: %v", app.instance, outputName, err)
}
}
// writeToStderr is a line processor that writes the line to the stderr.
// The function always returns false to indicate its caller that each line must
// be written to the stderr.
func (app *app) writeToStderr(line string) bool {
fmt.Fprintf(os.Stderr, "%s %s\n", app.instance, line)
return false
}
// extractREs waits until all reExtractors return the result and then returns
// the combined result with items ordered the same way as reExtractors.
//
// The function returns an error if timeout occurs sooner then all reExtractors
// finish its work.
func extractREs(reExtractors []*reExtractor, timeout <-chan time.Time) ([]string, error) {
n := len(reExtractors)
notFoundREs := make(map[int]string)
extracts := make([]string, n)
cases := make([]reflect.SelectCase, n+1)
for i, x := range reExtractors {
cases[i] = x.selectCase
notFoundREs[i] = x.re.String()
}
cases[n] = reflect.SelectCase{
Dir: reflect.SelectRecv,
Chan: reflect.ValueOf(timeout),
}
for notFound := n; notFound > 0; {
i, value, _ := reflect.Select(cases)
if i == n {
// n-th select case means timeout.
values := func(m map[int]string) []string {
s := []string{}
for _, v := range m {
s = append(s, v)
}
return s
}
return nil, fmt.Errorf("could not extract some or all regexps from stderr: %q", values(notFoundREs))
}
extracts[i] = value.String()
delete(notFoundREs, i)
notFound--
}
return extracts, nil
}
// reExtractor extracts some information based on a regular expression from the
// app output within a timeout.
type reExtractor struct {
re *regexp.Regexp
result chan string
timeout <-chan time.Time
selectCase reflect.SelectCase
}
// newREExtractor create a new reExtractor based on a regexp and a timeout.
func newREExtractor(re *regexp.Regexp, timeout <-chan time.Time) *reExtractor {
result := make(chan string)
return &reExtractor{
re: re,
result: result,
timeout: timeout,
selectCase: reflect.SelectCase{
Dir: reflect.SelectRecv,
Chan: reflect.ValueOf(result),
},
}
}
// extractRE is a line processor that extracts some information from a line
// based on a regular expression. The function returns trun (to request the
// caller to not to be called again) either when the match is found or due to
// the timeout. The found match is written to the x.result channel and it is
// important that this channel is monitored by a separate goroutine, otherwise
// the function will block.
func (x *reExtractor) extractRE(line string) bool {
submatch := x.re.FindSubmatch([]byte(line))
if len(submatch) == 2 {
select {
case x.result <- string(submatch[1]):
case <-x.timeout:
}
return true
}
return false
}

130
apptest/client.go Normal file
View file

@ -0,0 +1,130 @@
package apptest
import (
"io"
"net/http"
"net/url"
"strconv"
"strings"
"testing"
)
// Client is used for interacting with the apps over the network.
//
// At the moment it only supports HTTP protocol but may be exptended to support
// RPCs, etc.
type Client struct {
httpCli *http.Client
}
// NewClient creates a new client.
func NewClient() *Client {
return &Client{
httpCli: &http.Client{
Transport: &http.Transport{},
},
}
}
// CloseConnections closes client connections.
func (c *Client) CloseConnections() {
c.httpCli.CloseIdleConnections()
}
// Get sends a HTTP GET request. Once the function receives a response, it
// checks whether the response status code matches the expected one and returns
// the response body to the caller.
func (c *Client) Get(t *testing.T, url string, wantStatusCode int) string {
t.Helper()
return c.do(t, http.MethodGet, url, "", "", wantStatusCode)
}
// Post sends a HTTP POST request. Once the function receives a response, it
// checks whether the response status code matches the expected one and returns
// the response body to the caller.
func (c *Client) Post(t *testing.T, url, contentType, data string, wantStatusCode int) string {
t.Helper()
return c.do(t, http.MethodPost, url, contentType, data, wantStatusCode)
}
// PostForm sends a HTTP POST request containing the POST-form data. Once the
// function receives a response, it checks whether the response status code
// matches the expected one and returns the response body to the caller.
func (c *Client) PostForm(t *testing.T, url string, data url.Values, wantStatusCode int) string {
t.Helper()
return c.Post(t, url, "application/x-www-form-urlencoded", data.Encode(), wantStatusCode)
}
// do prepares a HTTP request, sends it to the server, receives the response
// from the server, ensures then response code matches the expected one, reads
// the rentire response body and returns it to the caller.
func (c *Client) do(t *testing.T, method, url, contentType, data string, wantStatusCode int) string {
t.Helper()
req, err := http.NewRequest(method, url, strings.NewReader(data))
if err != nil {
t.Fatalf("could not create a HTTP request: %v", err)
}
if len(contentType) > 0 {
req.Header.Add("Content-Type", contentType)
}
res, err := c.httpCli.Do(req)
if err != nil {
t.Fatalf("could not send HTTP request: %v", err)
}
body := readAllAndClose(t, res.Body)
if got, want := res.StatusCode, wantStatusCode; got != want {
t.Fatalf("unexpected response code: got %d, want %d (body: %s)", got, want, body)
}
return body
}
// readAllAndClose reads everything from the response body and then closes it.
func readAllAndClose(t *testing.T, responseBody io.ReadCloser) string {
t.Helper()
defer responseBody.Close()
b, err := io.ReadAll(responseBody)
if err != nil {
t.Fatalf("could not read response body: %d", err)
}
return string(b)
}
// ServesMetrics is used to retrive the app's metrics.
//
// This type is expected to be embdded by the apps that serve metrics.
type ServesMetrics struct {
metricsURL string
cli *Client
}
// GetIntMetric retrieves the value of a metric served by an app at /metrics URL.
// The value is then converted to int.
func (app *ServesMetrics) GetIntMetric(t *testing.T, metricName string) int {
return int(app.GetMetric(t, metricName))
}
// GetMetric retrieves the value of a metric served by an app at /metrics URL.
func (app *ServesMetrics) GetMetric(t *testing.T, metricName string) float64 {
t.Helper()
metrics := app.cli.Get(t, app.metricsURL, http.StatusOK)
for _, metric := range strings.Split(metrics, "\n") {
value, found := strings.CutPrefix(metric, metricName)
if found {
value = strings.Trim(value, " ")
res, err := strconv.ParseFloat(value, 64)
if err != nil {
t.Fatalf("could not parse metric value %s: %v", metric, err)
}
return res
}
}
t.Fatalf("metic not found: %s", metricName)
return 0
}

42
apptest/testcase.go Normal file
View file

@ -0,0 +1,42 @@
package apptest
import (
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
)
// TestCase holds the state and defines clean-up procedure common for all test
// cases.
type TestCase struct {
t *testing.T
cli *Client
}
// NewTestCase creates a new test case.
func NewTestCase(t *testing.T) *TestCase {
return &TestCase{t, NewClient()}
}
// Dir returns the directory name that should be used by as the -storageDataDir.
func (tc *TestCase) Dir() string {
return tc.t.Name()
}
// Client returns an instance of the client that can be used for interacting with
// the app(s) under test.
func (tc *TestCase) Client() *Client {
return tc.cli
}
// Close performs the test case clean up, such as closing all client connections
// and removing the -storageDataDir directory.
//
// Note that the -storageDataDir is not removed in case of test case failure to
// allow for furher manual debugging.
func (tc *TestCase) Close() {
tc.cli.CloseConnections()
if !tc.t.Failed() {
fs.MustRemoveAll(tc.Dir())
}
}

View file

@ -0,0 +1,68 @@
package tests
import (
"fmt"
"math/rand/v2"
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/apptest"
)
func TestMultilevelSelect(t *testing.T) {
tc := apptest.NewTestCase(t)
defer tc.Close()
// Set up the following multi-level cluster configuration:
//
// vmselect (L2) -> vmselect (L1) -> vmstorage <- vminsert
//
// vmisert writes data into vmstorage.
// vmselect (L2) reads that data via vmselect (L1).
cli := tc.Client()
vmstorage := apptest.MustStartVmstorage(t, "vmstorage", []string{
"-storageDataPath=" + tc.Dir() + "/vmstorage",
}, cli)
defer vmstorage.Stop()
vminsert := apptest.MustStartVminsert(t, "vminsert", []string{
"-storageNode=" + vmstorage.VminsertAddr(),
}, cli)
defer vminsert.Stop()
vmselectL1 := apptest.MustStartVmselect(t, "vmselect-level1", []string{
"-storageNode=" + vmstorage.VmselectAddr(),
}, cli)
defer vmselectL1.Stop()
vmselectL2 := apptest.MustStartVmselect(t, "vmselect-level2", []string{
"-storageNode=" + vmselectL1.ClusternativeListenAddr(),
}, cli)
defer vmselectL2.Stop()
// Insert 1000 unique time series.Wait for 2 seconds to let vmstorage
// flush pending items so they become searchable.
const numMetrics = 1000
records := make([]string, numMetrics)
for i := range numMetrics {
records[i] = fmt.Sprintf("metric_%d %d", i, rand.IntN(1000))
}
vminsert.PrometheusAPIV1ImportPrometheus(t, "0", records)
time.Sleep(2 * time.Second)
// Retrieve all time series and verify that vmselect (L1) serves the complete
// set of time series.
seriesL1 := vmselectL1.PrometheusAPIV1Series(t, "0", `{__name__=~".*"}`)
if got, want := len(seriesL1.Data), numMetrics; got != want {
t.Fatalf("unexpected level-1 series count: got %d, want %d", got, want)
}
// Retrieve all time series and verify that vmselect (L2) serves the complete
// set of time series.
seriesL2 := vmselectL2.PrometheusAPIV1Series(t, "0", `{__name__=~".*"}`)
if got, want := len(seriesL2.Data), numMetrics; got != want {
t.Fatalf("unexpected level-2 series count: got %d, want %d", got, want)
}
}

View file

@ -0,0 +1,84 @@
package tests
import (
"fmt"
"math/rand/v2"
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/apptest"
)
func TestVminsertShardsDataVmselectBuildsFullResultFromShards(t *testing.T) {
tc := apptest.NewTestCase(t)
defer tc.Close()
// Set up the following cluster configuration:
//
// - two vmstorage instances
// - vminsert points to the two vmstorages, its replication setting
// is off which means it will only shard the incoming data across the two
// vmstorages.
// - vmselect points to the two vmstorages and is expected to query both
// vmstorages and build the full result out of the two partial results.
cli := tc.Client()
vmstorage1 := apptest.MustStartVmstorage(t, "vmstorage-1", []string{
"-storageDataPath=" + tc.Dir() + "/vmstorage-1",
}, cli)
defer vmstorage1.Stop()
vmstorage2 := apptest.MustStartVmstorage(t, "vmstorage-2", []string{
"-storageDataPath=" + tc.Dir() + "/vmstorage-2",
}, cli)
defer vmstorage2.Stop()
vminsert := apptest.MustStartVminsert(t, "vminsert", []string{
"-storageNode=" + vmstorage1.VminsertAddr() + "," + vmstorage2.VminsertAddr(),
}, cli)
defer vminsert.Stop()
vmselect := apptest.MustStartVmselect(t, "vmselect", []string{
"-storageNode=" + vmstorage1.VmselectAddr() + "," + vmstorage2.VmselectAddr(),
}, cli)
defer vmselect.Stop()
// Insert 1000 unique time series and verify the that inserted data has been
// indeed sharded by checking various metrics exposed by vminsert and
// vmstorage.
// Also wait for 2 seconds to let vminsert and vmstorage servers to update
// the values of the metrics they expose and to let vmstorages flush pending
// items so they become searchable.
const numMetrics = 1000
records := make([]string, numMetrics)
for i := range numMetrics {
records[i] = fmt.Sprintf("metric_%d %d", i, rand.IntN(1000))
}
vminsert.PrometheusAPIV1ImportPrometheus(t, "0", records)
time.Sleep(2 * time.Second)
numMetrics1 := vmstorage1.GetIntMetric(t, "vm_vminsert_metrics_read_total")
if numMetrics1 == 0 {
t.Fatalf("storage-1 has no time series")
}
numMetrics2 := vmstorage2.GetIntMetric(t, "vm_vminsert_metrics_read_total")
if numMetrics2 == 0 {
t.Fatalf("storage-2 has no time series")
}
if numMetrics1+numMetrics2 != numMetrics {
t.Fatalf("unxepected total number of metrics: vmstorage-1 (%d) + vmstorage-2 (%d) != %d", numMetrics1, numMetrics2, numMetrics)
}
// Retrieve all time series and verify that vmselect serves the complete set
//of time series.
series := vmselect.PrometheusAPIV1Series(t, "0", `{__name__=~".*"}`)
if got, want := series.Status, "success"; got != want {
t.Fatalf("unexpected /ap1/v1/series response status: got %s, want %s", got, want)
}
if got, want := series.IsPartial, false; got != want {
t.Fatalf("unexpected /ap1/v1/series response isPartial value: got %t, want %t", got, want)
}
if got, want := len(series.Data), numMetrics; got != want {
t.Fatalf("unexpected /ap1/v1/series response series count: got %d, want %d", got, want)
}
}

77
apptest/vminsert.go Normal file
View file

@ -0,0 +1,77 @@
package apptest
import (
"fmt"
"net/http"
"regexp"
"strings"
"testing"
)
// Vminsert holds the state of a vminsert app and provides vminsert-specific
// functions.
type Vminsert struct {
*app
*ServesMetrics
httpListenAddr string
cli *Client
}
// MustStartVminsert is a test helper function that starts an instance of
// vminsert and fails the test if the app fails to start.
func MustStartVminsert(t *testing.T, instance string, flags []string, cli *Client) *Vminsert {
t.Helper()
app, err := StartVminsert(instance, flags, cli)
if err != nil {
t.Fatalf("Could not start %s: %v", instance, err)
}
return app
}
// StartVminsert starts an instance of vminsert with the given flags. It also
// sets the default flags and populates the app instance state with runtime
// values extracted from the application log (such as httpListenAddr)
func StartVminsert(instance string, flags []string, cli *Client) (*Vminsert, error) {
app, stderrExtracts, err := startApp(instance, "../../bin/vminsert", flags, &appOptions{
defaultFlags: map[string]string{
"-httpListenAddr": "127.0.0.1:0",
},
extractREs: []*regexp.Regexp{
httpListenAddrRE,
},
})
if err != nil {
return nil, err
}
return &Vminsert{
app: app,
ServesMetrics: &ServesMetrics{
metricsURL: fmt.Sprintf("http://%s/metrics", stderrExtracts[0]),
cli: cli,
},
httpListenAddr: stderrExtracts[0],
cli: cli,
}, nil
}
// PrometheusAPIV1ImportPrometheus is a test helper function that inserts a
// collection of records in Prometheus text exposition format for the given
// tenant by sending a HTTP POST request to
// /prometheus/api/v1/import/prometheus vminsert endpoint.
//
// See https://docs.victoriametrics.com/url-examples/#apiv1importprometheus
func (app *Vminsert) PrometheusAPIV1ImportPrometheus(t *testing.T, tenant string, records []string) {
t.Helper()
url := fmt.Sprintf("http://%s/insert/%s/prometheus/api/v1/import/prometheus", app.httpListenAddr, tenant)
app.cli.Post(t, url, "text/plain", strings.Join(records, "\n"), http.StatusNoContent)
}
// String returns the string representation of the vminsert app state.
func (app *Vminsert) String() string {
return fmt.Sprintf("{app: %s httpListenAddr: %q}", app.app, app.httpListenAddr)
}

101
apptest/vmselect.go Normal file
View file

@ -0,0 +1,101 @@
package apptest
import (
"encoding/json"
"fmt"
"net/http"
"net/url"
"regexp"
"testing"
)
// Vmselect holds the state of a vmselect app and provides vmselect-specific
// functions.
type Vmselect struct {
*app
*ServesMetrics
httpListenAddr string
clusternativeListenAddr string
cli *Client
}
// MustStartVmselect is a test helper function that starts an instance of
// vmselect and fails the test if the app fails to start.
func MustStartVmselect(t *testing.T, instance string, flags []string, cli *Client) *Vmselect {
t.Helper()
app, err := StartVmselect(instance, flags, cli)
if err != nil {
t.Fatalf("Could not start %s: %v", instance, err)
}
return app
}
// StartVmselect starts an instance of vmselect with the given flags. It also
// sets the default flags and populates the app instance state with runtime
// values extracted from the application log (such as httpListenAddr)
func StartVmselect(instance string, flags []string, cli *Client) (*Vmselect, error) {
app, stderrExtracts, err := startApp(instance, "../../bin/vmselect", flags, &appOptions{
defaultFlags: map[string]string{
"-httpListenAddr": "127.0.0.1:0",
"-clusternativeListenAddr": "127.0.0.1:0",
},
extractREs: []*regexp.Regexp{
httpListenAddrRE,
vmselectAddrRE,
},
})
if err != nil {
return nil, err
}
return &Vmselect{
app: app,
ServesMetrics: &ServesMetrics{
metricsURL: fmt.Sprintf("http://%s/metrics", stderrExtracts[0]),
cli: cli,
},
httpListenAddr: stderrExtracts[0],
clusternativeListenAddr: stderrExtracts[1],
cli: cli,
}, nil
}
// ClusternativeListenAddr returns the address at which the vmselect process is
// listening for connections from other vmselect apps.
func (app *Vmselect) ClusternativeListenAddr() string {
return app.clusternativeListenAddr
}
// PrometheusAPIV1SeriesResponse is an inmemory representation of the
// /prometheus/api/v1/series response.
type PrometheusAPIV1SeriesResponse struct {
Status string
IsPartial bool
Data []map[string]string
}
// PrometheusAPIV1Series sends a query to a /prometheus/api/v1/series endpoint
// and returns the list of time series that match the query.
//
// See https://docs.victoriametrics.com/url-examples/#apiv1series
func (app *Vmselect) PrometheusAPIV1Series(t *testing.T, tenant, matchQuery string) *PrometheusAPIV1SeriesResponse {
t.Helper()
seriesURL := fmt.Sprintf("http://%s/select/%s/prometheus/api/v1/series", app.httpListenAddr, tenant)
values := url.Values{}
values.Add("match[]", matchQuery)
jsonRes := app.cli.PostForm(t, seriesURL, values, http.StatusOK)
var res PrometheusAPIV1SeriesResponse
if err := json.Unmarshal([]byte(jsonRes), &res); err != nil {
t.Fatalf("could not unmarshal /api/v1/series response: %v", err)
}
return &res
}
// String returns the string representation of the vmselect app state.
func (app *Vmselect) String() string {
return fmt.Sprintf("{app: %s httpListenAddr: %q}", app.app, app.httpListenAddr)
}

87
apptest/vmstorage.go Normal file
View file

@ -0,0 +1,87 @@
package apptest
import (
"fmt"
"os"
"regexp"
"testing"
"time"
)
// Vmstorage holds the state of a vmstorage app and provides vmstorage-specific
// functions.
type Vmstorage struct {
*app
*ServesMetrics
storageDataPath string
httpListenAddr string
vminsertAddr string
vmselectAddr string
}
// MustStartVmstorage is a test helper function that starts an instance of
// vmstorage and fails the test if the app fails to start.
func MustStartVmstorage(t *testing.T, instance string, flags []string, cli *Client) *Vmstorage {
t.Helper()
app, err := StartVmstorage(instance, flags, cli)
if err != nil {
t.Fatalf("Could not start %s: %v", instance, err)
}
return app
}
// StartVmstorage starts an instance of vmstorage with the given flags. It also
// sets the default flags and populates the app instance state with runtime
// values extracted from the application log (such as httpListenAddr)
func StartVmstorage(instance string, flags []string, cli *Client) (*Vmstorage, error) {
app, stderrExtracts, err := startApp(instance, "../../bin/vmstorage", flags, &appOptions{
defaultFlags: map[string]string{
"-storageDataPath": fmt.Sprintf("%s/%s-%d", os.TempDir(), instance, time.Now().UnixNano()),
"-httpListenAddr": "127.0.0.1:0",
"-vminsertAddr": "127.0.0.1:0",
"-vmselectAddr": "127.0.0.1:0",
},
extractREs: []*regexp.Regexp{
storageDataPathRE,
httpListenAddrRE,
vminsertAddrRE,
vmselectAddrRE,
},
})
if err != nil {
return nil, err
}
return &Vmstorage{
app: app,
ServesMetrics: &ServesMetrics{
metricsURL: fmt.Sprintf("http://%s/metrics", stderrExtracts[1]),
cli: cli,
},
storageDataPath: stderrExtracts[0],
httpListenAddr: stderrExtracts[1],
vminsertAddr: stderrExtracts[2],
vmselectAddr: stderrExtracts[3],
}, nil
}
// VminsertAddr returns the address at which the vmstorage process is listening
// for vminsert connections.
func (app *Vmstorage) VminsertAddr() string {
return app.vminsertAddr
}
// VmselectAddr returns the address at which the vmstorage process is listening
// for vmselect connections.
func (app *Vmstorage) VmselectAddr() string {
return app.vmselectAddr
}
// String returns the string representation of the vmstorage app state.
func (app *Vmstorage) String() string {
return fmt.Sprintf("{app: %s storageDataPath: %q httpListenAddr: %q vminsertAddr: %q vmselectAddr: %q}", []any{
app.app, app.storageDataPath, app.httpListenAddr, app.vminsertAddr, app.vmselectAddr}...)
}

View file

@ -1108,13 +1108,118 @@
"title": "Log stream churn rate",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the number of restarts per job. The chart can be useful to identify periodic process restarts and correlate them with potential issues or anomalies. Normally, processes shouldn't restart unless restart was inited by user. The reason of restarts should be figured out by checking the logs of each specific service. ",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "stepAfter",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 0,
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 22
},
"id": 62,
"options": {
"legend": {
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Last *",
"sortDesc": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "9.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
"expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) > 0) by(job)",
"format": "time_series",
"instant": false,
"legendFormat": "{{job}}",
"refId": "A"
}
],
"title": "Restarts ($job)",
"type": "timeseries"
},
{
"collapsed": true,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 22
"y": 30
},
"id": 28,
"panels": [
@ -1168,8 +1273,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1185,7 +1289,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 3
"y": 11
},
"id": 38,
"options": {
@ -1275,8 +1379,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1292,7 +1395,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 3
"y": 11
},
"id": 40,
"options": {
@ -1432,8 +1535,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1449,7 +1551,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 11
"y": 19
},
"id": 42,
"options": {
@ -1538,8 +1640,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1555,7 +1656,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 11
"y": 19
},
"id": 44,
"options": {
@ -1648,8 +1749,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1681,7 +1781,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 19
"y": 27
},
"id": 46,
"options": {
@ -1773,8 +1873,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1806,7 +1905,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 19
"y": 27
},
"id": 48,
"options": {
@ -1911,8 +2010,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1928,7 +2026,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 27
"y": 35
},
"id": 50,
"options": {
@ -2017,8 +2115,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2047,7 +2144,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 27
"y": 35
},
"id": 52,
"options": {
@ -2150,8 +2247,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2167,7 +2263,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 35
"y": 43
},
"id": 54,
"options": {
@ -2256,8 +2352,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2286,7 +2381,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 35
"y": 43
},
"id": 56,
"options": {
@ -2393,8 +2488,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2410,7 +2504,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 43
"y": 51
},
"id": 58,
"options": {
@ -2501,8 +2595,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2518,7 +2611,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 43
"y": 51
},
"id": 60,
"options": {
@ -2610,8 +2703,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2627,7 +2719,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 51
"y": 59
},
"id": 61,
"options": {

View file

@ -4124,6 +4124,111 @@
],
"title": "Rows ignored for last 1h ($instance)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the number of restarts per job. The chart can be useful to identify periodic process restarts and correlate them with potential issues or anomalies. Normally, processes shouldn't restart unless restart was inited by user. The reason of restarts should be figured out by checking the logs of each specific service. ",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "stepAfter",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 0,
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 70
},
"id": 214,
"options": {
"legend": {
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Last *",
"sortDesc": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "9.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
"expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) > 0) by(job)",
"format": "time_series",
"instant": false,
"legendFormat": "{{job}}",
"refId": "A"
}
],
"title": "Restarts ($job)",
"type": "timeseries"
}
],
"title": "Troubleshooting",
@ -5037,7 +5142,7 @@
"uid": "$ds"
},
"editorMode": "code",
"expr": "min(vm_free_disk_space_bytes{job=~\"$job_storage\", instance=~\"$instance\"} \n/ \nignoring(path) (\n rate(vm_rows_added_to_storage_total{job=~\"$job_storage\", instance=~\"$instance\"}[1d])\n * scalar(\n sum(vm_data_size_bytes{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n / \n sum(vm_rows{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n )\n))",
"expr": "min((vm_free_disk_space_bytes{job=~\"$job_storage\", instance=~\"$instance\"} - vm_free_disk_space_limit_bytes{job=~\"$job_storage\", instance=~\"$instance\"}) \n/ \nignoring(path) (\n rate(vm_rows_added_to_storage_total{job=~\"$job_storage\", instance=~\"$instance\"}[1d])\n * scalar(\n sum(vm_data_size_bytes{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n / \n sum(vm_rows{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n )\n))",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
@ -5927,7 +6032,7 @@
"uid": "$ds"
},
"editorMode": "code",
"expr": "max(\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) /\n (\n sum(vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) +\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)\n ) \n)",
"expr": "max(\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) /\n (\n sum(vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}-vm_free_disk_space_limit_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) +\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)\n ) \n)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "max",
@ -5940,7 +6045,7 @@
"uid": "$ds"
},
"editorMode": "code",
"expr": "min(\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) /\n (\n sum(vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) +\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)\n ) \n)",
"expr": "min(\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) /\n (\n sum(vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}-vm_free_disk_space_limit_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) +\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)\n ) \n)",
"format": "time_series",
"hide": false,
"intervalFactor": 1,
@ -5954,7 +6059,7 @@
"uid": "$ds"
},
"editorMode": "code",
"expr": "quantile(0.5,\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) /\n (\n sum(vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) +\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)\n ) \n)",
"expr": "quantile(0.5,\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) /\n (\n sum(vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}-vm_free_disk_space_limit_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) +\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)\n ) \n)",
"format": "time_series",
"hide": false,
"intervalFactor": 1,
@ -9373,7 +9478,7 @@
"uid": "$ds"
},
"editorMode": "code",
"expr": "vm_free_disk_space_bytes{job=~\"$job_storage\", instance=~\"$instance\"} \n/ \nignoring(path) (\n rate(vm_rows_added_to_storage_total{job=~\"$job_storage\", instance=~\"$instance\"}[1d])\n * scalar(\n sum(vm_data_size_bytes{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n / \n sum(vm_rows{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n )\n)",
"expr": "(vm_free_disk_space_bytes{job=~\"$job_storage\", instance=~\"$instance\"}-vm_free_disk_space_limit_bytes{job=~\"$job_storage\", instance=~\"$instance\"}) \n/ \nignoring(path) (\n rate(vm_rows_added_to_storage_total{job=~\"$job_storage\", instance=~\"$instance\"}[1d])\n * scalar(\n sum(vm_data_size_bytes{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n / \n sum(vm_rows{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n )\n)",
"format": "time_series",
"interval": "",
"intervalFactor": 1,

View file

@ -1569,7 +1569,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 3
"y": 11
},
"id": 112,
"links": [],
@ -1677,7 +1677,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 3
"y": 11
},
"id": 44,
"options": {
@ -1844,7 +1844,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 11
"y": 19
},
"id": 123,
"links": [],
@ -1951,7 +1951,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 11
"y": 19
},
"id": 114,
"options": {
@ -2044,8 +2044,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2077,7 +2076,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 19
"y": 27
},
"id": 75,
"options": {
@ -2169,8 +2168,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2202,7 +2200,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 19
"y": 27
},
"id": 57,
"options": {
@ -2309,8 +2307,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2326,7 +2323,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 27
"y": 35
},
"id": 47,
"options": {
@ -2415,8 +2412,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2445,7 +2441,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 27
"y": 35
},
"id": 76,
"options": {
@ -2552,8 +2548,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2569,7 +2564,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 35
"y": 43
},
"id": 48,
"options": {
@ -2658,8 +2653,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2688,7 +2682,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 35
"y": 43
},
"id": 124,
"options": {
@ -2795,8 +2789,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2812,7 +2805,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 43
"y": 51
},
"id": 49,
"options": {
@ -2903,8 +2896,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2920,7 +2912,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 43
"y": 51
},
"id": 37,
"options": {
@ -3010,8 +3002,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -3040,7 +3031,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 51
"y": 59
},
"id": 127,
"options": {
@ -3146,8 +3137,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -3163,7 +3153,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 51
"y": 59
},
"id": 125,
"options": {
@ -3253,8 +3243,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -3270,7 +3259,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 59
"y": 67
},
"id": 128,
"options": {
@ -3386,7 +3375,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -3394,8 +3384,7 @@
}
]
},
"unit": "short",
"unitScale": true
"unit": "short"
},
"overrides": []
},
@ -3403,7 +3392,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 4
"y": 32
},
"id": 66,
"options": {
@ -3505,7 +3494,8 @@
"mode": "absolute",
"steps": [
{
"color": "transparent"
"color": "transparent",
"value": null
},
{
"color": "red",
@ -3513,8 +3503,7 @@
}
]
},
"unit": "percentunit",
"unitScale": true
"unit": "percentunit"
},
"overrides": []
},
@ -3522,10 +3511,9 @@
"h": 8,
"w": 12,
"x": 12,
"y": 4
"y": 32
},
"id": 68,
"links": [],
"options": {
"legend": {
"calcs": [
@ -3615,7 +3603,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -3623,8 +3612,7 @@
}
]
},
"unit": "short",
"unitScale": true
"unit": "short"
},
"overrides": []
},
@ -3632,10 +3620,9 @@
"h": 8,
"w": 12,
"x": 0,
"y": 12
"y": 40
},
"id": 116,
"links": [],
"options": {
"legend": {
"calcs": [
@ -3723,7 +3710,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -3731,8 +3719,7 @@
}
]
},
"unit": "short",
"unitScale": true
"unit": "short"
},
"overrides": []
},
@ -3740,10 +3727,9 @@
"h": 8,
"w": 12,
"x": 12,
"y": 12
"y": 40
},
"id": 60,
"links": [],
"options": {
"legend": {
"calcs": [
@ -3830,7 +3816,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -3838,8 +3825,7 @@
}
]
},
"unit": "percentunit",
"unitScale": true
"unit": "percentunit"
},
"overrides": []
},
@ -3847,7 +3833,7 @@
"h": 9,
"w": 12,
"x": 0,
"y": 20
"y": 48
},
"id": 90,
"options": {
@ -3938,7 +3924,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -3946,8 +3933,7 @@
}
]
},
"unit": "percentunit",
"unitScale": true
"unit": "percentunit"
},
"overrides": []
},
@ -3955,10 +3941,9 @@
"h": 9,
"w": 12,
"x": 12,
"y": 20
"y": 48
},
"id": 118,
"links": [],
"options": {
"legend": {
"calcs": [
@ -4022,15 +4007,15 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unitScale": true
}
},
"overrides": [
{
@ -4075,7 +4060,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 29
"y": 57
},
"id": 126,
"options": {
@ -4090,7 +4075,7 @@
},
"showHeader": true
},
"pluginVersion": "10.3.1",
"pluginVersion": "10.4.2",
"targets": [
{
"datasource": {
@ -4161,7 +4146,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -4169,8 +4155,7 @@
}
]
},
"unit": "short",
"unitScale": true
"unit": "short"
},
"overrides": []
},
@ -4178,10 +4163,9 @@
"h": 8,
"w": 12,
"x": 12,
"y": 29
"y": 57
},
"id": 74,
"links": [],
"options": {
"legend": {
"calcs": [
@ -4218,6 +4202,111 @@
],
"title": "Labels limit exceeded",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the number of restarts per job. The chart can be useful to identify periodic process restarts and correlate them with potential issues or anomalies. Normally, processes shouldn't restart unless restart was inited by user. The reason of restarts should be figured out by checking the logs of each specific service. ",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "stepAfter",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 0,
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 65
},
"id": 129,
"options": {
"legend": {
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Last *",
"sortDesc": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "9.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
"expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) > 0) by(job)",
"format": "time_series",
"instant": false,
"legendFormat": "{{job}}",
"refId": "A"
}
],
"title": "Restarts ($job)",
"type": "timeseries"
}
],
"targets": [
@ -4313,7 +4402,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 5
"y": 13
},
"id": 10,
"links": [],
@ -4422,7 +4511,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 5
"y": 13
},
"id": 73,
"links": [],
@ -4452,7 +4541,7 @@
"uid": "$ds"
},
"editorMode": "code",
"expr": "vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"} \n/ ignoring(path) (\n rate(vm_rows_added_to_storage_total{job=~\"$job\", instance=~\"$instance\"}[1d]) \n * scalar(\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"}) \n / sum(vm_rows{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"})\n )\n )",
"expr": "(vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}-vm_free_disk_space_limit_bytes{job=~\"$job\", instance=~\"$instance\"}) \n/ ignoring(path) (\n rate(vm_rows_added_to_storage_total{job=~\"$job\", instance=~\"$instance\"}[1d]) \n * scalar(\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"}) \n / sum(vm_rows{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"})\n )\n )",
"format": "time_series",
"hide": false,
"interval": "",
@ -4532,7 +4621,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 13
"y": 21
},
"id": 53,
"links": [],
@ -4575,7 +4664,7 @@
"type": "prometheus",
"uid": "$ds"
},
"expr": "vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}",
"expr": "vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"} - vm_free_disk_space_limit_bytes{job=~\"$job\", instance=~\"$instance\"}",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
@ -4686,7 +4775,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 13
"y": 21
},
"id": 34,
"links": [],
@ -4824,7 +4913,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 21
"y": 29
},
"id": 30,
"links": [],
@ -4946,7 +5035,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 21
"y": 29
},
"id": 36,
"links": [],
@ -5054,7 +5143,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 29
"y": 37
},
"id": 58,
"links": [],
@ -5164,7 +5253,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 29
"y": 37
},
"id": 62,
"options": {
@ -5284,7 +5373,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 37
"y": 45
},
"id": 59,
"links": [],
@ -5404,7 +5493,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 37
"y": 45
},
"id": 64,
"options": {
@ -5510,7 +5599,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 45
"y": 53
},
"id": 99,
"links": [],
@ -5620,7 +5709,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 45
"y": 53
},
"id": 103,
"links": [],
@ -5730,7 +5819,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 53
"y": 61
},
"id": 122,
"links": [],
@ -5840,7 +5929,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 53
"y": 61
},
"id": 105,
"links": [],

View file

@ -4125,6 +4125,111 @@
],
"title": "Rows ignored for last 1h ($instance)",
"type": "timeseries"
},
{
"datasource": {
"type": "victoriametrics-datasource",
"uid": "$ds"
},
"description": "Shows the number of restarts per job. The chart can be useful to identify periodic process restarts and correlate them with potential issues or anomalies. Normally, processes shouldn't restart unless restart was inited by user. The reason of restarts should be figured out by checking the logs of each specific service. ",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "stepAfter",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 0,
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 70
},
"id": 214,
"options": {
"legend": {
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Last *",
"sortDesc": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "9.1.0",
"targets": [
{
"datasource": {
"type": "victoriametrics-datasource",
"uid": "$ds"
},
"editorMode": "code",
"expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) > 0) by(job)",
"format": "time_series",
"instant": false,
"legendFormat": "{{job}}",
"refId": "A"
}
],
"title": "Restarts ($job)",
"type": "timeseries"
}
],
"title": "Troubleshooting",
@ -5038,7 +5143,7 @@
"uid": "$ds"
},
"editorMode": "code",
"expr": "min(vm_free_disk_space_bytes{job=~\"$job_storage\", instance=~\"$instance\"} \n/ \nignoring(path) (\n rate(vm_rows_added_to_storage_total{job=~\"$job_storage\", instance=~\"$instance\"}[1d])\n * scalar(\n sum(vm_data_size_bytes{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n / \n sum(vm_rows{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n )\n))",
"expr": "min((vm_free_disk_space_bytes{job=~\"$job_storage\", instance=~\"$instance\"} - vm_free_disk_space_limit_bytes{job=~\"$job_storage\", instance=~\"$instance\"}) \n/ \nignoring(path) (\n rate(vm_rows_added_to_storage_total{job=~\"$job_storage\", instance=~\"$instance\"}[1d])\n * scalar(\n sum(vm_data_size_bytes{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n / \n sum(vm_rows{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n )\n))",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
@ -5928,7 +6033,7 @@
"uid": "$ds"
},
"editorMode": "code",
"expr": "max(\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) /\n (\n sum(vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) +\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)\n ) \n)",
"expr": "max(\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) /\n (\n sum(vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}-vm_free_disk_space_limit_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) +\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)\n ) \n)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "max",
@ -5941,7 +6046,7 @@
"uid": "$ds"
},
"editorMode": "code",
"expr": "min(\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) /\n (\n sum(vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) +\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)\n ) \n)",
"expr": "min(\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) /\n (\n sum(vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}-vm_free_disk_space_limit_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) +\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)\n ) \n)",
"format": "time_series",
"hide": false,
"intervalFactor": 1,
@ -5955,7 +6060,7 @@
"uid": "$ds"
},
"editorMode": "code",
"expr": "quantile(0.5,\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) /\n (\n sum(vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) +\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)\n ) \n)",
"expr": "quantile(0.5,\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) /\n (\n sum(vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}-vm_free_disk_space_limit_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) +\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)\n ) \n)",
"format": "time_series",
"hide": false,
"intervalFactor": 1,
@ -9374,7 +9479,7 @@
"uid": "$ds"
},
"editorMode": "code",
"expr": "vm_free_disk_space_bytes{job=~\"$job_storage\", instance=~\"$instance\"} \n/ \nignoring(path) (\n rate(vm_rows_added_to_storage_total{job=~\"$job_storage\", instance=~\"$instance\"}[1d])\n * scalar(\n sum(vm_data_size_bytes{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n / \n sum(vm_rows{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n )\n)",
"expr": "(vm_free_disk_space_bytes{job=~\"$job_storage\", instance=~\"$instance\"}-vm_free_disk_space_limit_bytes{job=~\"$job_storage\", instance=~\"$instance\"}) \n/ \nignoring(path) (\n rate(vm_rows_added_to_storage_total{job=~\"$job_storage\", instance=~\"$instance\"}[1d])\n * scalar(\n sum(vm_data_size_bytes{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n / \n sum(vm_rows{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n )\n)",
"format": "time_series",
"interval": "",
"intervalFactor": 1,

View file

@ -1570,7 +1570,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 3
"y": 11
},
"id": 112,
"links": [],
@ -1678,7 +1678,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 3
"y": 11
},
"id": 44,
"options": {
@ -1845,7 +1845,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 11
"y": 19
},
"id": 123,
"links": [],
@ -1952,7 +1952,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 11
"y": 19
},
"id": 114,
"options": {
@ -2045,8 +2045,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2078,7 +2077,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 19
"y": 27
},
"id": 75,
"options": {
@ -2170,8 +2169,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2203,7 +2201,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 19
"y": 27
},
"id": 57,
"options": {
@ -2310,8 +2308,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2327,7 +2324,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 27
"y": 35
},
"id": 47,
"options": {
@ -2416,8 +2413,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2446,7 +2442,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 27
"y": 35
},
"id": 76,
"options": {
@ -2553,8 +2549,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2570,7 +2565,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 35
"y": 43
},
"id": 48,
"options": {
@ -2659,8 +2654,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2689,7 +2683,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 35
"y": 43
},
"id": 124,
"options": {
@ -2796,8 +2790,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2813,7 +2806,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 43
"y": 51
},
"id": 49,
"options": {
@ -2904,8 +2897,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2921,7 +2913,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 43
"y": 51
},
"id": 37,
"options": {
@ -3011,8 +3003,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -3041,7 +3032,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 51
"y": 59
},
"id": 127,
"options": {
@ -3147,8 +3138,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -3164,7 +3154,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 51
"y": 59
},
"id": 125,
"options": {
@ -3254,8 +3244,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -3271,7 +3260,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 59
"y": 67
},
"id": 128,
"options": {
@ -3387,7 +3376,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -3395,8 +3385,7 @@
}
]
},
"unit": "short",
"unitScale": true
"unit": "short"
},
"overrides": []
},
@ -3404,7 +3393,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 4
"y": 32
},
"id": 66,
"options": {
@ -3506,7 +3495,8 @@
"mode": "absolute",
"steps": [
{
"color": "transparent"
"color": "transparent",
"value": null
},
{
"color": "red",
@ -3514,8 +3504,7 @@
}
]
},
"unit": "percentunit",
"unitScale": true
"unit": "percentunit"
},
"overrides": []
},
@ -3523,10 +3512,9 @@
"h": 8,
"w": 12,
"x": 12,
"y": 4
"y": 32
},
"id": 68,
"links": [],
"options": {
"legend": {
"calcs": [
@ -3616,7 +3604,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -3624,8 +3613,7 @@
}
]
},
"unit": "short",
"unitScale": true
"unit": "short"
},
"overrides": []
},
@ -3633,10 +3621,9 @@
"h": 8,
"w": 12,
"x": 0,
"y": 12
"y": 40
},
"id": 116,
"links": [],
"options": {
"legend": {
"calcs": [
@ -3724,7 +3711,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -3732,8 +3720,7 @@
}
]
},
"unit": "short",
"unitScale": true
"unit": "short"
},
"overrides": []
},
@ -3741,10 +3728,9 @@
"h": 8,
"w": 12,
"x": 12,
"y": 12
"y": 40
},
"id": 60,
"links": [],
"options": {
"legend": {
"calcs": [
@ -3831,7 +3817,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -3839,8 +3826,7 @@
}
]
},
"unit": "percentunit",
"unitScale": true
"unit": "percentunit"
},
"overrides": []
},
@ -3848,7 +3834,7 @@
"h": 9,
"w": 12,
"x": 0,
"y": 20
"y": 48
},
"id": 90,
"options": {
@ -3939,7 +3925,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -3947,8 +3934,7 @@
}
]
},
"unit": "percentunit",
"unitScale": true
"unit": "percentunit"
},
"overrides": []
},
@ -3956,10 +3942,9 @@
"h": 9,
"w": 12,
"x": 12,
"y": 20
"y": 48
},
"id": 118,
"links": [],
"options": {
"legend": {
"calcs": [
@ -4023,15 +4008,15 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unitScale": true
}
},
"overrides": [
{
@ -4076,7 +4061,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 29
"y": 57
},
"id": 126,
"options": {
@ -4091,7 +4076,7 @@
},
"showHeader": true
},
"pluginVersion": "10.3.1",
"pluginVersion": "10.4.2",
"targets": [
{
"datasource": {
@ -4162,7 +4147,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -4170,8 +4156,7 @@
}
]
},
"unit": "short",
"unitScale": true
"unit": "short"
},
"overrides": []
},
@ -4179,10 +4164,9 @@
"h": 8,
"w": 12,
"x": 12,
"y": 29
"y": 57
},
"id": 74,
"links": [],
"options": {
"legend": {
"calcs": [
@ -4219,6 +4203,111 @@
],
"title": "Labels limit exceeded",
"type": "timeseries"
},
{
"datasource": {
"type": "victoriametrics-datasource",
"uid": "$ds"
},
"description": "Shows the number of restarts per job. The chart can be useful to identify periodic process restarts and correlate them with potential issues or anomalies. Normally, processes shouldn't restart unless restart was inited by user. The reason of restarts should be figured out by checking the logs of each specific service. ",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "stepAfter",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 0,
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 65
},
"id": 129,
"options": {
"legend": {
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Last *",
"sortDesc": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "9.1.0",
"targets": [
{
"datasource": {
"type": "victoriametrics-datasource",
"uid": "$ds"
},
"editorMode": "code",
"expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) > 0) by(job)",
"format": "time_series",
"instant": false,
"legendFormat": "{{job}}",
"refId": "A"
}
],
"title": "Restarts ($job)",
"type": "timeseries"
}
],
"targets": [
@ -4314,7 +4403,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 5
"y": 13
},
"id": 10,
"links": [],
@ -4423,7 +4512,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 5
"y": 13
},
"id": 73,
"links": [],
@ -4453,7 +4542,7 @@
"uid": "$ds"
},
"editorMode": "code",
"expr": "vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"} \n/ ignoring(path) (\n rate(vm_rows_added_to_storage_total{job=~\"$job\", instance=~\"$instance\"}[1d]) \n * scalar(\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"}) \n / sum(vm_rows{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"})\n )\n )",
"expr": "(vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}-vm_free_disk_space_limit_bytes{job=~\"$job\", instance=~\"$instance\"}) \n/ ignoring(path) (\n rate(vm_rows_added_to_storage_total{job=~\"$job\", instance=~\"$instance\"}[1d]) \n * scalar(\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"}) \n / sum(vm_rows{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"})\n )\n )",
"format": "time_series",
"hide": false,
"interval": "",
@ -4533,7 +4622,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 13
"y": 21
},
"id": 53,
"links": [],
@ -4576,7 +4665,7 @@
"type": "victoriametrics-datasource",
"uid": "$ds"
},
"expr": "vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}",
"expr": "vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"} - vm_free_disk_space_limit_bytes{job=~\"$job\", instance=~\"$instance\"}",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
@ -4687,7 +4776,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 13
"y": 21
},
"id": 34,
"links": [],
@ -4825,7 +4914,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 21
"y": 29
},
"id": 30,
"links": [],
@ -4947,7 +5036,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 21
"y": 29
},
"id": 36,
"links": [],
@ -5055,7 +5144,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 29
"y": 37
},
"id": 58,
"links": [],
@ -5165,7 +5254,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 29
"y": 37
},
"id": 62,
"options": {
@ -5285,7 +5374,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 37
"y": 45
},
"id": 59,
"links": [],
@ -5405,7 +5494,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 37
"y": 45
},
"id": 64,
"options": {
@ -5511,7 +5600,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 45
"y": 53
},
"id": 99,
"links": [],
@ -5621,7 +5710,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 45
"y": 53
},
"id": 103,
"links": [],
@ -5731,7 +5820,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 53
"y": 61
},
"id": 122,
"links": [],
@ -5841,7 +5930,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 53
"y": 61
},
"id": 105,
"links": [],

View file

@ -951,7 +951,7 @@
"type": "victoriametrics-datasource",
"uid": "$ds"
},
"description": "Shows the persistent queue size of pending samples in bytes >2MB which hasn't been flushed to remote storage yet. \n\nIncreasing of value might be a sign of connectivity issues. In such cases, vmagent starts to flush pending data on disk with attempt to send it later once connection is restored.\n\nRemote write URLs are hidden by default but might be unveiled once `-remoteWrite.showURL` is set to true.\n\nClick on the line and choose Drilldown to show persistent queue size per instance.\n",
"description": "Shows the persistent queue size of pending samples in bytes >2MB which hasn't been flushed to remote storage yet. \n\nIncreasing of value might be a sign of connectivity issues. In such cases, vmagent starts to flush pending data on disk with attempt to send it later once connection is restored.\n\nRemote write URLs are hidden by default but might be unveiled once `-remoteWrite.showURL` is set to true.\n\nClick on the line and choose Drilldown to show the persistent queue size per instance.\n",
"fieldConfig": {
"defaults": {
"color": {
@ -1336,8 +1336,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1440,8 +1439,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1619,7 +1617,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -1635,7 +1634,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 35
"y": 3
},
"id": 109,
"options": {
@ -1750,7 +1749,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 35
"y": 3
},
"id": 111,
"options": {
@ -1875,7 +1874,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 43
"y": 11
},
"id": 81,
"options": {
@ -2011,7 +2010,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 43
"y": 11
},
"id": 7,
"options": {
@ -2130,7 +2129,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 51
"y": 19
},
"id": 83,
"options": {
@ -2236,7 +2235,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 51
"y": 19
},
"id": 39,
"options": {
@ -2343,7 +2342,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 59
"y": 27
},
"id": 135,
"options": {
@ -2450,7 +2449,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 59
"y": 27
},
"id": 149,
"options": {
@ -2556,7 +2555,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 67
"y": 35
},
"id": 41,
"options": {
@ -2669,7 +2668,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -2685,7 +2685,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 20
"y": 4
},
"id": 92,
"options": {
@ -2773,7 +2773,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -2789,7 +2790,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 20
"y": 4
},
"id": 95,
"options": {
@ -2880,7 +2881,8 @@
"mode": "absolute",
"steps": [
{
"color": "transparent"
"color": "transparent",
"value": null
},
{
"color": "red",
@ -2896,7 +2898,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 28
"y": 12
},
"id": 98,
"options": {
@ -2987,7 +2989,8 @@
"mode": "absolute",
"steps": [
{
"color": "transparent"
"color": "transparent",
"value": null
},
{
"color": "red",
@ -3003,7 +3006,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 28
"y": 12
},
"id": 99,
"options": {
@ -3055,6 +3058,7 @@
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
@ -3068,6 +3072,7 @@
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
@ -3091,7 +3096,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -3107,7 +3113,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 36
"y": 20
},
"id": 79,
"options": {
@ -3159,6 +3165,7 @@
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
@ -3172,6 +3179,7 @@
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
@ -3195,7 +3203,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -3211,7 +3220,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 36
"y": 20
},
"id": 18,
"links": [
@ -3269,6 +3278,7 @@
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
@ -3282,6 +3292,7 @@
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
@ -3305,7 +3316,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -3321,7 +3333,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 44
"y": 28
},
"id": 127,
"options": {
@ -3371,6 +3383,7 @@
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
@ -3384,6 +3397,7 @@
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
@ -3407,7 +3421,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -3423,7 +3438,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 44
"y": 28
},
"id": 50,
"options": {
@ -3483,7 +3498,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -3521,13 +3537,15 @@
},
"gridPos": {
"h": 7,
"w": 24,
"w": 12,
"x": 0,
"y": 52
"y": 36
},
"id": 129,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
@ -3542,7 +3560,7 @@
}
]
},
"pluginVersion": "9.2.6",
"pluginVersion": "10.4.2",
"targets": [
{
"datasource": {
@ -3585,6 +3603,111 @@
}
],
"type": "table"
},
{
"datasource": {
"type": "victoriametrics-datasource",
"uid": "$ds"
},
"description": "Shows the number of restarts per job. The chart can be useful to identify periodic process restarts and correlate them with potential issues or anomalies. Normally, processes shouldn't restart unless restart was inited by user. The reason of restarts should be figured out by checking the logs of each specific service. ",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "stepAfter",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 0,
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 36
},
"id": 150,
"options": {
"legend": {
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Last *",
"sortDesc": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "9.1.0",
"targets": [
{
"datasource": {
"type": "victoriametrics-datasource",
"uid": "$ds"
},
"editorMode": "code",
"expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) > 0) by(job)",
"format": "time_series",
"instant": false,
"legendFormat": "{{job}}",
"refId": "A"
}
],
"title": "Restarts ($job)",
"type": "timeseries"
}
],
"targets": [
@ -3678,7 +3801,7 @@
"h": 7,
"w": 12,
"x": 0,
"y": 45
"y": 53
},
"id": 48,
"options": {
@ -3784,7 +3907,7 @@
"h": 7,
"w": 12,
"x": 12,
"y": 45
"y": 53
},
"id": 76,
"options": {
@ -3888,7 +4011,7 @@
"h": 7,
"w": 12,
"x": 0,
"y": 52
"y": 60
},
"id": 132,
"options": {
@ -3994,7 +4117,7 @@
"h": 7,
"w": 12,
"x": 12,
"y": 52
"y": 60
},
"id": 133,
"options": {
@ -4099,7 +4222,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 59
"y": 67
},
"id": 20,
"options": {
@ -4203,7 +4326,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 59
"y": 67
},
"id": 126,
"options": {
@ -4306,7 +4429,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 67
"y": 75
},
"id": 46,
"options": {
@ -4409,7 +4532,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 67
"y": 75
},
"id": 148,
"options": {
@ -4512,7 +4635,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 75
"y": 83
},
"id": 31,
"options": {
@ -4691,7 +4814,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 22
"y": 30
},
"id": 73,
"options": {
@ -4808,7 +4931,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 14
"y": 22
},
"id": 131,
"options": {
@ -4912,7 +5035,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 22
"y": 30
},
"id": 130,
"options": {
@ -5029,7 +5152,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 22
"y": 30
},
"id": 77,
"options": {
@ -5163,7 +5286,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 15
"y": 23
},
"id": 146,
"options": {
@ -5265,7 +5388,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 15
"y": 23
},
"id": 143,
"options": {
@ -5361,7 +5484,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 23
"y": 31
},
"id": 147,
"options": {
@ -5464,7 +5587,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 23
"y": 31
},
"id": 139,
"options": {
@ -5575,7 +5698,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 31
"y": 39
},
"id": 142,
"options": {
@ -5672,7 +5795,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 31
"y": 39
},
"id": 137,
"options": {
@ -5785,7 +5908,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 39
"y": 47
},
"id": 141,
"options": {
@ -5916,7 +6039,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 16
"y": 24
},
"id": 60,
"options": {
@ -6020,7 +6143,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 16
"y": 24
},
"id": 66,
"options": {
@ -6124,7 +6247,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 24
"y": 32
},
"id": 61,
"options": {
@ -6228,7 +6351,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 24
"y": 32
},
"id": 65,
"options": {
@ -6331,7 +6454,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 32
"y": 40
},
"id": 88,
"options": {
@ -6430,7 +6553,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 32
"y": 40
},
"id": 84,
"options": {
@ -6533,7 +6656,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 40
"y": 48
},
"id": 90,
"options": {
@ -6599,7 +6722,7 @@
"h": 2,
"w": 24,
"x": 0,
"y": 25
"y": 33
},
"id": 115,
"options": {
@ -6677,7 +6800,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 27
"y": 35
},
"id": 119,
"options": {
@ -6781,7 +6904,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 27
"y": 35
},
"id": 117,
"options": {
@ -6887,7 +7010,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 35
"y": 43
},
"id": 125,
"links": [
@ -7009,7 +7132,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 35
"y": 43
},
"id": 123,
"options": {
@ -7139,7 +7262,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 43
"y": 51
},
"id": 121,
"options": {
@ -7212,8 +7335,8 @@
{
"current": {
"selected": false,
"text": "VictoriaMetrics",
"value": "P4169E866C3094E38"
"text": "VictoriaMetrics - cluster",
"value": "PAF93674D0B4E9963"
},
"hide": 0,
"includeAll": false,

View file

@ -1126,234 +1126,6 @@
"title": "Rules execution errors ($instance)",
"type": "timeseries"
},
{
"datasource": {
"type": "victoriametrics-datasource",
"uid": "$ds"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Value"
},
"properties": [
{
"id": "custom.hidden",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "Time"
},
"properties": [
{
"id": "custom.hidden",
"value": true
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 25
},
"id": 50,
"options": {
"footer": {
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true,
"sortBy": [
{
"desc": true,
"displayName": "job"
}
]
},
"pluginVersion": "9.2.7",
"targets": [
{
"datasource": {
"type": "victoriametrics-datasource",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(flag{is_set=\"true\", job=~\"$job\", instance=~\"$instance\"}) by(job, instance, name, value)",
"format": "table",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A"
}
],
"title": "Non-default flags",
"transformations": [
{
"id": "groupBy",
"options": {
"fields": {
"instance": {
"aggregations": [
"uniqueValues"
],
"operation": "aggregate"
},
"job": {
"aggregations": [],
"operation": "groupby"
},
"name": {
"aggregations": [],
"operation": "groupby"
},
"value": {
"aggregations": [],
"operation": "groupby"
}
}
}
}
],
"type": "table"
},
{
"datasource": {
"type": "victoriametrics-datasource",
"uid": "$ds"
},
"description": "Missed evaluation means that group evaluation time takes longer than the configured evaluation interval. \nThis may result in missed alerting notifications or recording rules samples. Try increasing evaluation interval or concurrency for such groups. See https://docs.victoriametrics.com/vmalert/#groups\n\nIf rule expressions are taking longer than expected, please see https://docs.victoriametrics.com/troubleshooting/#slow-queries.\"",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "bars",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 25
},
"id": 58,
"options": {
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "9.2.6",
"targets": [
{
"datasource": {
"type": "victoriametrics-datasource",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(increase(vmalert_iteration_missed_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job, group) > 0",
"interval": "1m",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Missed evaluations ($instance)",
"type": "timeseries"
},
{
"collapsed": true,
"datasource": {
@ -1364,7 +1136,7 @@
"h": 1,
"w": 24,
"x": 0,
"y": 32
"y": 25
},
"id": 43,
"panels": [
@ -1435,7 +1207,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 3
"y": 11
},
"id": 37,
"links": [
@ -1532,8 +1304,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1549,7 +1320,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 3
"y": 11
},
"id": 57,
"links": [
@ -1663,7 +1434,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 11
"y": 19
},
"id": 35,
"links": [
@ -1779,7 +1550,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 11
"y": 19
},
"id": 56,
"links": [
@ -1912,7 +1683,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 19
"y": 27
},
"id": 39,
"links": [],
@ -2020,7 +1791,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 19
"y": 27
},
"id": 41,
"links": [],
@ -2128,7 +1899,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 27
"y": 35
},
"id": 59,
"links": [],
@ -2219,8 +1990,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2236,7 +2006,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 27
"y": 35
},
"id": 61,
"options": {
@ -2288,6 +2058,359 @@
"title": "Resource usage",
"type": "row"
},
{
"collapsed": true,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 26
},
"id": 62,
"panels": [
{
"datasource": {
"type": "victoriametrics-datasource",
"uid": "$ds"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Value"
},
"properties": [
{
"id": "custom.hidden",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "Time"
},
"properties": [
{
"id": "custom.hidden",
"value": true
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 27
},
"id": 50,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true,
"sortBy": [
{
"desc": true,
"displayName": "job"
}
]
},
"pluginVersion": "10.4.2",
"targets": [
{
"datasource": {
"type": "victoriametrics-datasource",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(flag{is_set=\"true\", job=~\"$job\", instance=~\"$instance\"}) by(job, instance, name, value)",
"format": "table",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A"
}
],
"title": "Non-default flags",
"transformations": [
{
"id": "groupBy",
"options": {
"fields": {
"instance": {
"aggregations": [
"uniqueValues"
],
"operation": "aggregate"
},
"job": {
"aggregations": [],
"operation": "groupby"
},
"name": {
"aggregations": [],
"operation": "groupby"
},
"value": {
"aggregations": [],
"operation": "groupby"
}
}
}
}
],
"type": "table"
},
{
"datasource": {
"type": "victoriametrics-datasource",
"uid": "$ds"
},
"description": "Missed evaluation means that group evaluation time takes longer than the configured evaluation interval. \nThis may result in missed alerting notifications or recording rules samples. Try increasing evaluation interval or concurrency for such groups. See https://docs.victoriametrics.com/vmalert/#groups\n\nIf rule expressions are taking longer than expected, please see https://docs.victoriametrics.com/troubleshooting/#slow-queries.\"",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "bars",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 27
},
"id": 58,
"options": {
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "9.2.6",
"targets": [
{
"datasource": {
"type": "victoriametrics-datasource",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(increase(vmalert_iteration_missed_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job, group) > 0",
"interval": "1m",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Missed evaluations ($instance)",
"type": "timeseries"
},
{
"datasource": {
"type": "victoriametrics-datasource",
"uid": "$ds"
},
"description": "Shows the number of restarts per job. The chart can be useful to identify periodic process restarts and correlate them with potential issues or anomalies. Normally, processes shouldn't restart unless restart was inited by user. The reason of restarts should be figured out by checking the logs of each specific service. ",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "stepAfter",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 0,
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 34
},
"id": 63,
"options": {
"legend": {
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Last *",
"sortDesc": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "9.1.0",
"targets": [
{
"datasource": {
"type": "victoriametrics-datasource",
"uid": "$ds"
},
"editorMode": "code",
"expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) > 0) by(job)",
"format": "time_series",
"instant": false,
"legendFormat": "{{job}}",
"refId": "A"
}
],
"title": "Restarts ($job)",
"type": "timeseries"
}
],
"title": "Troubleshooting",
"type": "row"
},
{
"collapsed": true,
"datasource": {
@ -2298,7 +2421,7 @@
"h": 1,
"w": 24,
"x": 0,
"y": 33
"y": 27
},
"id": 17,
"panels": [
@ -2364,7 +2487,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 44
"y": 52
},
"id": 14,
"options": {
@ -2466,7 +2589,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 44
"y": 52
},
"id": 13,
"options": {
@ -2568,7 +2691,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 52
"y": 60
},
"id": 20,
"options": {
@ -2671,7 +2794,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 52
"y": 60
},
"id": 32,
"options": {
@ -2770,7 +2893,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 60
"y": 68
},
"id": 26,
"options": {
@ -2831,7 +2954,7 @@
"h": 1,
"w": 24,
"x": 0,
"y": 34
"y": 28
},
"id": 28,
"panels": [
@ -2897,7 +3020,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 59
"y": 67
},
"id": 31,
"options": {
@ -2999,7 +3122,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 59
"y": 67
},
"id": 33,
"options": {
@ -3100,7 +3223,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 67
"y": 75
},
"id": 30,
"options": {
@ -3157,7 +3280,7 @@
"h": 1,
"w": 24,
"x": 0,
"y": 35
"y": 29
},
"id": 55,
"panels": [
@ -3221,7 +3344,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 25
"y": 33
},
"id": 52,
"options": {
@ -3313,7 +3436,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 25
"y": 33
},
"id": 53,
"options": {
@ -3410,7 +3533,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 52
"y": 60
},
"id": 54,
"options": {
@ -3513,7 +3636,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 52
"y": 60
},
"id": 60,
"options": {
@ -3566,8 +3689,8 @@
{
"current": {
"selected": false,
"text": "VictoriaMetrics",
"value": "P4169E866C3094E38"
"text": "VictoriaMetrics - cluster",
"value": "PAF93674D0B4E9963"
},
"hide": 0,
"includeAll": false,

View file

@ -1209,8 +1209,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1226,7 +1225,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 11
"y": 19
},
"id": 25,
"options": {
@ -1315,8 +1314,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1332,7 +1330,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 11
"y": 19
},
"id": 26,
"options": {
@ -1424,8 +1422,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1441,7 +1438,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 19
"y": 27
},
"id": 27,
"options": {
@ -1581,8 +1578,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1614,7 +1610,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 19
"y": 27
},
"id": 28,
"options": {
@ -1717,8 +1713,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1734,7 +1729,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 27
"y": 35
},
"id": 23,
"options": {
@ -1823,8 +1818,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1840,7 +1834,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 27
"y": 35
},
"id": 24,
"options": {
@ -1932,8 +1926,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1965,7 +1958,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 35
"y": 43
},
"id": 20,
"options": {
@ -2057,8 +2050,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2074,7 +2066,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 35
"y": 43
},
"id": 21,
"options": {
@ -2162,8 +2154,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2179,7 +2170,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 43
"y": 51
},
"id": 29,
"options": {
@ -2453,6 +2444,111 @@
],
"title": "Log errors",
"type": "timeseries"
},
{
"datasource": {
"type": "victoriametrics-datasource",
"uid": "$ds"
},
"description": "Shows the number of restarts per job. The chart can be useful to identify periodic process restarts and correlate them with potential issues or anomalies. Normally, processes shouldn't restart unless restart was inited by user. The reason of restarts should be figured out by checking the logs of each specific service. ",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "stepAfter",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 0,
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 47
},
"id": 37,
"options": {
"legend": {
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Last *",
"sortDesc": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "9.1.0",
"targets": [
{
"datasource": {
"type": "victoriametrics-datasource",
"uid": "$ds"
},
"editorMode": "code",
"expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) > 0) by(job)",
"format": "time_series",
"instant": false,
"legendFormat": "{{job}}",
"refId": "A"
}
],
"title": "Restarts ($job)",
"type": "timeseries"
}
],
"title": "Troubleshooting",

View file

@ -1335,8 +1335,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1439,8 +1438,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1618,7 +1616,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -1634,7 +1633,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 35
"y": 3
},
"id": 109,
"options": {
@ -1749,7 +1748,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 35
"y": 3
},
"id": 111,
"options": {
@ -1874,7 +1873,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 43
"y": 11
},
"id": 81,
"options": {
@ -2010,7 +2009,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 43
"y": 11
},
"id": 7,
"options": {
@ -2129,7 +2128,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 51
"y": 19
},
"id": 83,
"options": {
@ -2235,7 +2234,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 51
"y": 19
},
"id": 39,
"options": {
@ -2342,7 +2341,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 59
"y": 27
},
"id": 135,
"options": {
@ -2449,7 +2448,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 59
"y": 27
},
"id": 149,
"options": {
@ -2555,7 +2554,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 67
"y": 35
},
"id": 41,
"options": {
@ -2668,7 +2667,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -2684,7 +2684,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 20
"y": 4
},
"id": 92,
"options": {
@ -2772,7 +2772,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -2788,7 +2789,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 20
"y": 4
},
"id": 95,
"options": {
@ -2879,7 +2880,8 @@
"mode": "absolute",
"steps": [
{
"color": "transparent"
"color": "transparent",
"value": null
},
{
"color": "red",
@ -2895,7 +2897,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 28
"y": 12
},
"id": 98,
"options": {
@ -2986,7 +2988,8 @@
"mode": "absolute",
"steps": [
{
"color": "transparent"
"color": "transparent",
"value": null
},
{
"color": "red",
@ -3002,7 +3005,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 28
"y": 12
},
"id": 99,
"options": {
@ -3054,6 +3057,7 @@
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
@ -3067,6 +3071,7 @@
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
@ -3090,7 +3095,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -3106,7 +3112,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 36
"y": 20
},
"id": 79,
"options": {
@ -3158,6 +3164,7 @@
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
@ -3171,6 +3178,7 @@
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
@ -3194,7 +3202,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -3210,7 +3219,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 36
"y": 20
},
"id": 18,
"links": [
@ -3268,6 +3277,7 @@
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
@ -3281,6 +3291,7 @@
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
@ -3304,7 +3315,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -3320,7 +3332,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 44
"y": 28
},
"id": 127,
"options": {
@ -3370,6 +3382,7 @@
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
@ -3383,6 +3396,7 @@
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
@ -3406,7 +3420,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -3422,7 +3437,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 44
"y": 28
},
"id": 50,
"options": {
@ -3482,7 +3497,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -3520,13 +3536,15 @@
},
"gridPos": {
"h": 7,
"w": 24,
"w": 12,
"x": 0,
"y": 52
"y": 36
},
"id": 129,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
@ -3541,7 +3559,7 @@
}
]
},
"pluginVersion": "9.2.6",
"pluginVersion": "10.4.2",
"targets": [
{
"datasource": {
@ -3584,6 +3602,111 @@
}
],
"type": "table"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the number of restarts per job. The chart can be useful to identify periodic process restarts and correlate them with potential issues or anomalies. Normally, processes shouldn't restart unless restart was inited by user. The reason of restarts should be figured out by checking the logs of each specific service. ",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "stepAfter",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 0,
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 36
},
"id": 150,
"options": {
"legend": {
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Last *",
"sortDesc": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "9.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
"expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) > 0) by(job)",
"format": "time_series",
"instant": false,
"legendFormat": "{{job}}",
"refId": "A"
}
],
"title": "Restarts ($job)",
"type": "timeseries"
}
],
"targets": [
@ -3677,7 +3800,7 @@
"h": 7,
"w": 12,
"x": 0,
"y": 45
"y": 53
},
"id": 48,
"options": {
@ -3783,7 +3906,7 @@
"h": 7,
"w": 12,
"x": 12,
"y": 45
"y": 53
},
"id": 76,
"options": {
@ -3887,7 +4010,7 @@
"h": 7,
"w": 12,
"x": 0,
"y": 52
"y": 60
},
"id": 132,
"options": {
@ -3993,7 +4116,7 @@
"h": 7,
"w": 12,
"x": 12,
"y": 52
"y": 60
},
"id": 133,
"options": {
@ -4098,7 +4221,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 59
"y": 67
},
"id": 20,
"options": {
@ -4202,7 +4325,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 59
"y": 67
},
"id": 126,
"options": {
@ -4305,7 +4428,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 67
"y": 75
},
"id": 46,
"options": {
@ -4408,7 +4531,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 67
"y": 75
},
"id": 148,
"options": {
@ -4511,7 +4634,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 75
"y": 83
},
"id": 31,
"options": {
@ -4690,7 +4813,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 22
"y": 30
},
"id": 73,
"options": {
@ -4807,7 +4930,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 14
"y": 22
},
"id": 131,
"options": {
@ -4911,7 +5034,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 22
"y": 30
},
"id": 130,
"options": {
@ -5028,7 +5151,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 22
"y": 30
},
"id": 77,
"options": {
@ -5162,7 +5285,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 15
"y": 23
},
"id": 146,
"options": {
@ -5264,7 +5387,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 15
"y": 23
},
"id": 143,
"options": {
@ -5360,7 +5483,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 23
"y": 31
},
"id": 147,
"options": {
@ -5463,7 +5586,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 23
"y": 31
},
"id": 139,
"options": {
@ -5574,7 +5697,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 31
"y": 39
},
"id": 142,
"options": {
@ -5671,7 +5794,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 31
"y": 39
},
"id": 137,
"options": {
@ -5784,7 +5907,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 39
"y": 47
},
"id": 141,
"options": {
@ -5915,7 +6038,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 16
"y": 24
},
"id": 60,
"options": {
@ -6019,7 +6142,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 16
"y": 24
},
"id": 66,
"options": {
@ -6123,7 +6246,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 24
"y": 32
},
"id": 61,
"options": {
@ -6227,7 +6350,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 24
"y": 32
},
"id": 65,
"options": {
@ -6330,7 +6453,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 32
"y": 40
},
"id": 88,
"options": {
@ -6429,7 +6552,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 32
"y": 40
},
"id": 84,
"options": {
@ -6532,7 +6655,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 40
"y": 48
},
"id": 90,
"options": {
@ -6598,7 +6721,7 @@
"h": 2,
"w": 24,
"x": 0,
"y": 25
"y": 33
},
"id": 115,
"options": {
@ -6676,7 +6799,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 27
"y": 35
},
"id": 119,
"options": {
@ -6780,7 +6903,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 27
"y": 35
},
"id": 117,
"options": {
@ -6886,7 +7009,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 35
"y": 43
},
"id": 125,
"links": [
@ -7008,7 +7131,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 35
"y": 43
},
"id": 123,
"options": {
@ -7138,7 +7261,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 43
"y": 51
},
"id": 121,
"options": {
@ -7211,8 +7334,8 @@
{
"current": {
"selected": false,
"text": "VictoriaMetrics",
"value": "P4169E866C3094E38"
"text": "VictoriaMetrics - cluster",
"value": "PAF93674D0B4E9963"
},
"hide": 0,
"includeAll": false,

View file

@ -1125,234 +1125,6 @@
"title": "Rules execution errors ($instance)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Value"
},
"properties": [
{
"id": "custom.hidden",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "Time"
},
"properties": [
{
"id": "custom.hidden",
"value": true
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 25
},
"id": 50,
"options": {
"footer": {
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true,
"sortBy": [
{
"desc": true,
"displayName": "job"
}
]
},
"pluginVersion": "9.2.7",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(flag{is_set=\"true\", job=~\"$job\", instance=~\"$instance\"}) by(job, instance, name, value)",
"format": "table",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A"
}
],
"title": "Non-default flags",
"transformations": [
{
"id": "groupBy",
"options": {
"fields": {
"instance": {
"aggregations": [
"uniqueValues"
],
"operation": "aggregate"
},
"job": {
"aggregations": [],
"operation": "groupby"
},
"name": {
"aggregations": [],
"operation": "groupby"
},
"value": {
"aggregations": [],
"operation": "groupby"
}
}
}
}
],
"type": "table"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Missed evaluation means that group evaluation time takes longer than the configured evaluation interval. \nThis may result in missed alerting notifications or recording rules samples. Try increasing evaluation interval or concurrency for such groups. See https://docs.victoriametrics.com/vmalert/#groups\n\nIf rule expressions are taking longer than expected, please see https://docs.victoriametrics.com/troubleshooting/#slow-queries.\"",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "bars",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 25
},
"id": 58,
"options": {
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "9.2.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(increase(vmalert_iteration_missed_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job, group) > 0",
"interval": "1m",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Missed evaluations ($instance)",
"type": "timeseries"
},
{
"collapsed": true,
"datasource": {
@ -1363,7 +1135,7 @@
"h": 1,
"w": 24,
"x": 0,
"y": 32
"y": 25
},
"id": 43,
"panels": [
@ -1434,7 +1206,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 3
"y": 11
},
"id": 37,
"links": [
@ -1531,8 +1303,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1548,7 +1319,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 3
"y": 11
},
"id": 57,
"links": [
@ -1662,7 +1433,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 11
"y": 19
},
"id": 35,
"links": [
@ -1778,7 +1549,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 11
"y": 19
},
"id": 56,
"links": [
@ -1911,7 +1682,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 19
"y": 27
},
"id": 39,
"links": [],
@ -2019,7 +1790,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 19
"y": 27
},
"id": 41,
"links": [],
@ -2127,7 +1898,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 27
"y": 35
},
"id": 59,
"links": [],
@ -2218,8 +1989,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2235,7 +2005,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 27
"y": 35
},
"id": 61,
"options": {
@ -2287,6 +2057,359 @@
"title": "Resource usage",
"type": "row"
},
{
"collapsed": true,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 26
},
"id": 62,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Value"
},
"properties": [
{
"id": "custom.hidden",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "Time"
},
"properties": [
{
"id": "custom.hidden",
"value": true
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 27
},
"id": 50,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true,
"sortBy": [
{
"desc": true,
"displayName": "job"
}
]
},
"pluginVersion": "10.4.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(flag{is_set=\"true\", job=~\"$job\", instance=~\"$instance\"}) by(job, instance, name, value)",
"format": "table",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A"
}
],
"title": "Non-default flags",
"transformations": [
{
"id": "groupBy",
"options": {
"fields": {
"instance": {
"aggregations": [
"uniqueValues"
],
"operation": "aggregate"
},
"job": {
"aggregations": [],
"operation": "groupby"
},
"name": {
"aggregations": [],
"operation": "groupby"
},
"value": {
"aggregations": [],
"operation": "groupby"
}
}
}
}
],
"type": "table"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Missed evaluation means that group evaluation time takes longer than the configured evaluation interval. \nThis may result in missed alerting notifications or recording rules samples. Try increasing evaluation interval or concurrency for such groups. See https://docs.victoriametrics.com/vmalert/#groups\n\nIf rule expressions are taking longer than expected, please see https://docs.victoriametrics.com/troubleshooting/#slow-queries.\"",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "bars",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 27
},
"id": 58,
"options": {
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "9.2.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(increase(vmalert_iteration_missed_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job, group) > 0",
"interval": "1m",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Missed evaluations ($instance)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the number of restarts per job. The chart can be useful to identify periodic process restarts and correlate them with potential issues or anomalies. Normally, processes shouldn't restart unless restart was inited by user. The reason of restarts should be figured out by checking the logs of each specific service. ",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "stepAfter",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 0,
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 34
},
"id": 63,
"options": {
"legend": {
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Last *",
"sortDesc": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "9.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
"expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) > 0) by(job)",
"format": "time_series",
"instant": false,
"legendFormat": "{{job}}",
"refId": "A"
}
],
"title": "Restarts ($job)",
"type": "timeseries"
}
],
"title": "Troubleshooting",
"type": "row"
},
{
"collapsed": true,
"datasource": {
@ -2297,7 +2420,7 @@
"h": 1,
"w": 24,
"x": 0,
"y": 33
"y": 27
},
"id": 17,
"panels": [
@ -2363,7 +2486,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 44
"y": 52
},
"id": 14,
"options": {
@ -2465,7 +2588,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 44
"y": 52
},
"id": 13,
"options": {
@ -2567,7 +2690,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 52
"y": 60
},
"id": 20,
"options": {
@ -2670,7 +2793,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 52
"y": 60
},
"id": 32,
"options": {
@ -2769,7 +2892,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 60
"y": 68
},
"id": 26,
"options": {
@ -2830,7 +2953,7 @@
"h": 1,
"w": 24,
"x": 0,
"y": 34
"y": 28
},
"id": 28,
"panels": [
@ -2896,7 +3019,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 59
"y": 67
},
"id": 31,
"options": {
@ -2998,7 +3121,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 59
"y": 67
},
"id": 33,
"options": {
@ -3099,7 +3222,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 67
"y": 75
},
"id": 30,
"options": {
@ -3156,7 +3279,7 @@
"h": 1,
"w": 24,
"x": 0,
"y": 35
"y": 29
},
"id": 55,
"panels": [
@ -3220,7 +3343,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 25
"y": 33
},
"id": 52,
"options": {
@ -3312,7 +3435,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 25
"y": 33
},
"id": 53,
"options": {
@ -3409,7 +3532,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 52
"y": 60
},
"id": 54,
"options": {
@ -3512,7 +3635,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 52
"y": 60
},
"id": 60,
"options": {
@ -3565,8 +3688,8 @@
{
"current": {
"selected": false,
"text": "VictoriaMetrics",
"value": "P4169E866C3094E38"
"text": "VictoriaMetrics - cluster",
"value": "PAF93674D0B4E9963"
},
"hide": 0,
"includeAll": false,

View file

@ -1208,8 +1208,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1225,7 +1224,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 11
"y": 19
},
"id": 25,
"options": {
@ -1314,8 +1313,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1331,7 +1329,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 11
"y": 19
},
"id": 26,
"options": {
@ -1423,8 +1421,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1440,7 +1437,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 19
"y": 27
},
"id": 27,
"options": {
@ -1580,8 +1577,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1613,7 +1609,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 19
"y": 27
},
"id": 28,
"options": {
@ -1716,8 +1712,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1733,7 +1728,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 27
"y": 35
},
"id": 23,
"options": {
@ -1822,8 +1817,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1839,7 +1833,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 27
"y": 35
},
"id": 24,
"options": {
@ -1931,8 +1925,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1964,7 +1957,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 35
"y": 43
},
"id": 20,
"options": {
@ -2056,8 +2049,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2073,7 +2065,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 35
"y": 43
},
"id": 21,
"options": {
@ -2161,8 +2153,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2178,7 +2169,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 43
"y": 51
},
"id": 29,
"options": {
@ -2452,6 +2443,111 @@
],
"title": "Log errors",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the number of restarts per job. The chart can be useful to identify periodic process restarts and correlate them with potential issues or anomalies. Normally, processes shouldn't restart unless restart was inited by user. The reason of restarts should be figured out by checking the logs of each specific service. ",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "stepAfter",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 0,
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 47
},
"id": 37,
"options": {
"legend": {
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Last *",
"sortDesc": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "9.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
"expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) > 0) by(job)",
"format": "time_series",
"instant": false,
"legendFormat": "{{job}}",
"refId": "A"
}
],
"title": "Restarts ($job)",
"type": "timeseries"
}
],
"title": "Troubleshooting",

View file

@ -105,7 +105,7 @@ vmauth config is available [here](ttps://github.com/VictoriaMetrics/VictoriaMetr
## vmalert
vmalert evaluates alerting rules [alerts.yml](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts.yml)
vmalert evaluates alerting rules [alerts.yml](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/rules/alerts.yml)
to track VictoriaMetrics health state. It is connected with AlertManager for firing alerts,
and with VictoriaMetrics for executing queries and storing alert's state.
@ -153,19 +153,19 @@ make docker-cluster-vm-datasource-down # shutdown cluster
See below a list of recommended alerting rules for various VictoriaMetrics components for running in production.
Some alerting rules thresholds are just recommendations and could require an adjustment.
The list of alerting rules is the following:
* [alerts-health.yml](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-health.yml):
* [alerts-health.yml](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/rules/alerts-health.yml):
alerting rules related to all VictoriaMetrics components for tracking their "health" state;
* [alerts.yml](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts.yml):
* [alerts.yml](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/rules/alerts.yml):
alerting rules related to [single-server VictoriaMetrics](https://docs.victoriametrics.com/single-server-victoriametrics/) installation;
* [alerts-cluster.yml](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-cluster.yml):
* [alerts-cluster.yml](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/rules/alerts-cluster.yml):
alerting rules related to [cluster version of VictoriaMetrics](https://docs.victoriametrics.com/cluster-victoriametrics/);
* [alerts-vmagent.yml](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vmagent.yml):
* [alerts-vmagent.yml](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/rules/alerts-vmagent.yml):
alerting rules related to [vmagent](https://docs.victoriametrics.com/vmagent/) component;
* [alerts-vmalert.yml](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vmalert.yml):
* [alerts-vmalert.yml](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/rules/alerts-vmalert.yml):
alerting rules related to [vmalert](https://docs.victoriametrics.com/vmalert/) component;
* [alerts-vmauth.yml](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vmauth.yml):
* [alerts-vmauth.yml](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/rules/alerts-vmauth.yml):
alerting rules related to [vmauth](https://docs.victoriametrics.com/vmauth/) component;
* [alerts-vlogs.yml](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vlogs.yml):
* [alerts-vlogs.yml](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/rules/alerts-vlogs.yml):
alerting rules related to [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/);
Please, also see [how to monitor](https://docs.victoriametrics.com/single-server-victoriametrics/#monitoring)

View file

@ -0,0 +1,9 @@
# route requests between VictoriaMetrics and VictoriaLogs
unauthorized_user:
url_map:
- src_paths:
- "/api/v1/query.*"
url_prefix: "http://victoriametrics:8428"
- src_paths:
- "/select/logsql/.*"
url_prefix: "http://victorialogs:9428"

View file

@ -4,7 +4,7 @@ services:
# And forward them to --remoteWrite.url
vmagent:
container_name: vmagent
image: victoriametrics/vmagent:v1.104.0
image: victoriametrics/vmagent:v1.105.0
depends_on:
- "vminsert"
ports:
@ -13,8 +13,8 @@ services:
- vmagentdata:/vmagentdata
- ./prometheus-cluster.yml:/etc/prometheus/prometheus.yml
command:
- '--promscrape.config=/etc/prometheus/prometheus.yml'
- '--remoteWrite.url=http://vminsert:8480/insert/0/prometheus/'
- "--promscrape.config=/etc/prometheus/prometheus.yml"
- "--remoteWrite.url=http://vminsert:8480/insert/0/prometheus/"
restart: always
# Grafana instance configured with VictoriaMetrics as datasource
@ -39,7 +39,7 @@ services:
# where N is number of vmstorages (2 in this case).
vmstorage-1:
container_name: vmstorage-1
image: victoriametrics/vmstorage:v1.104.0-cluster
image: victoriametrics/vmstorage:v1.105.0-cluster
ports:
- 8482
- 8400
@ -47,11 +47,11 @@ services:
volumes:
- strgdata-1:/storage
command:
- '--storageDataPath=/storage'
- "--storageDataPath=/storage"
restart: always
vmstorage-2:
container_name: vmstorage-2
image: victoriametrics/vmstorage:v1.104.0-cluster
image: victoriametrics/vmstorage:v1.105.0-cluster
ports:
- 8482
- 8400
@ -59,20 +59,20 @@ services:
volumes:
- strgdata-2:/storage
command:
- '--storageDataPath=/storage'
- "--storageDataPath=/storage"
restart: always
# vminsert is ingestion frontend. It receives metrics pushed by vmagent,
# pre-process them and distributes across configured vmstorage shards.
vminsert:
container_name: vminsert
image: victoriametrics/vminsert:v1.104.0-cluster
image: victoriametrics/vminsert:v1.105.0-cluster
depends_on:
- "vmstorage-1"
- "vmstorage-2"
command:
- '--storageNode=vmstorage-1:8400'
- '--storageNode=vmstorage-2:8400'
- "--storageNode=vmstorage-1:8400"
- "--storageNode=vmstorage-2:8400"
ports:
- 8480:8480
restart: always
@ -81,27 +81,27 @@ services:
# vmselect collects results from configured `--storageNode` shards.
vmselect-1:
container_name: vmselect-1
image: victoriametrics/vmselect:v1.104.0-cluster
image: victoriametrics/vmselect:v1.105.0-cluster
depends_on:
- "vmstorage-1"
- "vmstorage-2"
command:
- '--storageNode=vmstorage-1:8401'
- '--storageNode=vmstorage-2:8401'
- '--vmalert.proxyURL=http://vmalert:8880'
- "--storageNode=vmstorage-1:8401"
- "--storageNode=vmstorage-2:8401"
- "--vmalert.proxyURL=http://vmalert:8880"
ports:
- 8481
restart: always
vmselect-2:
container_name: vmselect-2
image: victoriametrics/vmselect:v1.104.0-cluster
image: victoriametrics/vmselect:v1.105.0-cluster
depends_on:
- "vmstorage-1"
- "vmstorage-2"
command:
- '--storageNode=vmstorage-1:8401'
- '--storageNode=vmstorage-2:8401'
- '--vmalert.proxyURL=http://vmalert:8880'
- "--storageNode=vmstorage-1:8401"
- "--storageNode=vmstorage-2:8401"
- "--vmalert.proxyURL=http://vmalert:8880"
ports:
- 8481
restart: always
@ -112,14 +112,14 @@ services:
# It can be used as an authentication proxy.
vmauth:
container_name: vmauth
image: victoriametrics/vmauth:v1.104.0
image: victoriametrics/vmauth:v1.105.0
depends_on:
- "vmselect-1"
- "vmselect-2"
volumes:
- ./auth-cluster.yml:/etc/auth.yml
command:
- '--auth.config=/etc/auth.yml'
- "--auth.config=/etc/auth.yml"
ports:
- 8427:8427
restart: always
@ -127,24 +127,24 @@ services:
# vmalert executes alerting and recording rules
vmalert:
container_name: vmalert
image: victoriametrics/vmalert:v1.104.0
image: victoriametrics/vmalert:v1.105.0
depends_on:
- "vmauth"
ports:
- 8880:8880
volumes:
- ./alerts-cluster.yml:/etc/alerts/alerts.yml
- ./alerts-health.yml:/etc/alerts/alerts-health.yml
- ./alerts-vmagent.yml:/etc/alerts/alerts-vmagent.yml
- ./alerts-vmalert.yml:/etc/alerts/alerts-vmalert.yml
- ./rules/alerts-cluster.yml:/etc/alerts/alerts.yml
- ./rules/alerts-health.yml:/etc/alerts/alerts-health.yml
- ./rules/alerts-vmagent.yml:/etc/alerts/alerts-vmagent.yml
- ./rules/alerts-vmalert.yml:/etc/alerts/alerts-vmalert.yml
command:
- '--datasource.url=http://vmauth:8427/select/0/prometheus'
- '--remoteRead.url=http://vmauth:8427/select/0/prometheus'
- '--remoteWrite.url=http://vminsert:8480/insert/0/prometheus'
- '--notifier.url=http://alertmanager:9093/'
- '--rule=/etc/alerts/*.yml'
- "--datasource.url=http://vmauth:8427/select/0/prometheus"
- "--remoteRead.url=http://vmauth:8427/select/0/prometheus"
- "--remoteWrite.url=http://vminsert:8480/insert/0/prometheus"
- "--notifier.url=http://alertmanager:9093/"
- "--rule=/etc/alerts/*.yml"
# display source of alerts in grafana
- '-external.url=http://127.0.0.1:3000' #grafana outside container
- "-external.url=http://127.0.0.1:3000" #grafana outside container
- '--external.alert.source=explore?orgId=1&left={"datasource":"VictoriaMetrics","queries":[{"expr":{{.Expr|jsonEscape|queryEscape}},"refId":"A"}],"range":{"from":"{{ .ActiveAt.UnixMilli }}","to":"now"}}'
restart: always
@ -156,7 +156,7 @@ services:
volumes:
- ./alertmanager.yml:/config/alertmanager.yml
command:
- '--config.file=/config/alertmanager.yml'
- "--config.file=/config/alertmanager.yml"
ports:
- 9093:9093
restart: always

View file

@ -16,7 +16,7 @@ services:
- ./../../dashboards/victoriametrics.json:/var/lib/grafana/dashboards/vm.json
- ./../../dashboards/victorialogs.json:/var/lib/grafana/dashboards/vl.json
environment:
- "GF_INSTALL_PLUGINS=https://github.com/VictoriaMetrics/victorialogs-datasource/releases/download/v0.5.0/victorialogs-datasource-v0.5.0.zip;victorialogs-datasource"
- "GF_INSTALL_PLUGINS=https://github.com/VictoriaMetrics/victorialogs-datasource/releases/download/v0.6.2/victorialogs-datasource-v0.6.2.zip;victorialogs-datasource"
- "GF_PLUGINS_ALLOW_LOADING_UNSIGNED_PLUGINS=victorialogs-datasource"
networks:
- vm_net
@ -26,7 +26,7 @@ services:
# and forwards them to VictoriaLogs
fluentbit:
container_name: fluentbit
image: cr.fluentbit.io/fluent/fluent-bit:2.1.4
image: fluent/fluent-bit:2.1.4
volumes:
- /var/lib/docker/containers:/var/lib/docker/containers:ro
- ./fluent-bit.conf:/fluent-bit/etc/fluent-bit.conf
@ -36,11 +36,11 @@ services:
networks:
- vm_net
# VictoriaLogs instance, a single process responsible for
# storing logs and serving read queries.
# VictoriaLogs instance, a single process responsible for
# storing logs and serving read queries.
victorialogs:
container_name: victorialogs
image: docker.io/victoriametrics/victoria-logs:v0.37.0-victorialogs
image: victoriametrics/victoria-logs:v0.40.0-victorialogs
command:
- "--storageDataPath=/vlogs"
- "--httpListenAddr=:9428"
@ -55,7 +55,7 @@ services:
# scraping, storing metrics and serve read requests.
victoriametrics:
container_name: victoriametrics
image: victoriametrics/victoria-metrics:v1.104.0
image: victoriametrics/victoria-metrics:v1.105.0
ports:
- 8428:8428
volumes:
@ -69,29 +69,49 @@ services:
- vm_net
restart: always
# vmalert executes alerting and recording rules
vmalert:
container_name: vmalert
image: victoriametrics/vmalert:v1.104.0
# vmauth is a router and balancer for HTTP requests.
# It proxies query requests from vmalert to either VictoriaMetrics or VictoriaLogs,
# depending on the requested path.
vmauth:
container_name: vmauth
image: victoriametrics/vmauth:v1.105.0
depends_on:
- "victoriametrics"
- "victorialogs"
volumes:
- ./auth-mixed-datasource.yml:/etc/auth.yml
command:
- "--auth.config=/etc/auth.yml"
ports:
- 8427:8427
networks:
- vm_net
restart: always
# vmalert executes alerting and recording rules according to given rule type.
vmalert:
container_name: vmalert
image: victoriametrics/vmalert:v1.105.0
depends_on:
- "vmauth"
- "alertmanager"
- "victoriametrics"
ports:
- 8880:8880
volumes:
- ./alerts.yml:/etc/alerts/alerts.yml
- ./alerts-health.yml:/etc/alerts/alerts-health.yml
- ./alerts-vlogs.yml:/etc/alerts/alerts-vlogs.yml
- ./alerts-vmalert.yml:/etc/alerts/alerts-vmalert.yml
- ./rules/alerts.yml:/etc/alerts/alerts.yml
- ./rules/alerts-vlogs.yml:/etc/alerts/vlogs.yml
- ./rules/alerts-health.yml:/etc/alerts/alerts-health.yml
- ./rules/alerts-vmagent.yml:/etc/alerts/alerts-vmagent.yml
- ./rules/alerts-vmalert.yml:/etc/alerts/alerts-vmalert.yml
command:
- "--datasource.url=http://victoriametrics:8428/"
- "--datasource.url=http://vmauth:8427/"
- "--remoteRead.url=http://victoriametrics:8428/"
- "--remoteWrite.url=http://victoriametrics:8428/"
- "--notifier.url=http://alertmanager:9093/"
- "--rule=/etc/alerts/*.yml"
# display source of alerts in grafana
- "--external.url=http://127.0.0.1:3000" #grafana outside container
- '--external.alert.source=explore?orgId=1&left={"datasource":"VictoriaMetrics","queries":[{"expr":{{.Expr|jsonEscape|queryEscape}},"refId":"A"}],"range":{"from":"{{ .ActiveAt.UnixMilli }}","to":"now"}}'
networks:
- vm_net
restart: always

View file

@ -4,7 +4,7 @@ services:
# And forward them to --remoteWrite.url
vmagent:
container_name: vmagent
image: victoriametrics/vmagent:v1.104.0
image: victoriametrics/vmagent:v1.105.0
depends_on:
- "victoriametrics"
ports:
@ -22,7 +22,7 @@ services:
# storing metrics and serve read requests.
victoriametrics:
container_name: victoriametrics
image: victoriametrics/victoria-metrics:v1.104.0
image: victoriametrics/victoria-metrics:v1.105.0
ports:
- 8428:8428
- 8089:8089
@ -65,17 +65,17 @@ services:
# vmalert executes alerting and recording rules
vmalert:
container_name: vmalert
image: victoriametrics/vmalert:v1.104.0
image: victoriametrics/vmalert:v1.105.0
depends_on:
- "victoriametrics"
- "alertmanager"
ports:
- 8880:8880
volumes:
- ./alerts.yml:/etc/alerts/alerts.yml
- ./alerts-health.yml:/etc/alerts/alerts-health.yml
- ./alerts-vmagent.yml:/etc/alerts/alerts-vmagent.yml
- ./alerts-vmalert.yml:/etc/alerts/alerts-vmalert.yml
- ./rules/alerts.yml:/etc/alerts/alerts.yml
- ./rules/alerts-health.yml:/etc/alerts/alerts-health.yml
- ./rules/alerts-vmagent.yml:/etc/alerts/alerts-vmagent.yml
- ./rules/alerts-vmalert.yml:/etc/alerts/alerts-vmalert.yml
command:
- "--datasource.url=http://victoriametrics:8428/"
- "--remoteRead.url=http://victoriametrics:8428/"

View file

@ -74,6 +74,17 @@ groups:
description: "vmalert instance {{ $labels.instance }} is failing to push metrics generated via alerting
or recording rules to the configured remote write URL. Check vmalert's logs for detailed error message."
- alert: RemoteWriteDroppingData
expr: increase(vmalert_remotewrite_dropped_rows_total[5m]) > 0
for: 5m
labels:
severity: critical
annotations:
summary: "vmalert instance {{ $labels.instance }} is dropping data sent to remote write URL"
description: "vmalert instance {{ $labels.instance }} is failing to send results of alerting or recording rules
to the configured remote write URL. This may result into gaps in recording rules or alerts state.
Check vmalert's logs for detailed error message."
- alert: AlertmanagerErrors
expr: increase(vmalert_alerts_send_errors_total[5m]) > 0
for: 15m

View file

@ -0,0 +1,13 @@
groups:
- name: TestGroup
type: vlogs
interval: 1m
rules:
- record: logCount
expr: '_time: 1m | stats by (path) count () as total'
annotations:
description: "path {{$labels.path}} generated {{$value}} logs in the last 1 minute"
- alert: tooManyLogs
expr: '_time: 1m | stats by (path) count () as total | filter total:>50'
annotations:
description: "path {{$labels.path}} generated more than 50 log entries in the last 1 minute: {{$value}}"

View file

@ -1,11 +1,14 @@
services:
# meta service will be ignored by compose
.victorialogs:
image: docker.io/victoriametrics/victoria-logs:v0.37.0-victorialogs
image: docker.io/victoriametrics/victoria-logs:v0.40.0-victorialogs
command:
- -storageDataPath=/vlogs
- -loggerFormat=json
- -syslog.listenAddr.tcp=0.0.0.0:8094
- -journald.streamFields=_HOSTNAME,_SYSTEMD_UNIT,_PID
- -journald.ignoreFields=MESSAGE_ID,INVOCATION_ID,USER_INVOCATION_ID,
- -journald.ignoreFields=_BOOT_ID,_MACHINE_ID,_SYSTEMD_INVOCATION_ID,_STREAM_ID,_UID
deploy:
replicas: 0
healthcheck:

View file

@ -5,6 +5,7 @@ The folder contains examples of [FluentBit](https://docs.fluentbit.io/manual) in
* [loki](./loki)
* [jsonline single node](./jsonline)
* [jsonline HA setup](./jsonline-ha)
* [otlp](./otlp)
To spin-up environment `cd` to any of listed above directories run the following command:
```
@ -32,5 +33,6 @@ FluentBit configuration example can be found below:
* [loki](./loki/fluent-bit.conf)
* [jsonline single node](./jsonline/fluent-bit.conf)
* [jsonline HA setup](./jsonline-ha/fluent-bit.conf)
* [otlp](./otlp/fluent-bit.conf)
Please, note that `_stream_fields` parameter must follow recommended [best practices](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) to achieve better performance.

View file

@ -1,6 +1,13 @@
include:
- ../compose-base.yml
services:
nginx:
image: nginx:1.27
volumes:
- ./nginx.conf:/etc/nginx/nginx.conf
depends_on: [fluentbit]
ports:
- "8080:80"
fluentbit:
image: cr.fluentbit.io/fluent/fluent-bit:3.1.7
volumes:

View file

@ -0,0 +1,16 @@
events {
worker_connections 2000;
}
http {
server {
listen 0.0.0.0;
server_name _;
location /opentelemetry/api/v1/push {
proxy_pass http://victoriametrics:8428;
}
location /insert/opentelemetry/v1/logs {
proxy_pass http://victorialogs:9428;
}
}
}

View file

@ -0,0 +1,3 @@
include:
- ../compose.yml
name: fluentbit-loki

View file

@ -0,0 +1,33 @@
[INPUT]
name tail
path /var/lib/docker/containers/**/*.log
path_key path
multiline.parser docker, cri
Parser docker
Docker_Mode On
[INPUT]
Name syslog
Listen 0.0.0.0
Port 5140
Parser syslog-rfc3164
Mode tcp
[INPUT]
name fluentbit_metrics
tag internal_metrics
scrape_interval 2
[SERVICE]
Flush 1
Parsers_File parsers.conf
[OUTPUT]
name opentelemetry
match *
host nginx
logs_uri /insert/opentelemetry/v1/logs
metrics_uri /opentelemetry/api/v1/push
port 80
header VL-Msg-Field log
header VL-Stream-Fields severity

View file

@ -0,0 +1,9 @@
FROM ubuntu:24.04
RUN \
apt update && \
apt install -y \
systemd \
systemd-journal-remote && \
sed -i 's/# URL=/URL=http:\/\/victorialogs:9428\/insert\/journald/g' /etc/systemd/journal-upload.conf && \
systemctl enable systemd-journal-upload.service

View file

@ -0,0 +1,29 @@
# Docker compose Journald integration with VictoriaLogs
The folder contains examples of Journald integration with VictoriaLogs using protocols:
* [journald](./journald)
To spin-up environment `cd` to any of listed above directories run the following command:
```
docker compose up -d
```
To shut down the docker-compose environment run the following command:
```
docker compose down
docker compose rm -f
```
The docker compose file contains the following components:
* journald - Journald logs collection agent, which is configured to collect and write data to `victorialogs`
* victorialogs - VictoriaLogs log database, which accepts the data from `journald`
* victoriametrics - VictoriaMetrics metrics database, which collects metrics from `victorialogs` and `journald`
Querying the data
* [vmui](https://docs.victoriametrics.com/victorialogs/querying/#vmui) - a web UI is accessible by `http://localhost:9428/select/vmui`
* for querying the data via command-line please check [these docs](https://docs.victoriametrics.com/victorialogs/querying/#command-line)
Please, note that `_stream_fields` parameter must follow recommended [best practices](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) to achieve better performance.

View file

@ -0,0 +1,14 @@
include:
- ../compose-base.yml
services:
journald:
build: .
restart: on-failure
privileged: true
user: root
entrypoint: /lib/systemd/systemd
depends_on:
victorialogs:
condition: service_healthy
victoriametrics:
condition: service_healthy

View file

@ -0,0 +1,3 @@
include:
- ../compose-base.yml
name: journald-remote

Some files were not shown because too many files have changed in this diff Show more