mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-31 15:06:26 +00:00
Merge branch 'public-single-node' into victorialogs-wip
This commit is contained in:
commit
07d244dab0
7 changed files with 48 additions and 18 deletions
|
@ -195,7 +195,8 @@ func SearchLabelNamesWithFiltersOnTimeRange(qt *querytracer.Tracer, tfss []*stor
|
||||||
|
|
||||||
// SearchLabelValuesWithFiltersOnTimeRange searches for label values for the given labelName, tfss and tr.
|
// SearchLabelValuesWithFiltersOnTimeRange searches for label values for the given labelName, tfss and tr.
|
||||||
func SearchLabelValuesWithFiltersOnTimeRange(qt *querytracer.Tracer, labelName string, tfss []*storage.TagFilters,
|
func SearchLabelValuesWithFiltersOnTimeRange(qt *querytracer.Tracer, labelName string, tfss []*storage.TagFilters,
|
||||||
tr storage.TimeRange, maxLabelValues, maxMetrics int, deadline uint64) ([]string, error) {
|
tr storage.TimeRange, maxLabelValues, maxMetrics int, deadline uint64,
|
||||||
|
) ([]string, error) {
|
||||||
WG.Add(1)
|
WG.Add(1)
|
||||||
labelValues, err := Storage.SearchLabelValuesWithFiltersOnTimeRange(qt, labelName, tfss, tr, maxLabelValues, maxMetrics, deadline)
|
labelValues, err := Storage.SearchLabelValuesWithFiltersOnTimeRange(qt, labelName, tfss, tr, maxLabelValues, maxMetrics, deadline)
|
||||||
WG.Done()
|
WG.Done()
|
||||||
|
@ -492,6 +493,7 @@ func writeStorageMetrics(w io.Writer, strg *storage.Storage) {
|
||||||
|
|
||||||
metrics.WriteCounterUint64(w, `vm_indexdb_items_added_total`, idbm.ItemsAdded)
|
metrics.WriteCounterUint64(w, `vm_indexdb_items_added_total`, idbm.ItemsAdded)
|
||||||
metrics.WriteCounterUint64(w, `vm_indexdb_items_added_size_bytes_total`, idbm.ItemsAddedSizeBytes)
|
metrics.WriteCounterUint64(w, `vm_indexdb_items_added_size_bytes_total`, idbm.ItemsAddedSizeBytes)
|
||||||
|
metrics.WriteCounterUint64(w, `vm_indexdb_items_dropped_total{reason="too_long_item"}`, idbm.TooLongItemsDroppedTotal)
|
||||||
|
|
||||||
metrics.WriteGaugeUint64(w, `vm_pending_rows{type="storage"}`, tm.PendingRows)
|
metrics.WriteGaugeUint64(w, `vm_pending_rows{type="storage"}`, tm.PendingRows)
|
||||||
metrics.WriteGaugeUint64(w, `vm_pending_rows{type="indexdb"}`, idbm.PendingItems)
|
metrics.WriteGaugeUint64(w, `vm_pending_rows{type="indexdb"}`, idbm.PendingItems)
|
||||||
|
|
|
@ -140,17 +140,6 @@ groups:
|
||||||
for the current load. It is likely more RAM is needed for optimal handling of the current number of active time series.
|
for the current load. It is likely more RAM is needed for optimal handling of the current number of active time series.
|
||||||
See also https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3976#issuecomment-1476883183"
|
See also https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3976#issuecomment-1476883183"
|
||||||
|
|
||||||
- alert: ProcessNearFDLimits
|
|
||||||
expr: (process_max_fds - process_open_fds) < 100
|
|
||||||
for: 5m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=117&var-instance={{ $labels.instance }}"
|
|
||||||
summary: "Number of free file descriptors is less than 100 for \"{{ $labels.job }}\"(\"{{ $labels.instance }}\") for the last 5m"
|
|
||||||
description: "Exhausting OS file descriptors limit can cause severe degradation of the process.
|
|
||||||
Consider to increase the limit as fast as possible."
|
|
||||||
|
|
||||||
- alert: LabelsLimitExceededOnIngestion
|
- alert: LabelsLimitExceededOnIngestion
|
||||||
expr: increase(vm_metrics_with_dropped_labels_total[5m]) > 0
|
expr: increase(vm_metrics_with_dropped_labels_total[5m]) > 0
|
||||||
for: 15m
|
for: 15m
|
||||||
|
@ -176,4 +165,3 @@ groups:
|
||||||
is saturated by more than 90% and vminsert won't be able to keep up.\n
|
is saturated by more than 90% and vminsert won't be able to keep up.\n
|
||||||
This usually means that more vminsert or vmstorage nodes must be added to the cluster in order to increase
|
This usually means that more vminsert or vmstorage nodes must be added to the cluster in order to increase
|
||||||
the total number of vminsert -> vmstorage links."
|
the total number of vminsert -> vmstorage links."
|
||||||
|
|
||||||
|
|
|
@ -87,3 +87,34 @@ groups:
|
||||||
In some cases for components like vmagent or vminsert the alert might trigger if there are too many clients
|
In some cases for components like vmagent or vminsert the alert might trigger if there are too many clients
|
||||||
making write attempts. If vmagent's or vminsert's CPU usage and network saturation are at normal level, then
|
making write attempts. If vmagent's or vminsert's CPU usage and network saturation are at normal level, then
|
||||||
it might be worth adjusting `-maxConcurrentInserts` cmd-line flag."
|
it might be worth adjusting `-maxConcurrentInserts` cmd-line flag."
|
||||||
|
|
||||||
|
- alert: IndexDBRecordsDrop
|
||||||
|
expr: increase(vm_indexdb_items_dropped_total[5m]) > 0
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
annotations:
|
||||||
|
summary: "IndexDB skipped registering items during data ingestion with reason={{ $labels.reason }}."
|
||||||
|
description: "VictoriaMetrics could skip registering new timeseries during ingestion if they fail the validation process.
|
||||||
|
For example, `reason=too_long_item` means that time series cannot exceed 64KB. Please, reduce the number
|
||||||
|
of labels or label values for such series. Or enforce these limits via `-maxLabelsPerTimeseries` and
|
||||||
|
`-maxLabelValueLen` command-line flags."
|
||||||
|
|
||||||
|
- alert: TooLongLabelValues
|
||||||
|
expr: increase(vm_too_long_label_values_total[5m]) > 0
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
annotations:
|
||||||
|
summary: "VictoriaMetrics truncates too long label values"
|
||||||
|
description: "The maximum length of a label value is limited via `-maxLabelValueLen` cmd-line flag.
|
||||||
|
Longer label values are truncated, which may result in overlapping time series.
|
||||||
|
Please, check your logs to find which labels were truncated and
|
||||||
|
either reduce the size of label values or increase `-maxLabelValueLen`."
|
||||||
|
|
||||||
|
- alert: TooLongLabelNames
|
||||||
|
expr: increase(vm_too_long_label_names_total[5m]) > 0
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
annotations:
|
||||||
|
summary: "VictoriaMetrics truncates too long label names"
|
||||||
|
description: "The maximum length of a label name is limited by 256 bytes.
|
||||||
|
Longer label names are truncated, which may result in overlapping time series."
|
|
@ -43,6 +43,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
|
||||||
* FEATURE: [dashboards/single](https://grafana.com/grafana/dashboards/10229): add `Network Usage` panel to `Resource Usage` row.
|
* FEATURE: [dashboards/single](https://grafana.com/grafana/dashboards/10229): add `Network Usage` panel to `Resource Usage` row.
|
||||||
* FEATURE: [dashboards/operator](https://grafana.com/grafana/dashboards/17869), [dashboards/backupmanager](https://grafana.com/grafana/dashboards/17798) and [dashboard/tenant-statistic](https://grafana.com/grafana/dashboards/16399): update dashboard to be compatible with Grafana 10+ version.
|
* FEATURE: [dashboards/operator](https://grafana.com/grafana/dashboards/17869), [dashboards/backupmanager](https://grafana.com/grafana/dashboards/17798) and [dashboard/tenant-statistic](https://grafana.com/grafana/dashboards/16399): update dashboard to be compatible with Grafana 10+ version.
|
||||||
* FEATURE: [dashboards/cluster](https://grafana.com/grafana/dashboards/11176): add new panel `Concurrent selects` to `vmstorage` row. The panel will show how many ongoing select queries are processed by vmstorage and should help to identify resource bottlenecks. See panel description for more details.
|
* FEATURE: [dashboards/cluster](https://grafana.com/grafana/dashboards/11176): add new panel `Concurrent selects` to `vmstorage` row. The panel will show how many ongoing select queries are processed by vmstorage and should help to identify resource bottlenecks. See panel description for more details.
|
||||||
|
* FEATURE: [dashboards](https://grafana.com/orgs/victoriametrics): use `$__interval` variable for offsets and look-behind windows in annotations. This should improve precision of `restarts` and `version change` annotations when zooming-in/zooming-out on the dashboards.
|
||||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): support aggregation and deduplication configs before replicating data to configured `-remoteWrite.url` destinations. This saves CPU and memory resources when incoming data needs to be aggregated or deduplicated once and then replicated to multiple destinations. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5467).
|
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): support aggregation and deduplication configs before replicating data to configured `-remoteWrite.url` destinations. This saves CPU and memory resources when incoming data needs to be aggregated or deduplicated once and then replicated to multiple destinations. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5467).
|
||||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add service discovery support for [Vultr](https://www.vultr.com/). See [these docs](https://docs.victoriametrics.com/sd_configs/#vultr_sd_configs) and [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6041).
|
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add service discovery support for [Vultr](https://www.vultr.com/). See [these docs](https://docs.victoriametrics.com/sd_configs/#vultr_sd_configs) and [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6041).
|
||||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): allow configuring `-remoteWrite.disableOnDiskQueue` and `-remoteWrite.dropSamplesOnOverload` cmd-line flags per each `-remoteWrite.url`. See this [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6065). Thanks to @rbizos for the implementation!
|
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): allow configuring `-remoteWrite.disableOnDiskQueue` and `-remoteWrite.dropSamplesOnOverload` cmd-line flags per each `-remoteWrite.url`. See this [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6065). Thanks to @rbizos for the implementation!
|
||||||
|
@ -52,7 +53,8 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
|
||||||
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert/): speed up retrieving rules files from object storages by skipping unchanged objects during reloading. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6210).
|
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert/): speed up retrieving rules files from object storages by skipping unchanged objects during reloading. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6210).
|
||||||
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert/): support reading [DNS SRV](https://en.wikipedia.org/wiki/SRV_record) records in `-datasource.url`, `-remoteWrite.url` and `-remoteRead.url` command-line options. For example, `-remoteWrite.url=http://srv+victoria-metrics` automatically resolves the `victoria-metrics` DNS SRV to a list of hostnames with TCP ports and then sends data to one of the addresses. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6053).
|
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert/): support reading [DNS SRV](https://en.wikipedia.org/wiki/SRV_record) records in `-datasource.url`, `-remoteWrite.url` and `-remoteRead.url` command-line options. For example, `-remoteWrite.url=http://srv+victoria-metrics` automatically resolves the `victoria-metrics` DNS SRV to a list of hostnames with TCP ports and then sends data to one of the addresses. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6053).
|
||||||
* FEATURE: [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmrestore](https://docs.victoriametrics.com/vmrestore/), [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager/): add `-s3TLSInsecureSkipVerify` command-line flag for skipping TLS certificates verification when connecting to S3 endpoint.
|
* FEATURE: [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmrestore](https://docs.victoriametrics.com/vmrestore/), [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager/): add `-s3TLSInsecureSkipVerify` command-line flag for skipping TLS certificates verification when connecting to S3 endpoint.
|
||||||
* FEATURE: [dashboards](https://grafana.com/orgs/victoriametrics): use `$__interval` variable for offsets and look-behind windows in annotations. This should improve precision of `restarts` and `version change` annotations when zooming-in/zooming-out on the dashboards.
|
* FEATURE: expose metric `vm_indexdb_items_dropped_total` to track the number of IndexDB records that had to be dropped during ingestion. The reason for dropping a record is reported in the `reason` label of the exposed metric. This change also comes with a new [alerting rule](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-health.yml) to track changes of this metric.
|
||||||
|
* FEATURE: [alerts-health](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-health.yml): add new alerting rules `TooLongLabelValues` and `TooLongLabelNames` to notify about truncation of label values or names respectively.
|
||||||
|
|
||||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix bug that prevents the first query trace from expanding on click event. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6186). The issue was introduced in [v1.100.0](https://docs.victoriametrics.com/changelog/#v11000) release.
|
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix bug that prevents the first query trace from expanding on click event. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6186). The issue was introduced in [v1.100.0](https://docs.victoriametrics.com/changelog/#v11000) release.
|
||||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix calendar display when `UTC+00:00` timezone is set. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6239).
|
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix calendar display when `UTC+00:00` timezone is set. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6239).
|
||||||
|
|
|
@ -1307,7 +1307,7 @@ Below is the output for `/path/to/vmselect -help`:
|
||||||
-blockcache.missesBeforeCaching int
|
-blockcache.missesBeforeCaching int
|
||||||
The number of cache misses before putting the block into cache. Higher values may reduce indexdb/dataBlocks cache size at the cost of higher CPU and disk read usage (default 2)
|
The number of cache misses before putting the block into cache. Higher values may reduce indexdb/dataBlocks cache size at the cost of higher CPU and disk read usage (default 2)
|
||||||
-cacheDataPath string
|
-cacheDataPath string
|
||||||
Path to directory for cache files. Cache isn't saved if empty
|
Path to directory for cache files. By default, the cache is not persisted.
|
||||||
-cacheExpireDuration duration
|
-cacheExpireDuration duration
|
||||||
Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
|
Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
|
||||||
-cluster.tls
|
-cluster.tls
|
||||||
|
|
|
@ -276,7 +276,8 @@ func (ris *rawItemsShard) addItems(items [][]byte) ([][]byte, []*inmemoryBlock)
|
||||||
if len(itemPrefix) > 128 {
|
if len(itemPrefix) > 128 {
|
||||||
itemPrefix = itemPrefix[:128]
|
itemPrefix = itemPrefix[:128]
|
||||||
}
|
}
|
||||||
tooLongItemLogger.Errorf("skipping adding too long item to indexdb: len(item)=%d; it souldn't exceed %d bytes; item prefix=%q", len(item), maxInmemoryBlockSize, itemPrefix)
|
tooLongItemsTotal.Add(1)
|
||||||
|
tooLongItemLogger.Errorf("skipping adding too long item to indexdb: len(item)=%d; it shouldn't exceed %d bytes; item prefix=%q", len(item), maxInmemoryBlockSize, itemPrefix)
|
||||||
}
|
}
|
||||||
ris.ibs = ibs
|
ris.ibs = ibs
|
||||||
ris.mu.Unlock()
|
ris.mu.Unlock()
|
||||||
|
@ -290,6 +291,8 @@ func (ris *rawItemsShard) updateFlushDeadline() {
|
||||||
|
|
||||||
var tooLongItemLogger = logger.WithThrottler("tooLongItem", 5*time.Second)
|
var tooLongItemLogger = logger.WithThrottler("tooLongItem", 5*time.Second)
|
||||||
|
|
||||||
|
// tooLongItemsTotal counts the items that addItems skipped instead of adding
// to indexdb because they were too long. Its value is added to
// TableMetrics.TooLongItemsDroppedTotal in Table.UpdateMetrics.
var tooLongItemsTotal atomic.Uint64
|
||||||
|
|
||||||
type partWrapper struct {
|
type partWrapper struct {
|
||||||
// refCount is the number of references to partWrapper
|
// refCount is the number of references to partWrapper
|
||||||
refCount atomic.Int32
|
refCount atomic.Int32
|
||||||
|
@ -575,6 +578,8 @@ type TableMetrics struct {
|
||||||
IndexBlocksCacheMisses uint64
|
IndexBlocksCacheMisses uint64
|
||||||
|
|
||||||
PartsRefCount uint64
|
PartsRefCount uint64
|
||||||
|
|
||||||
|
TooLongItemsDroppedTotal uint64
|
||||||
}
|
}
|
||||||
|
|
||||||
// TotalItemsCount returns the total number of items in the table.
|
// TotalItemsCount returns the total number of items in the table.
|
||||||
|
@ -632,6 +637,8 @@ func (tb *Table) UpdateMetrics(m *TableMetrics) {
|
||||||
m.IndexBlocksCacheSizeMaxBytes = uint64(idxbCache.SizeMaxBytes())
|
m.IndexBlocksCacheSizeMaxBytes = uint64(idxbCache.SizeMaxBytes())
|
||||||
m.IndexBlocksCacheRequests = idxbCache.Requests()
|
m.IndexBlocksCacheRequests = idxbCache.Requests()
|
||||||
m.IndexBlocksCacheMisses = idxbCache.Misses()
|
m.IndexBlocksCacheMisses = idxbCache.Misses()
|
||||||
|
|
||||||
|
m.TooLongItemsDroppedTotal += tooLongItemsTotal.Load()
|
||||||
}
|
}
|
||||||
|
|
||||||
// AddItems adds the given items to the tb.
|
// AddItems adds the given items to the tb.
|
||||||
|
|
Loading…
Reference in a new issue