mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
lib/mergeset: adds tracking for indexdb records drop (#6297)
It allows to create alert for possible item drops at indexdb. It may happen, if ingested metric size exceeds max indexdb item size. --------- Signed-off-by: hagen1778 <roman@victoriametrics.com> Co-authored-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> Co-authored-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
parent
0915d6d841
commit
69d244e6fb
6 changed files with 26 additions and 6 deletions
|
@ -195,7 +195,8 @@ func SearchLabelNamesWithFiltersOnTimeRange(qt *querytracer.Tracer, tfss []*stor
|
|||
|
||||
// SearchLabelValuesWithFiltersOnTimeRange searches for label values for the given labelName, tfss and tr.
|
||||
func SearchLabelValuesWithFiltersOnTimeRange(qt *querytracer.Tracer, labelName string, tfss []*storage.TagFilters,
|
||||
tr storage.TimeRange, maxLabelValues, maxMetrics int, deadline uint64) ([]string, error) {
|
||||
tr storage.TimeRange, maxLabelValues, maxMetrics int, deadline uint64,
|
||||
) ([]string, error) {
|
||||
WG.Add(1)
|
||||
labelValues, err := Storage.SearchLabelValuesWithFiltersOnTimeRange(qt, labelName, tfss, tr, maxLabelValues, maxMetrics, deadline)
|
||||
WG.Done()
|
||||
|
@ -492,6 +493,7 @@ func writeStorageMetrics(w io.Writer, strg *storage.Storage) {
|
|||
|
||||
metrics.WriteCounterUint64(w, `vm_indexdb_items_added_total`, idbm.ItemsAdded)
|
||||
metrics.WriteCounterUint64(w, `vm_indexdb_items_added_size_bytes_total`, idbm.ItemsAddedSizeBytes)
|
||||
metrics.WriteCounterUint64(w, `vm_indexdb_items_dropped_total{reason="too_long_item"}`, idbm.TooLongItemsDroppedTotal)
|
||||
|
||||
metrics.WriteGaugeUint64(w, `vm_pending_rows{type="storage"}`, tm.PendingRows)
|
||||
metrics.WriteGaugeUint64(w, `vm_pending_rows{type="indexdb"}`, idbm.PendingItems)
|
||||
|
|
|
@ -175,5 +175,4 @@ groups:
|
|||
description: "The connection between vminsert (instance {{ $labels.instance }}) and vmstorage (instance {{ $labels.addr }})
|
||||
is saturated by more than 90% and vminsert won't be able to keep up.\n
|
||||
This usually means that more vminsert or vmstorage nodes must be added to the cluster in order to increase
|
||||
the total number of vminsert -> vmstorage links."
|
||||
|
||||
the total number of vminsert -> vmstorage links."
|
|
@ -87,3 +87,14 @@ groups:
|
|||
In some cases for components like vmagent or vminsert the alert might trigger if there are too many clients
|
||||
making write attempts. If vmagent's or vminsert's CPU usage and network saturation are at normal level, then
|
||||
it might be worth adjusting `-maxConcurrentInserts` cmd-line flag."
|
||||
|
||||
- alert: IndexDBRecordsDrop
|
||||
expr: increase(vm_indexdb_items_dropped_total[5m]) > 0
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "IndexDB skipped registering items during data ingestion with reason={{ $labels.reason }}."
|
||||
description: "VictoriaMetrics could skip registering new timeseries during ingestion if they fail the validation process.
|
||||
For example, `reason=too_long_item` means that time series cannot exceed 64KB. Please, reduce the number
|
||||
of labels or label values for such series. Or enforce these limits via `-maxLabelsPerTimeseries` and
|
||||
`-maxLabelValueLen` command-line flags."
|
|
@ -130,4 +130,4 @@ groups:
|
|||
summary: "Metrics ingested in ({{ $labels.instance }}) are exceeding labels limit"
|
||||
description: "VictoriaMetrics limits the number of labels per each metric with `-maxLabelsPerTimeseries` command-line flag.\n
|
||||
This prevents ingestion of metrics with too many labels. Please verify that `-maxLabelsPerTimeseries` is configured
|
||||
correctly or that clients which send these metrics aren't misbehaving."
|
||||
correctly or that clients which send these metrics aren't misbehaving."
|
|
@ -43,6 +43,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
|
|||
* FEATURE: [dashboards/single](https://grafana.com/grafana/dashboards/10229): add `Network Usage` panel to `Resource Usage` row.
|
||||
* FEATURE: [dashboards/operator](https://grafana.com/grafana/dashboards/17869), [dashboards/backupmanager](https://grafana.com/grafana/dashboards/17798) and [dashboard/tenant-statistic](https://grafana.com/grafana/dashboards/16399): update dashboard to be compatible with Grafana 10+ version.
|
||||
* FEATURE: [dashboards/cluster](https://grafana.com/grafana/dashboards/11176): add new panel `Concurrent selects` to `vmstorage` row. The panel will show how many ongoing select queries are processed by vmstorage and should help to identify resource bottlenecks. See panel description for more details.
|
||||
* FEATURE: [dashboards](https://grafana.com/orgs/victoriametrics): use `$__interval` variable for offsets and look-behind windows in annotations. This should improve precision of `restarts` and `version change` annotations when zooming-in/zooming-out on the dashboards.
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): support aggregation and deduplication configs before replicating data to configured `-remoteWrite.url` destinations. This saves CPU and memory resources when incoming data needs to be aggregated or deduplicated once and then replicated to multiple destinations. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5467).
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add service discovery support for [Vultr](https://www.vultr.com/). See [these docs](https://docs.victoriametrics.com/sd_configs/#vultr_sd_configs) and [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6041).
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): allow configuring `-remoteWrite.disableOnDiskQueue` and `-remoteWrite.dropSamplesOnOverload` cmd-line flags per each `-remoteWrite.url`. See this [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6065). Thanks to @rbizos for implementaion!
|
||||
|
@ -52,7 +53,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
|
|||
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert/): speed up retrieving rules files from object storages by skipping unchanged objects during reloading. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6210).
|
||||
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert/): support reading [DNS SRV](https://en.wikipedia.org/wiki/SRV_record) records in `-datasource.url`, `-remoteWrite.url` and `-remoteRead.url` command-line option. For example, `-remoteWrite.url=http://srv+victoria-metrics` automatically resolves the `victoria-metrics` DNS SRV to a list of hostnames with TCP ports and then sends data to one of the addresses. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6053).
|
||||
* FEATURE: [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmrestore](https://docs.victoriametrics.com/vmrestore/), [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager/): add `-s3TLSInsecureSkipVerify` command-line flag for skipping TLS certificates verification when connecting to S3 endpoint.
|
||||
* FEATURE: [dashboards](https://grafana.com/orgs/victoriametrics): use `$__interval` variable for offsets and look-behind windows in annotations. This should improve precision of `restarts` and `version change` annotations when zooming-in/zooming-out on the dashboards.
|
||||
* FEATURE: expose metric `vm_indexdb_items_dropped_total` to track the number of IndexDB records that had to be dropped during ingestion. The reason of dropping the record will be annotated in `reason` label of the exposed metric. This change also comes with a new [alerting rule](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-health.yml) to track changes of this metric.
|
||||
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix bug that prevents the first query trace from expanding on click event. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6186). The issue was introduced in [v1.100.0](https://docs.victoriametrics.com/changelog/#v11000) release.
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix calendar display when `UTC+00:00` timezone is set. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6239).
|
||||
|
|
|
@ -276,7 +276,8 @@ func (ris *rawItemsShard) addItems(items [][]byte) ([][]byte, []*inmemoryBlock)
|
|||
if len(itemPrefix) > 128 {
|
||||
itemPrefix = itemPrefix[:128]
|
||||
}
|
||||
tooLongItemLogger.Errorf("skipping adding too long item to indexdb: len(item)=%d; it souldn't exceed %d bytes; item prefix=%q", len(item), maxInmemoryBlockSize, itemPrefix)
|
||||
tooLongItemsTotal.Add(1)
|
||||
tooLongItemLogger.Errorf("skipping adding too long item to indexdb: len(item)=%d; it shouldn't exceed %d bytes; item prefix=%q", len(item), maxInmemoryBlockSize, itemPrefix)
|
||||
}
|
||||
ris.ibs = ibs
|
||||
ris.mu.Unlock()
|
||||
|
@ -290,6 +291,8 @@ func (ris *rawItemsShard) updateFlushDeadline() {
|
|||
|
||||
var tooLongItemLogger = logger.WithThrottler("tooLongItem", 5*time.Second)
|
||||
|
||||
var tooLongItemsTotal atomic.Uint64
|
||||
|
||||
type partWrapper struct {
|
||||
// refCount is the number of references to partWrapper
|
||||
refCount atomic.Int32
|
||||
|
@ -575,6 +578,8 @@ type TableMetrics struct {
|
|||
IndexBlocksCacheMisses uint64
|
||||
|
||||
PartsRefCount uint64
|
||||
|
||||
TooLongItemsDroppedTotal uint64
|
||||
}
|
||||
|
||||
// TotalItemsCount returns the total number of items in the table.
|
||||
|
@ -632,6 +637,8 @@ func (tb *Table) UpdateMetrics(m *TableMetrics) {
|
|||
m.IndexBlocksCacheSizeMaxBytes = uint64(idxbCache.SizeMaxBytes())
|
||||
m.IndexBlocksCacheRequests = idxbCache.Requests()
|
||||
m.IndexBlocksCacheMisses = idxbCache.Misses()
|
||||
|
||||
m.TooLongItemsDroppedTotal += tooLongItemsTotal.Load()
|
||||
}
|
||||
|
||||
// AddItems adds the given items to the tb.
|
||||
|
|
Loading…
Reference in a new issue