From 7185fe012b14bed0c1a9850a2d054e609e22dff8 Mon Sep 17 00:00:00 2001 From: Zhu Jiekun Date: Thu, 26 Sep 2024 15:35:28 +0800 Subject: [PATCH] feature: [victorialogs] drop logs without non-empty _msg field (#7056) ### Describe Your Changes VictoriaLogs accepts logs in which the `_msg` field is missing or empty. This leads to incorrect search results. See: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6785 This pull request checks for a non-empty `_msg` field before a log entry is added to `LogRows`, and drops the entry when no such field is found. A new counter `vl_rows_dropped_total{reason="msg_not_exist"}` is introduced. Example log output: ``` 2024-09-23T02:33:19.719Z warn app/vlinsert/insertutils/common_params.go:189 dropping log line without _msg field; [{@timestamp 2024-09-18T13:42:16.600000000Z} {Attributes.array.attribute ["many","values"]} {Attributes.boolean.attribute true} {Attributes.double.attribute 637.704} {Attributes.int.attribute 10} {Attributes.map.attribute.some.map.key some value} {Attributes.string.attribute some string} {Body Example ddddddddddlog record} {Resource.service.name my.service} {Scope.my.scope.attribute some scope attribute} {Scope.name my.library} {Scope.version 1.0.0} {SeverityNumber 10} {SeverityText Information} {SpanId eee19b7ec3c1b174} {TraceFlags 0} {TraceId 5b8efff798038103d269b633813fc60c}] ``` ### Checklist The following checks are **mandatory**: - [x] My change adheres to [VictoriaMetrics contributing guidelines](https://docs.victoriametrics.com/contributing/). - [ ] Benchmark for potential performance loss. 
--------- Co-authored-by: Aliaksandr Valialkin --- app/vlinsert/insertutils/common_params.go | 16 ++++++++++++++++ docs/VictoriaLogs/CHANGELOG.md | 1 + 2 files changed, 17 insertions(+) diff --git a/app/vlinsert/insertutils/common_params.go b/app/vlinsert/insertutils/common_params.go index 12a56ee99..df4df79c4 100644 --- a/app/vlinsert/insertutils/common_params.go +++ b/app/vlinsert/insertutils/common_params.go @@ -176,6 +176,21 @@ func (lmp *logMessageProcessor) AddRow(timestamp int64, fields []logstorage.Fiel return } + // _msg field must exist and not empty + msgExist := false + for i := range fields { + if fields[i].Name == `_msg` { + msgExist = len(fields[i].Value) > 0 + break + } + } + if !msgExist { + rf := logstorage.RowFormatter(fields) + logger.Warnf("dropping log line without _msg field; %s", rf) + rowsDroppedTotalMsgNotValid.Inc() + return + } + lmp.lr.MustAdd(lmp.cp.TenantID, timestamp, fields) if lmp.cp.Debug { s := lmp.lr.GetRowString(0) @@ -225,4 +240,5 @@ func (cp *CommonParams) NewLogMessageProcessor() LogMessageProcessor { var ( rowsDroppedTotalDebug = metrics.NewCounter(`vl_rows_dropped_total{reason="debug"}`) rowsDroppedTotalTooManyFields = metrics.NewCounter(`vl_rows_dropped_total{reason="too_many_fields"}`) + rowsDroppedTotalMsgNotValid = metrics.NewCounter(`vl_rows_dropped_total{reason="msg_not_exist"}`) ) diff --git a/docs/VictoriaLogs/CHANGELOG.md b/docs/VictoriaLogs/CHANGELOG.md index a56ba2bbf..0b6b46c1d 100644 --- a/docs/VictoriaLogs/CHANGELOG.md +++ b/docs/VictoriaLogs/CHANGELOG.md @@ -16,6 +16,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta ## tip * FEATURE: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): add button for enabling auto refresh, similarly to VictoriaMetrics vmui. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7017). 
+* FEATURE: drop logs without [`_msg`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) field or with empty `_msg` field, since this field is required to be non-empty in [VictoriaLogs data model](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6785). * FEATURE: improve performance of analytical queries, which do not need reading the `_time` field. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7070). * FEATURE: add [`blocks_count` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#blocks_count-pipe), which can be used for counting the number of matching blocks for the given query. For example, `_time:5m | blocks_count` returns the number of blocks with logs for the last 5 minutes. This pipe can be useful for debugging purposes.