From 3fa72b2c1b8b4210d9ade9b4a1ae1f04fbc93df0 Mon Sep 17 00:00:00 2001
From: Zhu Jiekun <jiekun@victoriametrics.com>
Date: Thu, 26 Sep 2024 15:35:28 +0800
Subject: [PATCH] feature: [victorialogs] drop logs without non-empty _msg
 field (#7056)

### Describe Your Changes

VictoriaLogs accepts logs in which the `_msg` field is missing or empty.
This leads to incorrect search results. See:
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6785

This pull request checks for a non-empty `_msg` field before a log entry
is added to `LogRows`, and drops the entry if the field is missing or empty.

A new counter, `vl_rows_dropped_total{reason="msg_not_exist"}`, is
introduced to count the dropped entries.

Example log output:
```
2024-09-23T02:33:19.719Z        warn    app/vlinsert/insertutils/common_params.go:189   dropping log line without _msg field; [{@timestamp 2024-09-18T13:42:16.600000000Z} {Attributes.array.attribute ["many","values"]} {Attributes.boolean.attribute true} {Attributes.double.attribute 637.704} {Attributes.int.attribute 10} {Attributes.map.attribute.some.map.key some value} {Attributes.string.attribute some string} {Body Example ddddddddddlog record} {Resource.service.name my.service} {Scope.my.scope.attribute some scope attribute} {Scope.name my.library} {Scope.version 1.0.0} {SeverityNumber 10} {SeverityText Information} {SpanId eee19b7ec3c1b174} {TraceFlags 0} {TraceId 5b8efff798038103d269b633813fc60c}]
```

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to the [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/contributing/).
- [ ] Benchmark for potential performance loss.

---------

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
---
 app/vlinsert/insertutils/common_params.go | 16 ++++++++++++++++
 docs/VictoriaLogs/CHANGELOG.md            |  1 +
 2 files changed, 17 insertions(+)

diff --git a/app/vlinsert/insertutils/common_params.go b/app/vlinsert/insertutils/common_params.go
index 12a56ee99d..df4df79c4a 100644
--- a/app/vlinsert/insertutils/common_params.go
+++ b/app/vlinsert/insertutils/common_params.go
@@ -176,6 +176,21 @@ func (lmp *logMessageProcessor) AddRow(timestamp int64, fields []logstorage.Fiel
 		return
 	}
 
+	// _msg field must exist and not empty
+	msgExist := false
+	for i := range fields {
+		if fields[i].Name == `_msg` {
+			msgExist = len(fields[i].Value) > 0
+			break
+		}
+	}
+	if !msgExist {
+		rf := logstorage.RowFormatter(fields)
+		logger.Warnf("dropping log line without _msg field; %s", rf)
+		rowsDroppedTotalMsgNotValid.Inc()
+		return
+	}
+
 	lmp.lr.MustAdd(lmp.cp.TenantID, timestamp, fields)
 	if lmp.cp.Debug {
 		s := lmp.lr.GetRowString(0)
@@ -225,4 +240,5 @@ func (cp *CommonParams) NewLogMessageProcessor() LogMessageProcessor {
 var (
 	rowsDroppedTotalDebug         = metrics.NewCounter(`vl_rows_dropped_total{reason="debug"}`)
 	rowsDroppedTotalTooManyFields = metrics.NewCounter(`vl_rows_dropped_total{reason="too_many_fields"}`)
+	rowsDroppedTotalMsgNotValid   = metrics.NewCounter(`vl_rows_dropped_total{reason="msg_not_exist"}`)
 )
diff --git a/docs/VictoriaLogs/CHANGELOG.md b/docs/VictoriaLogs/CHANGELOG.md
index a56ba2bbf6..0b6b46c1dc 100644
--- a/docs/VictoriaLogs/CHANGELOG.md
+++ b/docs/VictoriaLogs/CHANGELOG.md
@@ -16,6 +16,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
 ## tip
 
 * FEATURE: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): add button for enabling auto refresh, similarly to VictoriaMetrics vmui. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7017).
+* FEATURE: drop logs without [`_msg`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) field or with empty `_msg` field, since this field is required to be non-empty in [VictoriaLogs data model](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6785).
 * FEATURE: improve performance of analytical queries, which do not need reading the `_time` field. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7070).
 * FEATURE: add [`blocks_count` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#blocks_count-pipe), which can be used for counting the number of matching blocks for the given query. For example, `_time:5m | blocks_count` returns the number of blocks with logs for the last 5 minutes. This pipe can be useful for debugging purposes.