diff --git a/app/vlinsert/insertutils/common_params.go b/app/vlinsert/insertutils/common_params.go index 12b9f1103..f2170da62 100644 --- a/app/vlinsert/insertutils/common_params.go +++ b/app/vlinsert/insertutils/common_params.go @@ -1,6 +1,7 @@ package insertutils import ( + "flag" "net/http" "strings" "sync" @@ -16,6 +17,11 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil" ) +var ( + defaultMsgValue = flag.String("defaultMsgValue", "missing _msg field; see https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field", + "Default value for _msg field if the ingested log entry doesn't contain it; see https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field") +) + // CommonParams contains common HTTP parameters used by log ingestion APIs. // // See https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters @@ -140,6 +146,8 @@ type logMessageProcessor struct { stopCh chan struct{} lastFlushTime time.Time + tmpFields []logstorage.Field + cp *CommonParams lr *logstorage.LogRows } @@ -182,20 +190,15 @@ func (lmp *logMessageProcessor) AddRow(timestamp int64, fields []logstorage.Fiel return } - // _msg field must be non-empty according to VictoriaLogs data model. - // See https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field - msgExist := false - for i := range fields { - if fields[i].Name == "_msg" { - msgExist = len(fields[i].Value) > 0 - break - } - } - if !msgExist { - rf := logstorage.RowFormatter(fields) - logger.Warnf("dropping log line without _msg field; %s", rf) - rowsDroppedTotalMsgNotValid.Inc() - return + if *defaultMsgValue != "" && !hasMsgField(fields) { + // The log entry doesn't contain mandatory _msg field. Add _msg field with default value then + // according to https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field . + lmp.tmpFields = append(lmp.tmpFields[:0], fields...) 
+ lmp.tmpFields = append(lmp.tmpFields, logstorage.Field{ + Name: "_msg", + Value: *defaultMsgValue, + }) + fields = lmp.tmpFields } lmp.lr.MustAdd(lmp.cp.TenantID, timestamp, fields) @@ -211,6 +214,15 @@ func (lmp *logMessageProcessor) AddRow(timestamp int64, fields []logstorage.Fiel } } +func hasMsgField(fields []logstorage.Field) bool { + for _, f := range fields { + if f.Name == "_msg" { + return len(f.Value) > 0 + } + } + return false +} + // flushLocked must be called under locked lmp.mu. func (lmp *logMessageProcessor) flushLocked() { lmp.lastFlushTime = time.Now() @@ -247,5 +259,4 @@ func (cp *CommonParams) NewLogMessageProcessor() LogMessageProcessor { var ( rowsDroppedTotalDebug = metrics.NewCounter(`vl_rows_dropped_total{reason="debug"}`) rowsDroppedTotalTooManyFields = metrics.NewCounter(`vl_rows_dropped_total{reason="too_many_fields"}`) - rowsDroppedTotalMsgNotValid = metrics.NewCounter(`vl_rows_dropped_total{reason="msg_not_exist"}`) ) diff --git a/app/vlinsert/jsonline/jsonline_test.go b/app/vlinsert/jsonline/jsonline_test.go index 429a1e4f2..6e5fe7722 100644 --- a/app/vlinsert/jsonline/jsonline_test.go +++ b/app/vlinsert/jsonline/jsonline_test.go @@ -35,6 +35,18 @@ func TestProcessStreamInternal_Success(t *testing.T) { {"_msg":"baz"} {"_msg":"xyz","x":"y"}` f(data, timeField, msgField, rowsExpected, timestampsExpected, resultExpected) + + // Non-existing msgField + data = `{"@timestamp":"2023-06-06T04:48:11.735Z","log":{"offset":71770,"file":{"path":"/var/log/auth.log"}},"message":"foobar"} +{"@timestamp":"2023-06-06T04:48:12.735+01:00","message":"baz"} +` + timeField = "@timestamp" + msgField = "foobar" + rowsExpected = 2 + timestampsExpected = []int64{1686026891735000000, 1686023292735000000} + resultExpected = `{"log.offset":"71770","log.file.path":"/var/log/auth.log","message":"foobar"} +{"message":"baz"}` + f(data, timeField, msgField, rowsExpected, timestampsExpected, resultExpected) } func TestProcessStreamInternal_Failure(t 
*testing.T) { diff --git a/docs/VictoriaLogs/CHANGELOG.md b/docs/VictoriaLogs/CHANGELOG.md index 378108cc6..c94767038 100644 --- a/docs/VictoriaLogs/CHANGELOG.md +++ b/docs/VictoriaLogs/CHANGELOG.md @@ -16,6 +16,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta ## tip * FEATURE: allow specifying a list of log fields, which contain log message, via `_msg_field` query arg and via `VL-Msg-Field` HTTP request header. For example, `_msg_field=message,event.message` instructs obtaining [message field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) from the first non-empty field out of the `message` and `event.message` fields. See [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters) for details. +* FEATURE: accept logs without [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field). In this case the `_msg` field is automatically set to the value specified in the `-defaultMsgValue` command-line flag. * BUGFIX: fix `runtime error: index out of range [0] with length 0` panic during low-rate data ingestion. The panic has been introduced in [v0.38.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.38.0-victorialogs). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7391). diff --git a/docs/VictoriaLogs/README.md b/docs/VictoriaLogs/README.md index a818b7b95..b37fc242c 100644 --- a/docs/VictoriaLogs/README.md +++ b/docs/VictoriaLogs/README.md @@ -260,8 +260,8 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line ``` -blockcache.missesBeforeCaching int The number of cache misses before putting the block into cache. Higher values may reduce indexdb/dataBlocks cache size at the cost of higher CPU and disk read usage (default 2) - -cacheExpireDuration duration - Items are removed from in-memory caches after they aren't accessed for this duration. 
Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s) + -defaultMsgValue string + Default value for _msg field if the ingested log entry doesn't contain it; see https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field (default "missing _msg field; see https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field") -elasticsearch.version string Elasticsearch version to report to client (default "8.9.0") -enableTCP6 @@ -275,6 +275,9 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line -flagsAuthKey value Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides -httpAuth.* Flag value can be read from the given file when using -flagsAuthKey=file:///abs/path/to/file or -flagsAuthKey=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -flagsAuthKey=http://host/path or -flagsAuthKey=https://host/path + -forceMergeAuthKey value + authKey, which must be passed in query string to /internal/force_merge pages. It overrides -httpAuth.* + Flag value can be read from the given file when using -forceMergeAuthKey=file:///abs/path/to/file or -forceMergeAuthKey=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -forceMergeAuthKey=http://host/path or -forceMergeAuthKey=https://host/path -fs.disableMmap Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread() -futureRetention value @@ -326,6 +329,20 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. 
See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen -internStringMaxLen int The maximum length for strings to intern. A lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500) + -journald.ignoreFields array + Journal fields to ignore. See the list of allowed fields at https://www.freedesktop.org/software/systemd/man/latest/systemd.journal-fields.html. + Supports an array of values separated by comma or specified via multiple flags. + Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. + -journald.includeEntryMetadata + Include journal entry fields, which start with double underscores. + -journald.streamFields array + Journal fields to be used as stream fields. See the list of allowed fields at https://www.freedesktop.org/software/systemd/man/latest/systemd.journal-fields.html. + Supports an array of values separated by comma or specified via multiple flags. + Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. + -journald.tenantID string + TenantID for logs ingested via the Journald endpoint. (default "0:0") + -journald.timeField string + Journal field to be used as time field. See the list of allowed fields at https://www.freedesktop.org/software/systemd/man/latest/systemd.journal-fields.html. (default "__REALTIME_TIMESTAMP") -logIngestedRows Whether to log all the ingested log entries; this can be useful for debugging of data ingestion; see https://docs.victoriametrics.com/victorialogs/data-ingestion/ ; see also -logNewStreams -logNewStreams @@ -341,7 +358,7 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line -loggerLevel string Minimum level of errors to log. 
Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO") -loggerMaxArgLen int - The maximum length of a single logged argument. Longer arguments are replaced with 'arg_start..arg_end', where 'arg_start' and 'arg_end' is prefix and suffix of the arg with the length not exceeding -loggerMaxArgLen / 2 (default 1000) + The maximum length of a single logged argument. Longer arguments are replaced with 'arg_start..arg_end', where 'arg_start' and 'arg_end' is prefix and suffix of the arg with the length not exceeding -loggerMaxArgLen / 2 (default 5000) -loggerOutput string Output for the logs. Supported values: stderr, stdout (default "stderr") -loggerTimezone string @@ -361,10 +378,8 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides -httpAuth.* Flag value can be read from the given file when using -metricsAuthKey=file:///abs/path/to/file or -metricsAuthKey=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -metricsAuthKey=http://host/path or -metricsAuthKey=https://host/path -pprofAuthKey value - Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides -httpAuth.* + Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides -httpAuth.* Flag value can be read from the given file when using -pprofAuthKey=file:///abs/path/to/file or -pprofAuthKey=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -pprofAuthKey=http://host/path or -pprofAuthKey=https://host/path - -prevCacheRemovalPercent float - Items in the previous caches are removed when the percent of requests it serves becomes lower than this value. Higher values reduce memory usage at the cost of higher CPU usage. 
See also -cacheExpireDuration (default 0.1) -pushmetrics.disableCompression Whether to disable request body compression when pushing metrics to every -pushmetrics.url -pushmetrics.extraLabel array diff --git a/docs/VictoriaLogs/keyConcepts.md b/docs/VictoriaLogs/keyConcepts.md index 8eee6fbbf..c89c6e22c 100644 --- a/docs/VictoriaLogs/keyConcepts.md +++ b/docs/VictoriaLogs/keyConcepts.md @@ -127,11 +127,14 @@ log entry, which can be ingested into VictoriaLogs: } ``` -If the actual log message has other than `_msg` field name, then it is possible to specify the real log message field -via `_msg_field` query arg or via `VL-Msg-Field` HTTP header during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/). +If the actual log message has other than `_msg` field name, then it can be specified via `_msg_field` HTTP query arg or via `VL-Msg-Field` HTTP header +during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/) +according to [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters). For example, if log message is located in the `event.original` field, then specify `_msg_field=event.original` query arg during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/). -See [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters) for more details. + +If the `_msg` field remains empty after an attempt to get it from `_msg_field`, then VictoriaLogs automatically sets it to the value specified +via `-defaultMsgValue` command-line flag. ### Time field