mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
lib/logstorage: parse syslog structured data into separate fields in order to simplify further querying of this data
This commit is contained in:
parent
9e1c037249
commit
3eacd43fff
6 changed files with 37 additions and 7 deletions
|
@ -94,7 +94,7 @@ func TestProcessStreamInternal_Success(t *testing.T) {
|
|||
timestampsExpected := []int64{1685794113000000000, 1685880513000000000, 1685814132345000000}
|
||||
resultExpected := `{"format":"rfc3164","timestamp":"","hostname":"abcd","app_name":"systemd","_msg":"Starting Update the local ESM caches..."}
|
||||
{"priority":"165","facility":"20","severity":"5","format":"rfc3164","timestamp":"","hostname":"abcd","app_name":"systemd","proc_id":"345","_msg":"abc defg"}
|
||||
{"priority":"123","facility":"15","severity":"3","format":"rfc5424","timestamp":"","hostname":"mymachine.example.com","app_name":"appname","proc_id":"12345","msg_id":"ID47","exampleSDID@32473":"iut=\"3\" eventSource=\"Application 123 = ] 56\" eventID=\"11211\"","_msg":"This is a test message with structured data."}`
|
||||
{"priority":"123","facility":"15","severity":"3","format":"rfc5424","timestamp":"","hostname":"mymachine.example.com","app_name":"appname","proc_id":"12345","msg_id":"ID47","exampleSDID@32473.iut":"3","exampleSDID@32473.eventSource":"Application 123 = ] 56","exampleSDID@32473.eventID":"11211","_msg":"This is a test message with structured data."}`
|
||||
f(data, currentYear, rowsExpected, timestampsExpected, resultExpected)
|
||||
}
|
||||
|
||||
|
|
|
@ -19,6 +19,8 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
|
|||
|
||||
## tip
|
||||
|
||||
* FEATURE: [syslog data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/): parse [STRUCTURED-DATA](https://datatracker.ietf.org/doc/html/rfc5424#section-6.3) into `SD-ID.field1=value1`, `SD-ID.field2=value2`, ..., `SD-ID.fieldN=valueN` [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). Previously the `STRUCTURED-DATA` was parsed into a single log field with the `SD-ID` name and `field1=value1 field2=value2 ... fieldN=valueN` value. This could complicate querying of such data.
|
||||
|
||||
* BUGFIX: properly parse timestamps with timezones during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/) and [querying](https://docs.victoriametrics.com/victorialogs/querying/). This has been broken in [v0.20.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.20.0-victorialogs). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6508).
|
||||
|
||||
## [v0.22.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.22.0-victorialogs)
|
||||
|
|
|
@ -2571,8 +2571,8 @@ The following fields are unpacked:
|
|||
|
||||
The `<PRI>` part is optional. If it is missing, then `priority`, `facility` and `severity` fields aren't set.
|
||||
|
||||
The `[STRUCTURED-DATA]` is parsed into fields with the `SD-ID` name and `param1="value1" ... paramN="valueN"` value
|
||||
according to [the specification](https://datatracker.ietf.org/doc/html/rfc5424#section-6.3). The value then can be parsed to separate fields with [`unpack_logfmt` pipe](#unpack_logfmt-pipe).
|
||||
The `[STRUCTURED-DATA]` is parsed into fields with the `SD-ID.param1`, `SD-ID.param2`, ..., `SD-ID.paramN` names and the corresponding values
|
||||
according to [the specification](https://datatracker.ietf.org/doc/html/rfc5424#section-6.3).
|
||||
|
||||
For example, the following query unpacks [syslog](https://en.wikipedia.org/wiki/Syslog) message from the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
|
||||
across logs for the last 5 minutes:
|
||||
|
|
|
@ -50,6 +50,9 @@ from the received Syslog lines:
|
|||
- `format` - this field is set to either `rfc3164` or `rfc5424` depending on the format of the parsed syslog line
|
||||
- `msg_id` - `MSGID` field from log line in `RFC5424` format.
|
||||
|
||||
The `[STRUCTURED-DATA]` is parsed into fields with the `SD-ID.param1`, `SD-ID.param2`, ..., `SD-ID.paramN` names and the corresponding values
|
||||
according to [the specification](https://datatracker.ietf.org/doc/html/rfc5424#section-6.3).
|
||||
|
||||
By default local timezone is used when parsing timestamps in `rfc3164` lines. This can be changed to any desired timezone via `-syslog.timezone` command-line flag.
|
||||
See [the list of supported timezone identifiers](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones). For example, the following command starts VictoriaLogs,
|
||||
which parses syslog timestamps in `rfc3164` using `Europe/Berlin` timezone:
|
||||
|
|
|
@ -52,10 +52,18 @@ type SyslogParser struct {
|
|||
// Fields contains parsed fields after Parse call.
|
||||
Fields []Field
|
||||
|
||||
// buf contains temporary data used in Fields.
|
||||
buf []byte
|
||||
|
||||
// sdParser is used for structured data parsing in rfc5424.
|
||||
// See https://datatracker.ietf.org/doc/html/rfc5424#section-6.3
|
||||
sdParser logfmtParser
|
||||
|
||||
// currentYear is used as the current year for rfc3164 messages.
|
||||
currentYear int
|
||||
timezone *time.Location
|
||||
|
||||
// timezeon is used as the current timezeon for rfc3164 messages.
|
||||
timezone *time.Location
|
||||
}
|
||||
|
||||
func (p *SyslogParser) reset() {
|
||||
|
@ -69,6 +77,7 @@ func (p *SyslogParser) resetFields() {
|
|||
p.Fields = p.Fields[:0]
|
||||
|
||||
p.buf = p.buf[:0]
|
||||
p.sdParser.reset()
|
||||
}
|
||||
|
||||
func (p *SyslogParser) addField(name, value string) {
|
||||
|
@ -259,7 +268,23 @@ func (p *SyslogParser) parseRFC5424SDLine(s string) (string, bool) {
|
|||
}
|
||||
|
||||
sdValue := strings.TrimSpace(s[:i])
|
||||
p.addField(sdID, sdValue)
|
||||
|
||||
p.sdParser.parse(sdValue)
|
||||
if len(p.sdParser.fields) == 0 {
|
||||
// Special case when structured data doesn't contain any fields
|
||||
p.addField(sdID, "")
|
||||
} else {
|
||||
for _, f := range p.sdParser.fields {
|
||||
bufLen := len(p.buf)
|
||||
p.buf = append(p.buf, sdID...)
|
||||
p.buf = append(p.buf, '.')
|
||||
p.buf = append(p.buf, f.Name...)
|
||||
|
||||
fieldName := bytesutil.ToUnsafeString(p.buf[bufLen:])
|
||||
p.addField(fieldName, f.Value)
|
||||
}
|
||||
}
|
||||
|
||||
s = s[i+1:]
|
||||
return s, true
|
||||
}
|
||||
|
|
|
@ -38,9 +38,9 @@ func TestSyslogParser(t *testing.T) {
|
|||
f(`1 2023-06-03T17:42:32.123456789Z mymachine.example.com appname 12345 ID47 - This is a test message with structured data.`, time.UTC,
|
||||
`{"format":"rfc5424","timestamp":"2023-06-03T17:42:32.123456789Z","hostname":"mymachine.example.com","app_name":"appname","proc_id":"12345","msg_id":"ID47","message":"This is a test message with structured data."}`)
|
||||
f(`<165>1 2023-06-03T17:42:00.000Z mymachine.example.com appname 12345 ID47 [exampleSDID@32473 iut="3" eventSource="Application 123 = ] 56" eventID="11211"] This is a test message with structured data.`, time.UTC,
|
||||
`{"priority":"165","facility":"20","severity":"5","format":"rfc5424","timestamp":"2023-06-03T17:42:00.000Z","hostname":"mymachine.example.com","app_name":"appname","proc_id":"12345","msg_id":"ID47","exampleSDID@32473":"iut=\"3\" eventSource=\"Application 123 = ] 56\" eventID=\"11211\"","message":"This is a test message with structured data."}`)
|
||||
`{"priority":"165","facility":"20","severity":"5","format":"rfc5424","timestamp":"2023-06-03T17:42:00.000Z","hostname":"mymachine.example.com","app_name":"appname","proc_id":"12345","msg_id":"ID47","exampleSDID@32473.iut":"3","exampleSDID@32473.eventSource":"Application 123 = ] 56","exampleSDID@32473.eventID":"11211","message":"This is a test message with structured data."}`)
|
||||
f(`<165>1 2023-06-03T17:42:00.000Z mymachine.example.com appname 12345 ID47 [foo@123 iut="3"][bar@456 eventID="11211"] This is a test message with structured data.`, time.UTC,
|
||||
`{"priority":"165","facility":"20","severity":"5","format":"rfc5424","timestamp":"2023-06-03T17:42:00.000Z","hostname":"mymachine.example.com","app_name":"appname","proc_id":"12345","msg_id":"ID47","foo@123":"iut=\"3\"","bar@456":"eventID=\"11211\"","message":"This is a test message with structured data."}`)
|
||||
`{"priority":"165","facility":"20","severity":"5","format":"rfc5424","timestamp":"2023-06-03T17:42:00.000Z","hostname":"mymachine.example.com","app_name":"appname","proc_id":"12345","msg_id":"ID47","foo@123.iut":"3","bar@456.eventID":"11211","message":"This is a test message with structured data."}`)
|
||||
|
||||
// Incomplete RFC 3164
|
||||
f("", time.UTC, `{}`)
|
||||
|
|
Loading…
Reference in a new issue