lib/logstorage: support for [label1=value1 ... labelN=valueN] syntax inside syslog messages for adding arbitrary labels (fields) to log entries

This commit is contained in:
Aliaksandr Valialkin 2024-11-08 19:57:22 +01:00
parent f55791f20b
commit 63c76b9b27
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
3 changed files with 29 additions and 7 deletions

View file

@ -19,6 +19,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
* FEATURE: [`_time` filter](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter): allow specifying offset without time range. For example, `_time:offset 1d` matches all the logs until `now-1d` in the [`_time` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field). This is useful when building graphs for time ranges with some offset in the past.
* FEATURE: [`/select/logsql/tail` HTTP endpoint](https://docs.victoriametrics.com/victorialogs/querying/#live-tailing): support for `offset` query arg, which can be used for delayed emission of matching logs during live tailing. Thanks to @Fusl for the initial idea and implementation in [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/7428).
* FEATURE: [vlogscli](https://docs.victoriametrics.com/victorialogs/querying/vlogscli/): allow enabling and disabling the wrapping of long lines that do not fit the screen width via the `\wrap_long_lines` command.
* FEATURE: [syslog data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/): allow adding arbitrary [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) via `[label1=value1 ... labelN=valueN]` syntax inside syslog messages. For example, `<165>1 2024-06-03T17:42:00.000Z example.com appname 12345 ID47 [field1=value1 field2=value2] some message`.
* BUGFIX: [HTTP querying APIs](https://docs.victoriametrics.com/victorialogs/querying/#http-api): properly take into account the `end` query arg when calculating time range for [`_time:duration` filter](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter). Previously the `_time:duration` filter was treated as `_time:[now-duration, now)`, while it should be treated as `_time:[end-duration, end)`.

View file

@ -240,6 +240,12 @@ func (p *SyslogParser) parseRFC5424SDLine(s string) (string, bool) {
sdID := s[:n]
s = s[n:]
if n := strings.IndexByte(sdID, '='); n >= 0 {
// Special case when sdID contains `key=value`
p.addField(sdID[:n], sdID[n+1:])
sdID = ""
}
// Parse structured data
i := 0
for i < len(s) && s[i] != ']' {
@ -257,11 +263,19 @@ func (p *SyslogParser) parseRFC5424SDLine(s string) (string, bool) {
i += n + 1
// Parse value
if strings.HasPrefix(s[i:], `"`) {
qp, err := strconv.QuotedPrefix(s[i:])
if err != nil {
return s, false
}
i += len(qp)
} else {
n := strings.IndexAny(s[i:], " ]")
if n < 0 {
return s, false
}
i += n
}
}
if i == len(s) {
return s, false
@ -272,9 +286,16 @@ func (p *SyslogParser) parseRFC5424SDLine(s string) (string, bool) {
p.sdParser.parse(sdValue)
if len(p.sdParser.fields) == 0 {
// Special case when structured data doesn't contain any fields
if sdID != "" {
p.addField(sdID, "")
}
} else {
for _, f := range p.sdParser.fields {
if sdID == "" {
p.addField(f.Name, f.Value)
continue
}
bufLen := len(p.buf)
p.buf = append(p.buf, sdID...)
p.buf = append(p.buf, '.')

View file

@ -39,8 +39,8 @@ func TestSyslogParser(t *testing.T) {
`format=rfc5424 timestamp=2023-06-03T17:42:32.123456789Z hostname=mymachine.example.com app_name=appname proc_id=12345 msg_id=ID47 message="This is a test message with structured data."`)
f(`<165>1 2023-06-03T17:42:00.000Z mymachine.example.com appname 12345 ID47 [exampleSDID@32473 iut="3" eventSource="Application 123 = ] 56" eventID="11211"] This is a test message with structured data.`, time.UTC,
`priority=165 facility=20 severity=5 format=rfc5424 timestamp=2023-06-03T17:42:00.000Z hostname=mymachine.example.com app_name=appname proc_id=12345 msg_id=ID47 exampleSDID@32473.iut=3 exampleSDID@32473.eventSource="Application 123 = ] 56" exampleSDID@32473.eventID=11211 message="This is a test message with structured data."`)
f(`<165>1 2023-06-03T17:42:00.000Z mymachine.example.com appname 12345 ID47 [foo@123 iut="3"][bar@456 eventID="11211"] This is a test message with structured data.`, time.UTC,
`priority=165 facility=20 severity=5 format=rfc5424 timestamp=2023-06-03T17:42:00.000Z hostname=mymachine.example.com app_name=appname proc_id=12345 msg_id=ID47 foo@123.iut=3 bar@456.eventID=11211 message="This is a test message with structured data."`)
f(`<165>1 2023-06-03T17:42:00.000Z mymachine.example.com appname 12345 ID47 [foo@123 iut="3"][bar@456 eventID="11211"][abc=def][x=y z=a q="]= "] This is a test message with structured data.`, time.UTC,
`priority=165 facility=20 severity=5 format=rfc5424 timestamp=2023-06-03T17:42:00.000Z hostname=mymachine.example.com app_name=appname proc_id=12345 msg_id=ID47 foo@123.iut=3 bar@456.eventID=11211 abc=def x=y z=a q="]= " message="This is a test message with structured data."`)
// Incomplete RFC 3164
f("", time.UTC, ``)