2024-06-03 22:59:25 +00:00
|
|
|
package logstorage
|
|
|
|
|
|
|
|
import (
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
"sync"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
|
|
|
)
|
|
|
|
|
2024-06-17 10:13:18 +00:00
|
|
|
// GetSyslogParser returns syslog parser from the pool.
|
|
|
|
//
|
|
|
|
// currentYear must contain the current year. It is used for properly setting timestamp
|
|
|
|
// field for rfc3164 format, which doesn't contain year.
|
|
|
|
//
|
|
|
|
// the timezone is used for rfc3164 format for setting the desired timezone.
|
|
|
|
//
|
|
|
|
// Return back the parser to the pool by calling PutSyslogParser when it is no longer needed.
|
|
|
|
func GetSyslogParser(currentYear int, timezone *time.Location) *SyslogParser {
|
2024-06-03 22:59:25 +00:00
|
|
|
v := syslogParserPool.Get()
|
|
|
|
if v == nil {
|
2024-06-17 10:13:18 +00:00
|
|
|
v = &SyslogParser{}
|
2024-06-03 22:59:25 +00:00
|
|
|
}
|
2024-06-17 10:13:18 +00:00
|
|
|
p := v.(*SyslogParser)
|
2024-06-03 22:59:25 +00:00
|
|
|
p.currentYear = currentYear
|
2024-06-17 10:13:18 +00:00
|
|
|
p.timezone = timezone
|
2024-06-03 22:59:25 +00:00
|
|
|
return p
|
|
|
|
}
|
|
|
|
|
2024-06-17 10:13:18 +00:00
|
|
|
// PutSyslogParser returns back syslog parser to the pool.
|
|
|
|
//
|
|
|
|
// p cannot be used after returning to the pool.
|
|
|
|
func PutSyslogParser(p *SyslogParser) {
|
2024-06-03 22:59:25 +00:00
|
|
|
p.reset()
|
|
|
|
syslogParserPool.Put(p)
|
|
|
|
}
|
|
|
|
|
|
|
|
// syslogParserPool holds *SyslogParser values shared between GetSyslogParser and PutSyslogParser.
var syslogParserPool sync.Pool
|
|
|
|
|
2024-06-17 10:13:18 +00:00
|
|
|
// SyslogParser is parser for syslog messages.
|
|
|
|
//
|
|
|
|
// It understands the following syslog formats:
|
|
|
|
//
|
|
|
|
// - https://datatracker.ietf.org/doc/html/rfc5424
|
|
|
|
// - https://datatracker.ietf.org/doc/html/rfc3164
|
|
|
|
//
|
|
|
|
// It extracts the following list of syslog message fields into Fields -
|
|
|
|
// https://docs.victoriametrics.com/victorialogs/logsql/#unpack_syslog-pipe
|
|
|
|
type SyslogParser struct {
|
|
|
|
// Fields contains parsed fields after Parse call.
|
|
|
|
Fields []Field
|
|
|
|
|
2024-06-25 12:52:43 +00:00
|
|
|
// buf contains temporary data used in Fields.
|
2024-06-17 10:13:18 +00:00
|
|
|
buf []byte
|
2024-06-03 22:59:25 +00:00
|
|
|
|
2024-06-25 12:52:43 +00:00
|
|
|
// sdParser is used for structured data parsing in rfc5424.
|
|
|
|
// See https://datatracker.ietf.org/doc/html/rfc5424#section-6.3
|
|
|
|
sdParser logfmtParser
|
|
|
|
|
|
|
|
// currentYear is used as the current year for rfc3164 messages.
|
2024-06-17 10:13:18 +00:00
|
|
|
currentYear int
|
2024-06-25 12:52:43 +00:00
|
|
|
|
|
|
|
// timezeon is used as the current timezeon for rfc3164 messages.
|
|
|
|
timezone *time.Location
|
2024-06-03 22:59:25 +00:00
|
|
|
}
|
|
|
|
|
2024-06-17 10:13:18 +00:00
|
|
|
func (p *SyslogParser) reset() {
|
2024-06-03 22:59:25 +00:00
|
|
|
p.currentYear = 0
|
2024-06-17 10:13:18 +00:00
|
|
|
p.timezone = nil
|
2024-06-03 22:59:25 +00:00
|
|
|
p.resetFields()
|
|
|
|
}
|
|
|
|
|
2024-06-17 10:13:18 +00:00
|
|
|
func (p *SyslogParser) resetFields() {
|
|
|
|
clear(p.Fields)
|
|
|
|
p.Fields = p.Fields[:0]
|
2024-06-03 22:59:25 +00:00
|
|
|
|
2024-06-17 10:13:18 +00:00
|
|
|
p.buf = p.buf[:0]
|
2024-06-25 12:52:43 +00:00
|
|
|
p.sdParser.reset()
|
2024-06-03 22:59:25 +00:00
|
|
|
}
|
|
|
|
|
2024-06-17 10:13:18 +00:00
|
|
|
func (p *SyslogParser) addField(name, value string) {
|
|
|
|
p.Fields = append(p.Fields, Field{
|
2024-06-03 22:59:25 +00:00
|
|
|
Name: name,
|
|
|
|
Value: value,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2024-06-17 10:13:18 +00:00
|
|
|
// Parse parses syslog message from s into p.Fields.
|
|
|
|
//
|
|
|
|
// p.Fields is valid until s is modified or p state is changed.
|
|
|
|
func (p *SyslogParser) Parse(s string) {
|
2024-06-03 22:59:25 +00:00
|
|
|
p.resetFields()
|
|
|
|
|
|
|
|
if len(s) == 0 {
|
|
|
|
// Cannot parse syslog message
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
if s[0] != '<' {
|
|
|
|
p.parseNoHeader(s)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// parse priority
|
|
|
|
s = s[1:]
|
|
|
|
n := strings.IndexByte(s, '>')
|
|
|
|
if n < 0 {
|
|
|
|
// Cannot parse priority
|
|
|
|
return
|
|
|
|
}
|
|
|
|
priorityStr := s[:n]
|
|
|
|
s = s[n+1:]
|
|
|
|
|
|
|
|
p.addField("priority", priorityStr)
|
|
|
|
priority, ok := tryParseUint64(priorityStr)
|
|
|
|
if !ok {
|
|
|
|
// Cannot parse priority
|
|
|
|
return
|
|
|
|
}
|
|
|
|
facility := priority / 8
|
|
|
|
severity := priority % 8
|
|
|
|
|
|
|
|
bufLen := len(p.buf)
|
|
|
|
p.buf = marshalUint64String(p.buf, facility)
|
|
|
|
p.addField("facility", bytesutil.ToUnsafeString(p.buf[bufLen:]))
|
|
|
|
|
|
|
|
bufLen = len(p.buf)
|
|
|
|
p.buf = marshalUint64String(p.buf, severity)
|
|
|
|
p.addField("severity", bytesutil.ToUnsafeString(p.buf[bufLen:]))
|
|
|
|
|
|
|
|
p.parseNoHeader(s)
|
|
|
|
}
|
|
|
|
|
2024-06-17 10:13:18 +00:00
|
|
|
func (p *SyslogParser) parseNoHeader(s string) {
|
2024-06-03 22:59:25 +00:00
|
|
|
if len(s) == 0 {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if strings.HasPrefix(s, "1 ") {
|
|
|
|
p.parseRFC5424(s[2:])
|
|
|
|
} else {
|
|
|
|
p.parseRFC3164(s)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-06-17 10:13:18 +00:00
|
|
|
func (p *SyslogParser) parseRFC5424(s string) {
|
2024-06-03 22:59:25 +00:00
|
|
|
// See https://datatracker.ietf.org/doc/html/rfc5424
|
|
|
|
|
2024-06-05 01:18:12 +00:00
|
|
|
p.addField("format", "rfc5424")
|
|
|
|
|
2024-06-03 22:59:25 +00:00
|
|
|
if len(s) == 0 {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Parse timestamp
|
|
|
|
n := strings.IndexByte(s, ' ')
|
|
|
|
if n < 0 {
|
|
|
|
p.addField("timestamp", s)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
p.addField("timestamp", s[:n])
|
|
|
|
s = s[n+1:]
|
|
|
|
|
|
|
|
// Parse hostname
|
|
|
|
n = strings.IndexByte(s, ' ')
|
|
|
|
if n < 0 {
|
|
|
|
p.addField("hostname", s)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
p.addField("hostname", s[:n])
|
|
|
|
s = s[n+1:]
|
|
|
|
|
|
|
|
// Parse app-name
|
|
|
|
n = strings.IndexByte(s, ' ')
|
|
|
|
if n < 0 {
|
|
|
|
p.addField("app_name", s)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
p.addField("app_name", s[:n])
|
|
|
|
s = s[n+1:]
|
|
|
|
|
|
|
|
// Parse procid
|
|
|
|
n = strings.IndexByte(s, ' ')
|
|
|
|
if n < 0 {
|
|
|
|
p.addField("proc_id", s)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
p.addField("proc_id", s[:n])
|
|
|
|
s = s[n+1:]
|
|
|
|
|
|
|
|
// Parse msgID
|
|
|
|
n = strings.IndexByte(s, ' ')
|
|
|
|
if n < 0 {
|
|
|
|
p.addField("msg_id", s)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
p.addField("msg_id", s[:n])
|
|
|
|
s = s[n+1:]
|
|
|
|
|
|
|
|
// Parse structured data
|
|
|
|
tail, ok := p.parseRFC5424SD(s)
|
|
|
|
if !ok {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
s = tail
|
|
|
|
|
|
|
|
// Parse message
|
|
|
|
p.addField("message", s)
|
|
|
|
}
|
|
|
|
|
2024-06-17 10:13:18 +00:00
|
|
|
func (p *SyslogParser) parseRFC5424SD(s string) (string, bool) {
|
2024-06-03 22:59:25 +00:00
|
|
|
if strings.HasPrefix(s, "- ") {
|
|
|
|
return s[2:], true
|
|
|
|
}
|
|
|
|
|
|
|
|
for {
|
|
|
|
tail, ok := p.parseRFC5424SDLine(s)
|
|
|
|
if !ok {
|
|
|
|
return tail, false
|
|
|
|
}
|
|
|
|
s = tail
|
|
|
|
if strings.HasPrefix(s, " ") {
|
|
|
|
s = s[1:]
|
|
|
|
return s, true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-06-17 10:13:18 +00:00
|
|
|
func (p *SyslogParser) parseRFC5424SDLine(s string) (string, bool) {
|
2024-06-03 22:59:25 +00:00
|
|
|
if len(s) == 0 || s[0] != '[' {
|
|
|
|
return s, false
|
|
|
|
}
|
|
|
|
s = s[1:]
|
|
|
|
|
|
|
|
n := strings.IndexAny(s, " ]")
|
|
|
|
if n < 0 {
|
|
|
|
return s, false
|
|
|
|
}
|
|
|
|
sdID := s[:n]
|
|
|
|
s = s[n:]
|
|
|
|
|
|
|
|
// Parse structured data
|
|
|
|
i := 0
|
|
|
|
for i < len(s) && s[i] != ']' {
|
|
|
|
// skip whitespace
|
|
|
|
if s[i] != ' ' {
|
|
|
|
return s, false
|
|
|
|
}
|
|
|
|
i++
|
|
|
|
|
|
|
|
// Parse name
|
|
|
|
n := strings.IndexByte(s[i:], '=')
|
|
|
|
if n < 0 {
|
|
|
|
return s, false
|
|
|
|
}
|
|
|
|
i += n + 1
|
|
|
|
|
|
|
|
// Parse value
|
|
|
|
qp, err := strconv.QuotedPrefix(s[i:])
|
|
|
|
if err != nil {
|
|
|
|
return s, false
|
|
|
|
}
|
|
|
|
i += len(qp)
|
|
|
|
}
|
|
|
|
if i == len(s) {
|
|
|
|
return s, false
|
|
|
|
}
|
|
|
|
|
|
|
|
sdValue := strings.TrimSpace(s[:i])
|
2024-06-25 12:52:43 +00:00
|
|
|
|
|
|
|
p.sdParser.parse(sdValue)
|
|
|
|
if len(p.sdParser.fields) == 0 {
|
|
|
|
// Special case when structured data doesn't contain any fields
|
|
|
|
p.addField(sdID, "")
|
|
|
|
} else {
|
|
|
|
for _, f := range p.sdParser.fields {
|
|
|
|
bufLen := len(p.buf)
|
|
|
|
p.buf = append(p.buf, sdID...)
|
|
|
|
p.buf = append(p.buf, '.')
|
|
|
|
p.buf = append(p.buf, f.Name...)
|
|
|
|
|
|
|
|
fieldName := bytesutil.ToUnsafeString(p.buf[bufLen:])
|
|
|
|
p.addField(fieldName, f.Value)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-06-03 22:59:25 +00:00
|
|
|
s = s[i+1:]
|
|
|
|
return s, true
|
|
|
|
}
|
|
|
|
|
2024-06-17 10:13:18 +00:00
|
|
|
func (p *SyslogParser) parseRFC3164(s string) {
|
2024-06-03 22:59:25 +00:00
|
|
|
// See https://datatracker.ietf.org/doc/html/rfc3164
|
|
|
|
|
2024-06-10 16:42:19 +00:00
|
|
|
p.addField("format", "rfc3164")
|
|
|
|
|
2024-06-03 22:59:25 +00:00
|
|
|
// Parse timestamp
|
|
|
|
n := len(time.Stamp)
|
|
|
|
if len(s) < n {
|
2024-06-10 16:42:19 +00:00
|
|
|
p.addField("message", s)
|
2024-06-03 22:59:25 +00:00
|
|
|
return
|
|
|
|
}
|
2024-06-05 01:18:12 +00:00
|
|
|
|
2024-06-03 22:59:25 +00:00
|
|
|
t, err := time.Parse(time.Stamp, s[:n])
|
|
|
|
if err != nil {
|
|
|
|
// TODO: fall back to parsing ISO8601 timestamp?
|
2024-06-10 16:42:19 +00:00
|
|
|
p.addField("message", s)
|
2024-06-03 22:59:25 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
s = s[n:]
|
|
|
|
|
|
|
|
t = t.UTC()
|
2024-06-17 10:13:18 +00:00
|
|
|
t = time.Date(p.currentYear, t.Month(), t.Day(), t.Hour(), t.Minute(), t.Second(), t.Nanosecond(), p.timezone)
|
2024-06-03 22:59:25 +00:00
|
|
|
if uint64(t.Unix())-24*3600 > fasttime.UnixTimestamp() {
|
|
|
|
// Adjust time to the previous year
|
2024-06-17 10:13:18 +00:00
|
|
|
t = time.Date(t.Year()-1, t.Month(), t.Day(), t.Hour(), t.Minute(), t.Second(), t.Nanosecond(), p.timezone)
|
2024-06-03 22:59:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
bufLen := len(p.buf)
|
|
|
|
p.buf = marshalTimestampISO8601String(p.buf, t.UnixNano())
|
|
|
|
p.addField("timestamp", bytesutil.ToUnsafeString(p.buf[bufLen:]))
|
|
|
|
|
|
|
|
if len(s) == 0 || s[0] != ' ' {
|
|
|
|
// Missing space after the time field
|
2024-06-10 16:42:19 +00:00
|
|
|
if len(s) > 0 {
|
|
|
|
p.addField("message", s)
|
|
|
|
}
|
2024-06-03 22:59:25 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
s = s[1:]
|
|
|
|
|
|
|
|
// Parse hostname
|
|
|
|
n = strings.IndexByte(s, ' ')
|
|
|
|
if n < 0 {
|
|
|
|
p.addField("hostname", s)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
p.addField("hostname", s[:n])
|
|
|
|
s = s[n+1:]
|
|
|
|
|
|
|
|
// Parse tag (aka app_name)
|
|
|
|
n = strings.IndexAny(s, "[: ")
|
|
|
|
if n < 0 {
|
|
|
|
p.addField("app_name", s)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
p.addField("app_name", s[:n])
|
|
|
|
s = s[n:]
|
|
|
|
|
|
|
|
// Parse proc_id
|
|
|
|
if len(s) == 0 {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if s[0] == '[' {
|
|
|
|
s = s[1:]
|
|
|
|
n = strings.IndexByte(s, ']')
|
|
|
|
if n < 0 {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
p.addField("proc_id", s[:n])
|
|
|
|
s = s[n+1:]
|
|
|
|
}
|
|
|
|
|
|
|
|
// Skip optional ': ' in front of message
|
|
|
|
s = strings.TrimPrefix(s, ":")
|
|
|
|
s = strings.TrimPrefix(s, " ")
|
|
|
|
|
|
|
|
if len(s) > 0 {
|
|
|
|
p.addField("message", s)
|
|
|
|
}
|
|
|
|
}
|