mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-01 14:47:38 +00:00
037652d5ae
Use local timezone of the host server in this case. The timezone can be overridden with TZ environment variable if needed. While at it, allow using whitespace instead of T as a delimiter between data and time in the ingested _time field. For example, '2024-09-20 10:20:30' is now accepted during data ingestion. This is valid ISO8601 format, which is used by some log shippers, so it should be supported. This format is also known as SQL datetime format. Also assume local time zone when time without timezone information is passed to querying APIs. Previously such a time was parsed in UTC timezone. Add `Z` to the end of the time string if the old behaviour is preferred. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6721
371 lines
7.2 KiB
Go
371 lines
7.2 KiB
Go
package logstorage
|
|
|
|
import (
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
|
)
|
|
|
|
// GetSyslogParser returns syslog parser from the pool.
|
|
//
|
|
// currentYear must contain the current year. It is used for properly setting timestamp
|
|
// field for rfc3164 format, which doesn't contain year.
|
|
//
|
|
// the timezone is used for rfc3164 format for setting the desired timezone.
|
|
//
|
|
// Return back the parser to the pool by calling PutSyslogParser when it is no longer needed.
|
|
func GetSyslogParser(currentYear int, timezone *time.Location) *SyslogParser {
|
|
v := syslogParserPool.Get()
|
|
if v == nil {
|
|
v = &SyslogParser{}
|
|
}
|
|
p := v.(*SyslogParser)
|
|
p.currentYear = currentYear
|
|
p.timezone = timezone
|
|
return p
|
|
}
|
|
|
|
// PutSyslogParser returns back syslog parser to the pool.
|
|
//
|
|
// p cannot be used after returning to the pool.
|
|
func PutSyslogParser(p *SyslogParser) {
|
|
p.reset()
|
|
syslogParserPool.Put(p)
|
|
}
|
|
|
|
var syslogParserPool sync.Pool
|
|
|
|
// SyslogParser is parser for syslog messages.
|
|
//
|
|
// It understands the following syslog formats:
|
|
//
|
|
// - https://datatracker.ietf.org/doc/html/rfc5424
|
|
// - https://datatracker.ietf.org/doc/html/rfc3164
|
|
//
|
|
// It extracts the following list of syslog message fields into Fields -
|
|
// https://docs.victoriametrics.com/victorialogs/logsql/#unpack_syslog-pipe
|
|
type SyslogParser struct {
|
|
// Fields contains parsed fields after Parse call.
|
|
Fields []Field
|
|
|
|
// buf contains temporary data used in Fields.
|
|
buf []byte
|
|
|
|
// sdParser is used for structured data parsing in rfc5424.
|
|
// See https://datatracker.ietf.org/doc/html/rfc5424#section-6.3
|
|
sdParser logfmtParser
|
|
|
|
// currentYear is used as the current year for rfc3164 messages.
|
|
currentYear int
|
|
|
|
// timezone is used as the current timezone for rfc3164 messages.
|
|
timezone *time.Location
|
|
}
|
|
|
|
func (p *SyslogParser) reset() {
|
|
p.currentYear = 0
|
|
p.timezone = nil
|
|
p.resetFields()
|
|
}
|
|
|
|
func (p *SyslogParser) resetFields() {
|
|
clear(p.Fields)
|
|
p.Fields = p.Fields[:0]
|
|
|
|
p.buf = p.buf[:0]
|
|
p.sdParser.reset()
|
|
}
|
|
|
|
func (p *SyslogParser) addField(name, value string) {
|
|
p.Fields = append(p.Fields, Field{
|
|
Name: name,
|
|
Value: value,
|
|
})
|
|
}
|
|
|
|
// Parse parses syslog message from s into p.Fields.
|
|
//
|
|
// p.Fields is valid until s is modified or p state is changed.
|
|
func (p *SyslogParser) Parse(s string) {
|
|
p.resetFields()
|
|
|
|
if len(s) == 0 {
|
|
// Cannot parse syslog message
|
|
return
|
|
}
|
|
|
|
if s[0] != '<' {
|
|
p.parseNoHeader(s)
|
|
return
|
|
}
|
|
|
|
// parse priority
|
|
s = s[1:]
|
|
n := strings.IndexByte(s, '>')
|
|
if n < 0 {
|
|
// Cannot parse priority
|
|
return
|
|
}
|
|
priorityStr := s[:n]
|
|
s = s[n+1:]
|
|
|
|
p.addField("priority", priorityStr)
|
|
priority, ok := tryParseUint64(priorityStr)
|
|
if !ok {
|
|
// Cannot parse priority
|
|
return
|
|
}
|
|
facility := priority / 8
|
|
severity := priority % 8
|
|
|
|
bufLen := len(p.buf)
|
|
p.buf = marshalUint64String(p.buf, facility)
|
|
p.addField("facility", bytesutil.ToUnsafeString(p.buf[bufLen:]))
|
|
|
|
bufLen = len(p.buf)
|
|
p.buf = marshalUint64String(p.buf, severity)
|
|
p.addField("severity", bytesutil.ToUnsafeString(p.buf[bufLen:]))
|
|
|
|
p.parseNoHeader(s)
|
|
}
|
|
|
|
func (p *SyslogParser) parseNoHeader(s string) {
|
|
if len(s) == 0 {
|
|
return
|
|
}
|
|
if strings.HasPrefix(s, "1 ") {
|
|
p.parseRFC5424(s[2:])
|
|
} else {
|
|
p.parseRFC3164(s)
|
|
}
|
|
}
|
|
|
|
func (p *SyslogParser) parseRFC5424(s string) {
|
|
// See https://datatracker.ietf.org/doc/html/rfc5424
|
|
|
|
p.addField("format", "rfc5424")
|
|
|
|
if len(s) == 0 {
|
|
return
|
|
}
|
|
|
|
// Parse timestamp
|
|
n := strings.IndexByte(s, ' ')
|
|
if n < 0 {
|
|
p.addField("timestamp", s)
|
|
return
|
|
}
|
|
p.addField("timestamp", s[:n])
|
|
s = s[n+1:]
|
|
|
|
// Parse hostname
|
|
n = strings.IndexByte(s, ' ')
|
|
if n < 0 {
|
|
p.addField("hostname", s)
|
|
return
|
|
}
|
|
p.addField("hostname", s[:n])
|
|
s = s[n+1:]
|
|
|
|
// Parse app-name
|
|
n = strings.IndexByte(s, ' ')
|
|
if n < 0 {
|
|
p.addField("app_name", s)
|
|
return
|
|
}
|
|
p.addField("app_name", s[:n])
|
|
s = s[n+1:]
|
|
|
|
// Parse procid
|
|
n = strings.IndexByte(s, ' ')
|
|
if n < 0 {
|
|
p.addField("proc_id", s)
|
|
return
|
|
}
|
|
p.addField("proc_id", s[:n])
|
|
s = s[n+1:]
|
|
|
|
// Parse msgID
|
|
n = strings.IndexByte(s, ' ')
|
|
if n < 0 {
|
|
p.addField("msg_id", s)
|
|
return
|
|
}
|
|
p.addField("msg_id", s[:n])
|
|
s = s[n+1:]
|
|
|
|
// Parse structured data
|
|
tail, ok := p.parseRFC5424SD(s)
|
|
if !ok {
|
|
return
|
|
}
|
|
s = tail
|
|
|
|
// Parse message
|
|
p.addField("message", s)
|
|
}
|
|
|
|
func (p *SyslogParser) parseRFC5424SD(s string) (string, bool) {
|
|
if strings.HasPrefix(s, "- ") {
|
|
return s[2:], true
|
|
}
|
|
|
|
for {
|
|
tail, ok := p.parseRFC5424SDLine(s)
|
|
if !ok {
|
|
return tail, false
|
|
}
|
|
s = tail
|
|
if strings.HasPrefix(s, " ") {
|
|
s = s[1:]
|
|
return s, true
|
|
}
|
|
}
|
|
}
|
|
|
|
func (p *SyslogParser) parseRFC5424SDLine(s string) (string, bool) {
|
|
if len(s) == 0 || s[0] != '[' {
|
|
return s, false
|
|
}
|
|
s = s[1:]
|
|
|
|
n := strings.IndexAny(s, " ]")
|
|
if n < 0 {
|
|
return s, false
|
|
}
|
|
sdID := s[:n]
|
|
s = s[n:]
|
|
|
|
// Parse structured data
|
|
i := 0
|
|
for i < len(s) && s[i] != ']' {
|
|
// skip whitespace
|
|
if s[i] != ' ' {
|
|
return s, false
|
|
}
|
|
i++
|
|
|
|
// Parse name
|
|
n := strings.IndexByte(s[i:], '=')
|
|
if n < 0 {
|
|
return s, false
|
|
}
|
|
i += n + 1
|
|
|
|
// Parse value
|
|
qp, err := strconv.QuotedPrefix(s[i:])
|
|
if err != nil {
|
|
return s, false
|
|
}
|
|
i += len(qp)
|
|
}
|
|
if i == len(s) {
|
|
return s, false
|
|
}
|
|
|
|
sdValue := strings.TrimSpace(s[:i])
|
|
|
|
p.sdParser.parse(sdValue)
|
|
if len(p.sdParser.fields) == 0 {
|
|
// Special case when structured data doesn't contain any fields
|
|
p.addField(sdID, "")
|
|
} else {
|
|
for _, f := range p.sdParser.fields {
|
|
bufLen := len(p.buf)
|
|
p.buf = append(p.buf, sdID...)
|
|
p.buf = append(p.buf, '.')
|
|
p.buf = append(p.buf, f.Name...)
|
|
|
|
fieldName := bytesutil.ToUnsafeString(p.buf[bufLen:])
|
|
p.addField(fieldName, f.Value)
|
|
}
|
|
}
|
|
|
|
s = s[i+1:]
|
|
return s, true
|
|
}
|
|
|
|
func (p *SyslogParser) parseRFC3164(s string) {
|
|
// See https://datatracker.ietf.org/doc/html/rfc3164
|
|
|
|
p.addField("format", "rfc3164")
|
|
|
|
// Parse timestamp
|
|
n := len(time.Stamp)
|
|
if len(s) < n {
|
|
p.addField("message", s)
|
|
return
|
|
}
|
|
|
|
t, err := time.Parse(time.Stamp, s[:n])
|
|
if err != nil {
|
|
// TODO: fall back to parsing ISO8601 timestamp?
|
|
p.addField("message", s)
|
|
return
|
|
}
|
|
s = s[n:]
|
|
|
|
t = t.UTC()
|
|
t = time.Date(p.currentYear, t.Month(), t.Day(), t.Hour(), t.Minute(), t.Second(), t.Nanosecond(), p.timezone)
|
|
if uint64(t.Unix())-24*3600 > fasttime.UnixTimestamp() {
|
|
// Adjust time to the previous year
|
|
t = time.Date(t.Year()-1, t.Month(), t.Day(), t.Hour(), t.Minute(), t.Second(), t.Nanosecond(), p.timezone)
|
|
}
|
|
|
|
bufLen := len(p.buf)
|
|
p.buf = marshalTimestampISO8601String(p.buf, t.UnixNano())
|
|
p.addField("timestamp", bytesutil.ToUnsafeString(p.buf[bufLen:]))
|
|
|
|
if len(s) == 0 || s[0] != ' ' {
|
|
// Missing space after the time field
|
|
if len(s) > 0 {
|
|
p.addField("message", s)
|
|
}
|
|
return
|
|
}
|
|
s = s[1:]
|
|
|
|
// Parse hostname
|
|
n = strings.IndexByte(s, ' ')
|
|
if n < 0 {
|
|
p.addField("hostname", s)
|
|
return
|
|
}
|
|
p.addField("hostname", s[:n])
|
|
s = s[n+1:]
|
|
|
|
// Parse tag (aka app_name)
|
|
n = strings.IndexAny(s, "[: ")
|
|
if n < 0 {
|
|
p.addField("app_name", s)
|
|
return
|
|
}
|
|
p.addField("app_name", s[:n])
|
|
s = s[n:]
|
|
|
|
// Parse proc_id
|
|
if len(s) == 0 {
|
|
return
|
|
}
|
|
if s[0] == '[' {
|
|
s = s[1:]
|
|
n = strings.IndexByte(s, ']')
|
|
if n < 0 {
|
|
return
|
|
}
|
|
p.addField("proc_id", s[:n])
|
|
s = s[n+1:]
|
|
}
|
|
|
|
// Skip optional ': ' in front of message
|
|
s = strings.TrimPrefix(s, ":")
|
|
s = strings.TrimPrefix(s, " ")
|
|
|
|
if len(s) > 0 {
|
|
p.addField("message", s)
|
|
}
|
|
}
|