VictoriaMetrics/lib/protoparser/graphite/parser.go
Andrii Chubatiuk 476faf5578
lib/protoparser/graphite: added -graphite.sanitizeMetricName flag (#6489)
### Describe Your Changes

Added flag to sanitize graphite metrics
fixes #6077

### Checklist

The following checks are **mandatory**:

- [ ] My change adheres [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/contributing/).

---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Co-authored-by: hagen1778 <roman@victoriametrics.com>
2024-07-02 14:56:41 +02:00

271 lines
6.2 KiB
Go

package graphite
import (
"flag"
"fmt"
"regexp"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/metrics"
"github.com/valyala/fastjson/fastfloat"
)
var (
sanitizeMetricName = flag.Bool("graphite.sanitizeMetricName", false, "Sanitize metric names for the ingested Graphite data. "+
"See https://docs.victoriametrics.com/#how-to-send-data-from-graphite-compatible-agents-such-as-statsd")
)
// graphite text line protocol may use white space or tab as separator
// See https://github.com/grobian/carbon-c-relay/commit/f3ffe6cc2b52b07d14acbda649ad3fd6babdd528
const graphiteSeparators = " \t"
// Rows contains parsed graphite rows.
type Rows struct {
Rows []Row
tagsPool []Tag
}
// Reset resets rs.
func (rs *Rows) Reset() {
// Reset items, so they can be GC'ed
for i := range rs.Rows {
rs.Rows[i].reset()
}
rs.Rows = rs.Rows[:0]
for i := range rs.tagsPool {
rs.tagsPool[i].reset()
}
rs.tagsPool = rs.tagsPool[:0]
}
// Unmarshal unmarshals grahite plaintext protocol rows from s.
//
// See https://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol
//
// s shouldn't be modified when rs is in use.
func (rs *Rows) Unmarshal(s string) {
rs.Rows, rs.tagsPool = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0])
}
// Row is a single graphite row.
type Row struct {
Metric string
Tags []Tag
Value float64
Timestamp int64
}
func (r *Row) reset() {
r.Metric = ""
r.Tags = nil
r.Value = 0
r.Timestamp = 0
}
// UnmarshalMetricAndTags unmarshals metric and optional tags from s.
func (r *Row) UnmarshalMetricAndTags(s string, tagsPool []Tag) ([]Tag, error) {
n := strings.IndexByte(s, ';')
if n < 0 {
// No tags
r.Metric = s
} else {
// Tags found
r.Metric = s[:n]
tagsStart := len(tagsPool)
tagsPool = unmarshalTags(tagsPool, s[n+1:])
tags := tagsPool[tagsStart:]
r.Tags = tags[:len(tags):len(tags)]
}
if len(r.Metric) == 0 {
return tagsPool, fmt.Errorf("metric cannot be empty")
}
if *sanitizeMetricName {
r.Metric = sanitizer.Transform(r.Metric)
}
return tagsPool, nil
}
func (r *Row) unmarshal(s string, tagsPool []Tag) ([]Tag, error) {
r.reset()
sOrig := s
s = stripTrailingWhitespace(s)
n := strings.LastIndexAny(s, graphiteSeparators)
if n < 0 {
return tagsPool, fmt.Errorf("cannot find separator between value and timestamp in %q", s)
}
timestampStr := s[n+1:]
valueStr := ""
metricAndTags := ""
s = stripTrailingWhitespace(s[:n])
n = strings.LastIndexAny(s, graphiteSeparators)
if n < 0 {
// Missing timestamp
metricAndTags = stripLeadingWhitespace(s)
valueStr = timestampStr
timestampStr = ""
} else {
metricAndTags = stripLeadingWhitespace(s[:n])
valueStr = s[n+1:]
}
metricAndTags = stripTrailingWhitespace(metricAndTags)
tagsPool, err := r.UnmarshalMetricAndTags(metricAndTags, tagsPool)
if err != nil {
return tagsPool, fmt.Errorf("cannot parse metric and tags from %q: %w; original line: %q", metricAndTags, err, sOrig)
}
if len(timestampStr) > 0 {
ts, err := fastfloat.Parse(timestampStr)
if err != nil {
return tagsPool, fmt.Errorf("cannot unmarshal timestamp from %q: %w; orignal line: %q", timestampStr, err, sOrig)
}
r.Timestamp = int64(ts)
}
v, err := fastfloat.Parse(valueStr)
if err != nil {
return tagsPool, fmt.Errorf("cannot unmarshal value from %q: %w; original line: %q", valueStr, err, sOrig)
}
r.Value = v
return tagsPool, nil
}
func unmarshalRows(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag) {
for len(s) > 0 {
n := strings.IndexByte(s, '\n')
if n < 0 {
// The last line.
return unmarshalRow(dst, s, tagsPool)
}
dst, tagsPool = unmarshalRow(dst, s[:n], tagsPool)
s = s[n+1:]
}
return dst, tagsPool
}
func unmarshalRow(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag) {
if len(s) > 0 && s[len(s)-1] == '\r' {
s = s[:len(s)-1]
}
s = stripLeadingWhitespace(s)
if len(s) == 0 {
// Skip empty line
return dst, tagsPool
}
if cap(dst) > len(dst) {
dst = dst[:len(dst)+1]
} else {
dst = append(dst, Row{})
}
r := &dst[len(dst)-1]
var err error
tagsPool, err = r.unmarshal(s, tagsPool)
if err != nil {
dst = dst[:len(dst)-1]
logger.Errorf("cannot unmarshal Graphite line %q: %s", s, err)
invalidLines.Inc()
}
return dst, tagsPool
}
var invalidLines = metrics.NewCounter(`vm_rows_invalid_total{type="graphite"}`)
func unmarshalTags(dst []Tag, s string) []Tag {
for {
if cap(dst) > len(dst) {
dst = dst[:len(dst)+1]
} else {
dst = append(dst, Tag{})
}
tag := &dst[len(dst)-1]
n := strings.IndexByte(s, ';')
if n < 0 {
// The last tag found
tag.unmarshal(s)
if len(tag.Key) == 0 || len(tag.Value) == 0 {
// Skip empty tag
dst = dst[:len(dst)-1]
}
return dst
}
tag.unmarshal(s[:n])
s = s[n+1:]
if len(tag.Key) == 0 || len(tag.Value) == 0 {
// Skip empty tag
dst = dst[:len(dst)-1]
}
}
}
// Tag is a graphite tag.
type Tag struct {
Key string
Value string
}
func (t *Tag) reset() {
t.Key = ""
t.Value = ""
}
func (t *Tag) unmarshal(s string) {
t.reset()
if *sanitizeMetricName {
s = sanitizer.Transform(s)
}
n := strings.IndexByte(s, '=')
if n < 0 {
// Empty tag value.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1100
t.Key = s
t.Value = s[len(s):]
} else {
t.Key = s[:n]
t.Value = s[n+1:]
}
}
func stripTrailingWhitespace(s string) string {
n := len(s)
for {
n--
if n < 0 {
return ""
}
ch := s[n]
// graphite text line protocol may use white space or tab as separator
// See https://github.com/grobian/carbon-c-relay/commit/f3ffe6cc2b52b07d14acbda649ad3fd6babdd528
if ch != ' ' && ch != '\t' {
return s[:n+1]
}
}
}
func stripLeadingWhitespace(s string) string {
for len(s) > 0 {
ch := s[0]
if ch != ' ' && ch != '\t' {
return s
}
s = s[1:]
}
return ""
}
var sanitizer = bytesutil.NewFastStringTransformer(func(s string) string {
// Apply rule to drop some chars to preserve backwards compatibility
s = dropChars.Replace(s)
// Replace any remaining illegal chars
return allowedChars.ReplaceAllLiteralString(s, "_")
})
var (
dropChars = strings.NewReplacer(
`\`, "",
"..", ".",
)
allowedChars = regexp.MustCompile(`[^a-zA-Z0-9:._=\p{L}]`)
)