lib/protoparser/graphite: return error when value or timestamp cannot be properly parsed

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/99
2024-11-21 14:44:00 +00:00 · 2020-09-16 01:33:32 +03:00 · 2020-09-16 01:33:32 +03:00 · 9bc8484ab6
commit 9bc8484ab6
parent 26fa94ba8d
6 changed files with 270 additions and 7 deletions
--- a/go.mod
+++ b/go.mod
@ -13,7 +13,7 @@ require (
 	github.com/cespare/xxhash/v2 v2.1.1
 	github.com/golang/snappy v0.0.1
 	github.com/klauspost/compress v1.11.0
-	github.com/valyala/fastjson v1.5.5
+	github.com/valyala/fastjson v1.6.0
 	github.com/valyala/fastrand v1.0.0
 	github.com/valyala/fasttemplate v1.2.1
 	github.com/valyala/gozstd v1.8.3
--- a/go.sum
+++ b/go.sum
@ -173,8 +173,8 @@ github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5
 github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
 github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
 github.com/valyala/fasthttp v1.15.1/go.mod h1:YOKImeEosDdBPnxc0gy7INqi3m1zK6A+xl6TwOBhHCA=
-github.com/valyala/fastjson v1.5.5 h1:4CAm2y8QWklzgJx0hELD61SYkRkFxn4wUIqb0Uzcn44=
-github.com/valyala/fastjson v1.5.5/go.mod h1:CLCAqky6SMuOcxStkYQvblddUtoRxhYMGLrsQns1aXY=
+github.com/valyala/fastjson v1.6.0 h1:aJV8Tvmeq1mCXxDOVV8raxBoyA3eE8xwTgW8SGQ5yKM=
+github.com/valyala/fastjson v1.6.0/go.mod h1:CLCAqky6SMuOcxStkYQvblddUtoRxhYMGLrsQns1aXY=
 github.com/valyala/fastrand v1.0.0 h1:LUKT9aKer2dVQNUi3waewTbKV+7H17kvWFNKs2ObdkI=
 github.com/valyala/fastrand v1.0.0/go.mod h1:HWqCzkrkg6QXT8V2EXWvXCoow7vLwOFN002oeRzjapQ=
 github.com/valyala/fasttemplate v1.2.1 h1:TVEnxayobAdVkhQfrfes2IzOB6o+z4roRkPF52WA1u4=
--- a/lib/protoparser/graphite/parser.go
+++ b/lib/protoparser/graphite/parser.go
@ -87,11 +87,23 @@ func (r *Row) unmarshal(s string, tagsPool []Tag) ([]Tag, error) {
 	n = strings.IndexByte(tail, ' ')
 	if n < 0 {
 		// There is no timestamp. Use default timestamp instead.
-		r.Value = fastfloat.ParseBestEffort(tail)
+		v, err := fastfloat.Parse(tail)
+		if err != nil {
+			return tagsPool, fmt.Errorf("cannot unmarshal value from %q: %w", tail, err)
+		}
+		r.Value = v
 		return tagsPool, nil
 	}
-	r.Value = fastfloat.ParseBestEffort(tail[:n])
-	r.Timestamp = fastfloat.ParseInt64BestEffort(tail[n+1:])
+	v, err := fastfloat.Parse(tail[:n])
+	if err != nil {
+		return tagsPool, fmt.Errorf("cannot unmarshal value from %q: %w", tail[:n], err)
+	}
+	ts, err := fastfloat.ParseInt64(tail[n+1:])
+	if err != nil {
+		return tagsPool, fmt.Errorf("cannot unmarshal timestamp from %q: %w", tail[n+1:], err)
+	}
+	r.Value = v
+	r.Timestamp = ts
 	return tagsPool, nil
 }

--- a/lib/protoparser/graphite/parser_test.go
+++ b/lib/protoparser/graphite/parser_test.go
@ -34,6 +34,16 @@ func TestRowsUnmarshalFailure(t *testing.T) {

 	// missing tag value
 	f("aa;bb 23 34")
+
+	// unexpected space in tag value
+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/99
+	f("s;tag1=aaa1;tag2=bb b2;tag3=ccc3 1")
+
+	// invalid value
+	f("aa bb")
+
+	// invalid timestamp
+	f("aa 123 bar")
 }

 func TestRowsUnmarshalSuccess(t *testing.T) {
--- a/vendor/github.com/valyala/fastjson/fastfloat/parse.go
+++ b/vendor/github.com/valyala/fastjson/fastfloat/parse.go
@ -1,6 +1,7 @@
 package fastfloat

 import (
+	"fmt"
 	"math"
 	"strconv"
 	"strings"
@ -11,6 +12,7 @@ import (
 // It is equivalent to strconv.ParseUint(s, 10, 64), but is faster.
 //
 // 0 is returned if the number cannot be parsed.
+// See also ParseUint64, which returns parse error if the number cannot be parsed.
 func ParseUint64BestEffort(s string) uint64 {
 	if len(s) == 0 {
 		return 0
@ -45,11 +47,51 @@ func ParseUint64BestEffort(s string) uint64 {
 	return d
 }

+// ParseUint64 parses uint64 from s.
+//
+// It is equivalent to strconv.ParseUint(s, 10, 64), but is faster.
+//
+// See also ParseUint64BestEffort.
+func ParseUint64(s string) (uint64, error) {
+	if len(s) == 0 {
+		return 0, fmt.Errorf("cannot parse uint64 from empty string")
+	}
+	i := uint(0)
+	d := uint64(0)
+	j := i
+	for i < uint(len(s)) {
+		if s[i] >= '0' && s[i] <= '9' {
+			d = d*10 + uint64(s[i]-'0')
+			i++
+			if i > 18 {
+				// The integer part may be out of range for uint64.
+				// Fall back to slow parsing.
+				dd, err := strconv.ParseUint(s, 10, 64)
+				if err != nil {
+					return 0, err
+				}
+				return dd, nil
+			}
+			continue
+		}
+		break
+	}
+	if i <= j {
+		return 0, fmt.Errorf("cannot parse uint64 from %q", s)
+	}
+	if i < uint(len(s)) {
+		// Unparsed tail left.
+		return 0, fmt.Errorf("unparsed tail left after parsing uint64 from %q: %q", s, s[i:])
+	}
+	return d, nil
+}
+
 // ParseInt64BestEffort parses int64 number s.
 //
 // It is equivalent to strconv.ParseInt(s, 10, 64), but is faster.
 //
 // 0 is returned if the number cannot be parsed.
+// See also ParseInt64, which returns parse error if the number cannot be parsed.
 func ParseInt64BestEffort(s string) int64 {
 	if len(s) == 0 {
 		return 0
@ -95,6 +137,56 @@ func ParseInt64BestEffort(s string) int64 {
 	return d
 }

+// ParseInt64 parses int64 number s.
+//
+// It is equivalent to strconv.ParseInt(s, 10, 64), but is faster.
+//
+// See also ParseInt64BestEffort.
+func ParseInt64(s string) (int64, error) {
+	if len(s) == 0 {
+		return 0, fmt.Errorf("cannot parse int64 from empty string")
+	}
+	i := uint(0)
+	minus := s[0] == '-'
+	if minus {
+		i++
+		if i >= uint(len(s)) {
+			return 0, fmt.Errorf("cannot parse int64 from %q", s)
+		}
+	}
+
+	d := int64(0)
+	j := i
+	for i < uint(len(s)) {
+		if s[i] >= '0' && s[i] <= '9' {
+			d = d*10 + int64(s[i]-'0')
+			i++
+			if i > 18 {
+				// The integer part may be out of range for int64.
+				// Fall back to slow parsing.
+				dd, err := strconv.ParseInt(s, 10, 64)
+				if err != nil {
+					return 0, err
+				}
+				return dd, nil
+			}
+			continue
+		}
+		break
+	}
+	if i <= j {
+		return 0, fmt.Errorf("cannot parse int64 from %q", s)
+	}
+	if i < uint(len(s)) {
+		// Unparsed tail left.
+		return 0, fmt.Errorf("unparsed tail left after parsing int64 form %q: %q", s, s[i:])
+	}
+	if minus {
+		d = -d
+	}
+	return d, nil
+}
+
 // Exact powers of 10.
 //
 // This works faster than math.Pow10, since it avoids additional multiplication.
@ -107,6 +199,7 @@ var float64pow10 = [...]float64{
 // It is equivalent to strconv.ParseFloat(s, 64), but is faster.
 //
 // 0 is returned if the number cannot be parsed.
+// See also Parse, which returns parse error if the number cannot be parsed.
 func ParseBestEffort(s string) float64 {
 	if len(s) == 0 {
 		return 0
@ -250,5 +343,153 @@ func ParseBestEffort(s string) float64 {
 	return 0
 }

+// Parse parses floating-point number s.
+//
+// It is equivalent to strconv.ParseFloat(s, 64), but is faster.
+//
+// See also ParseBestEffort.
+func Parse(s string) (float64, error) {
+	if len(s) == 0 {
+		return 0, fmt.Errorf("cannot parse float64 from empty string")
+	}
+	i := uint(0)
+	minus := s[0] == '-'
+	if minus {
+		i++
+		if i >= uint(len(s)) {
+			return 0, fmt.Errorf("cannot parse float64 from %q", s)
+		}
+	}
+
+	d := uint64(0)
+	j := i
+	for i < uint(len(s)) {
+		if s[i] >= '0' && s[i] <= '9' {
+			d = d*10 + uint64(s[i]-'0')
+			i++
+			if i > 18 {
+				// The integer part may be out of range for uint64.
+				// Fall back to slow parsing.
+				f, err := strconv.ParseFloat(s, 64)
+				if err != nil && !math.IsInf(f, 0) {
+					return 0, err
+				}
+				return f, nil
+			}
+			continue
+		}
+		break
+	}
+	if i <= j {
+		ss := s[i:]
+		if strings.HasPrefix(ss, "+") {
+			ss = ss[1:]
+		}
+		if strings.EqualFold(ss, "inf") {
+			if minus {
+				return -inf, nil
+			}
+			return inf, nil
+		}
+		if strings.EqualFold(ss, "nan") {
+			return nan, nil
+		}
+		return 0, fmt.Errorf("unparsed tail left after parsing float64 from %q: %q", s, ss)
+	}
+	f := float64(d)
+	if i >= uint(len(s)) {
+		// Fast path - just integer.
+		if minus {
+			f = -f
+		}
+		return f, nil
+	}
+
+	if s[i] == '.' {
+		// Parse fractional part.
+		i++
+		if i >= uint(len(s)) {
+			return 0, fmt.Errorf("cannot parse fractional part in %q", s)
+		}
+		k := i
+		for i < uint(len(s)) {
+			if s[i] >= '0' && s[i] <= '9' {
+				d = d*10 + uint64(s[i]-'0')
+				i++
+				if i-j >= uint(len(float64pow10)) {
+					// The mantissa is out of range. Fall back to standard parsing.
+					f, err := strconv.ParseFloat(s, 64)
+					if err != nil && !math.IsInf(f, 0) {
+						return 0, fmt.Errorf("cannot parse mantissa in %q: %s", s, err)
+					}
+					return f, nil
+				}
+				continue
+			}
+			break
+		}
+		if i < k {
+			return 0, fmt.Errorf("cannot find mantissa in %q", s)
+		}
+		// Convert the entire mantissa to a float at once to avoid rounding errors.
+		f = float64(d) / float64pow10[i-k]
+		if i >= uint(len(s)) {
+			// Fast path - parsed fractional number.
+			if minus {
+				f = -f
+			}
+			return f, nil
+		}
+	}
+	if s[i] == 'e' || s[i] == 'E' {
+		// Parse exponent part.
+		i++
+		if i >= uint(len(s)) {
+			return 0, fmt.Errorf("cannot parse exponent in %q", s)
+		}
+		expMinus := false
+		if s[i] == '+' || s[i] == '-' {
+			expMinus = s[i] == '-'
+			i++
+			if i >= uint(len(s)) {
+				return 0, fmt.Errorf("cannot parse exponent in %q", s)
+			}
+		}
+		exp := int16(0)
+		j := i
+		for i < uint(len(s)) {
+			if s[i] >= '0' && s[i] <= '9' {
+				exp = exp*10 + int16(s[i]-'0')
+				i++
+				if exp > 300 {
+					// The exponent may be too big for float64.
+					// Fall back to standard parsing.
+					f, err := strconv.ParseFloat(s, 64)
+					if err != nil && !math.IsInf(f, 0) {
+						return 0, fmt.Errorf("cannot parse exponent in %q: %s", s, err)
+					}
+					return f, nil
+				}
+				continue
+			}
+			break
+		}
+		if i <= j {
+			return 0, fmt.Errorf("cannot parse exponent in %q", s)
+		}
+		if expMinus {
+			exp = -exp
+		}
+		f *= math.Pow10(int(exp))
+		if i >= uint(len(s)) {
+			if minus {
+				f = -f
+			}
+			return f, nil
+		}
+	}
+	return 0, fmt.Errorf("cannot parse float64 from %q", s)
+}
+
 var inf = math.Inf(1)
 var nan = math.NaN()
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@ -98,7 +98,7 @@ github.com/klauspost/compress/zstd
 github.com/klauspost/compress/zstd/internal/xxhash
 # github.com/valyala/bytebufferpool v1.0.0
 github.com/valyala/bytebufferpool
-# github.com/valyala/fastjson v1.5.5
+# github.com/valyala/fastjson v1.6.0
 github.com/valyala/fastjson
 github.com/valyala/fastjson/fastfloat
 # github.com/valyala/fastrand v1.0.0