VictoriaMetrics/vendor/github.com/valyala/fastjson/parser.go
2020-11-02 22:01:42 +02:00

976 lines
22 KiB
Go

package fastjson
import (
"fmt"
"github.com/valyala/fastjson/fastfloat"
"strconv"
"strings"
"unicode/utf16"
)
// Parser parses JSON.
//
// Parser may be re-used for subsequent parsing.
//
// Parser cannot be used from concurrent goroutines.
// Use per-goroutine parsers or ParserPool instead.
type Parser struct {
// b contains working copy of the string to be parsed.
b []byte
// c is a cache for json values.
c cache
}
// Parse parses s containing JSON.
//
// The returned value is valid until the next call to Parse*.
//
// Use Scanner if a stream of JSON values must be parsed.
func (p *Parser) Parse(s string) (*Value, error) {
s = skipWS(s)
p.b = append(p.b[:0], s...)
p.c.reset()
v, tail, err := parseValue(b2s(p.b), &p.c, 0)
if err != nil {
return nil, fmt.Errorf("cannot parse JSON: %s; unparsed tail: %q", err, startEndString(tail))
}
tail = skipWS(tail)
if len(tail) > 0 {
return nil, fmt.Errorf("unexpected tail: %q", startEndString(tail))
}
return v, nil
}
// ParseBytes parses b containing JSON.
//
// The returned Value is valid until the next call to Parse*.
//
// Use Scanner if a stream of JSON values must be parsed.
func (p *Parser) ParseBytes(b []byte) (*Value, error) {
return p.Parse(b2s(b))
}
type cache struct {
vs []Value
}
func (c *cache) reset() {
c.vs = c.vs[:0]
}
func (c *cache) getValue() *Value {
if cap(c.vs) > len(c.vs) {
c.vs = c.vs[:len(c.vs)+1]
} else {
c.vs = append(c.vs, Value{})
}
// Do not reset the value, since the caller must properly init it.
return &c.vs[len(c.vs)-1]
}
func skipWS(s string) string {
if len(s) == 0 || s[0] > 0x20 {
// Fast path.
return s
}
return skipWSSlow(s)
}
func skipWSSlow(s string) string {
if len(s) == 0 || s[0] != 0x20 && s[0] != 0x0A && s[0] != 0x09 && s[0] != 0x0D {
return s
}
for i := 1; i < len(s); i++ {
if s[i] != 0x20 && s[i] != 0x0A && s[i] != 0x09 && s[i] != 0x0D {
return s[i:]
}
}
return ""
}
type kv struct {
k string
v *Value
}
// MaxDepth is the maximum depth for nested JSON.
const MaxDepth = 300
func parseValue(s string, c *cache, depth int) (*Value, string, error) {
if len(s) == 0 {
return nil, s, fmt.Errorf("cannot parse empty string")
}
depth++
if depth > MaxDepth {
return nil, s, fmt.Errorf("too big depth for the nested JSON; it exceeds %d", MaxDepth)
}
if s[0] == '{' {
v, tail, err := parseObject(s[1:], c, depth)
if err != nil {
return nil, tail, fmt.Errorf("cannot parse object: %s", err)
}
return v, tail, nil
}
if s[0] == '[' {
v, tail, err := parseArray(s[1:], c, depth)
if err != nil {
return nil, tail, fmt.Errorf("cannot parse array: %s", err)
}
return v, tail, nil
}
if s[0] == '"' {
ss, tail, err := parseRawString(s[1:])
if err != nil {
return nil, tail, fmt.Errorf("cannot parse string: %s", err)
}
v := c.getValue()
v.t = typeRawString
v.s = ss
return v, tail, nil
}
if s[0] == 't' {
if len(s) < len("true") || s[:len("true")] != "true" {
return nil, s, fmt.Errorf("unexpected value found: %q", s)
}
return valueTrue, s[len("true"):], nil
}
if s[0] == 'f' {
if len(s) < len("false") || s[:len("false")] != "false" {
return nil, s, fmt.Errorf("unexpected value found: %q", s)
}
return valueFalse, s[len("false"):], nil
}
if s[0] == 'n' {
if len(s) < len("null") || s[:len("null")] != "null" {
// Try parsing NaN
if len(s) >= 3 && strings.EqualFold(s[:3], "nan") {
v := c.getValue()
v.t = TypeNumber
v.s = s[:3]
return v, s[3:], nil
}
return nil, s, fmt.Errorf("unexpected value found: %q", s)
}
return valueNull, s[len("null"):], nil
}
ns, tail, err := parseRawNumber(s)
if err != nil {
return nil, tail, fmt.Errorf("cannot parse number: %s", err)
}
v := c.getValue()
v.t = TypeNumber
v.s = ns
return v, tail, nil
}
func parseArray(s string, c *cache, depth int) (*Value, string, error) {
s = skipWS(s)
if len(s) == 0 {
return nil, s, fmt.Errorf("missing ']'")
}
if s[0] == ']' {
v := c.getValue()
v.t = TypeArray
v.a = v.a[:0]
return v, s[1:], nil
}
a := c.getValue()
a.t = TypeArray
a.a = a.a[:0]
for {
var v *Value
var err error
s = skipWS(s)
v, s, err = parseValue(s, c, depth)
if err != nil {
return nil, s, fmt.Errorf("cannot parse array value: %s", err)
}
a.a = append(a.a, v)
s = skipWS(s)
if len(s) == 0 {
return nil, s, fmt.Errorf("unexpected end of array")
}
if s[0] == ',' {
s = s[1:]
continue
}
if s[0] == ']' {
s = s[1:]
return a, s, nil
}
return nil, s, fmt.Errorf("missing ',' after array value")
}
}
func parseObject(s string, c *cache, depth int) (*Value, string, error) {
s = skipWS(s)
if len(s) == 0 {
return nil, s, fmt.Errorf("missing '}'")
}
if s[0] == '}' {
v := c.getValue()
v.t = TypeObject
v.o.reset()
return v, s[1:], nil
}
o := c.getValue()
o.t = TypeObject
o.o.reset()
for {
var err error
kv := o.o.getKV()
// Parse key.
s = skipWS(s)
if len(s) == 0 || s[0] != '"' {
return nil, s, fmt.Errorf(`cannot find opening '"" for object key`)
}
kv.k, s, err = parseRawKey(s[1:])
if err != nil {
return nil, s, fmt.Errorf("cannot parse object key: %s", err)
}
s = skipWS(s)
if len(s) == 0 || s[0] != ':' {
return nil, s, fmt.Errorf("missing ':' after object key")
}
s = s[1:]
// Parse value
s = skipWS(s)
kv.v, s, err = parseValue(s, c, depth)
if err != nil {
return nil, s, fmt.Errorf("cannot parse object value: %s", err)
}
s = skipWS(s)
if len(s) == 0 {
return nil, s, fmt.Errorf("unexpected end of object")
}
if s[0] == ',' {
s = s[1:]
continue
}
if s[0] == '}' {
return o, s[1:], nil
}
return nil, s, fmt.Errorf("missing ',' after object value")
}
}
func escapeString(dst []byte, s string) []byte {
if !hasSpecialChars(s) {
// Fast path - nothing to escape.
dst = append(dst, '"')
dst = append(dst, s...)
dst = append(dst, '"')
return dst
}
// Slow path.
return strconv.AppendQuote(dst, s)
}
func hasSpecialChars(s string) bool {
if strings.IndexByte(s, '"') >= 0 || strings.IndexByte(s, '\\') >= 0 {
return true
}
for i := 0; i < len(s); i++ {
if s[i] < 0x20 {
return true
}
}
return false
}
func unescapeStringBestEffort(s string) string {
n := strings.IndexByte(s, '\\')
if n < 0 {
// Fast path - nothing to unescape.
return s
}
// Slow path - unescape string.
b := s2b(s) // It is safe to do, since s points to a byte slice in Parser.b.
b = b[:n]
s = s[n+1:]
for len(s) > 0 {
ch := s[0]
s = s[1:]
switch ch {
case '"':
b = append(b, '"')
case '\\':
b = append(b, '\\')
case '/':
b = append(b, '/')
case 'b':
b = append(b, '\b')
case 'f':
b = append(b, '\f')
case 'n':
b = append(b, '\n')
case 'r':
b = append(b, '\r')
case 't':
b = append(b, '\t')
case 'u':
if len(s) < 4 {
// Too short escape sequence. Just store it unchanged.
b = append(b, "\\u"...)
break
}
xs := s[:4]
x, err := strconv.ParseUint(xs, 16, 16)
if err != nil {
// Invalid escape sequence. Just store it unchanged.
b = append(b, "\\u"...)
break
}
s = s[4:]
if !utf16.IsSurrogate(rune(x)) {
b = append(b, string(rune(x))...)
break
}
// Surrogate.
// See https://en.wikipedia.org/wiki/Universal_Character_Set_characters#Surrogates
if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
b = append(b, "\\u"...)
b = append(b, xs...)
break
}
x1, err := strconv.ParseUint(s[2:6], 16, 16)
if err != nil {
b = append(b, "\\u"...)
b = append(b, xs...)
break
}
r := utf16.DecodeRune(rune(x), rune(x1))
b = append(b, string(r)...)
s = s[6:]
default:
// Unknown escape sequence. Just store it unchanged.
b = append(b, '\\', ch)
}
n = strings.IndexByte(s, '\\')
if n < 0 {
b = append(b, s...)
break
}
b = append(b, s[:n]...)
s = s[n+1:]
}
return b2s(b)
}
// parseRawKey is similar to parseRawString, but is optimized
// for small-sized keys without escape sequences.
func parseRawKey(s string) (string, string, error) {
for i := 0; i < len(s); i++ {
if s[i] == '"' {
// Fast path.
return s[:i], s[i+1:], nil
}
if s[i] == '\\' {
// Slow path.
return parseRawString(s)
}
}
return s, "", fmt.Errorf(`missing closing '"'`)
}
func parseRawString(s string) (string, string, error) {
n := strings.IndexByte(s, '"')
if n < 0 {
return s, "", fmt.Errorf(`missing closing '"'`)
}
if n == 0 || s[n-1] != '\\' {
// Fast path. No escaped ".
return s[:n], s[n+1:], nil
}
// Slow path - possible escaped " found.
ss := s
for {
i := n - 1
for i > 0 && s[i-1] == '\\' {
i--
}
if uint(n-i)%2 == 0 {
return ss[:len(ss)-len(s)+n], s[n+1:], nil
}
s = s[n+1:]
n = strings.IndexByte(s, '"')
if n < 0 {
return ss, "", fmt.Errorf(`missing closing '"'`)
}
if n == 0 || s[n-1] != '\\' {
return ss[:len(ss)-len(s)+n], s[n+1:], nil
}
}
}
func parseRawNumber(s string) (string, string, error) {
// The caller must ensure len(s) > 0
// Find the end of the number.
for i := 0; i < len(s); i++ {
ch := s[i]
if (ch >= '0' && ch <= '9') || ch == '.' || ch == '-' || ch == 'e' || ch == 'E' || ch == '+' {
continue
}
if i == 0 || i == 1 && (s[0] == '-' || s[0] == '+') {
if len(s[i:]) >= 3 {
xs := s[i : i+3]
if strings.EqualFold(xs, "inf") || strings.EqualFold(xs, "nan") {
return s[:i+3], s[i+3:], nil
}
}
return "", s, fmt.Errorf("unexpected char: %q", s[:1])
}
ns := s[:i]
s = s[i:]
return ns, s, nil
}
return s, "", nil
}
// Object represents JSON object.
//
// Object cannot be used from concurrent goroutines.
// Use per-goroutine parsers or ParserPool instead.
type Object struct {
kvs []kv
keysUnescaped bool
}
func (o *Object) reset() {
o.kvs = o.kvs[:0]
o.keysUnescaped = false
}
// MarshalTo appends marshaled o to dst and returns the result.
func (o *Object) MarshalTo(dst []byte) []byte {
dst = append(dst, '{')
for i, kv := range o.kvs {
if o.keysUnescaped {
dst = escapeString(dst, kv.k)
} else {
dst = append(dst, '"')
dst = append(dst, kv.k...)
dst = append(dst, '"')
}
dst = append(dst, ':')
dst = kv.v.MarshalTo(dst)
if i != len(o.kvs)-1 {
dst = append(dst, ',')
}
}
dst = append(dst, '}')
return dst
}
// String returns string representation for the o.
//
// This function is for debugging purposes only. It isn't optimized for speed.
// See MarshalTo instead.
func (o *Object) String() string {
b := o.MarshalTo(nil)
// It is safe converting b to string without allocation, since b is no longer
// reachable after this line.
return b2s(b)
}
func (o *Object) getKV() *kv {
if cap(o.kvs) > len(o.kvs) {
o.kvs = o.kvs[:len(o.kvs)+1]
} else {
o.kvs = append(o.kvs, kv{})
}
return &o.kvs[len(o.kvs)-1]
}
func (o *Object) unescapeKeys() {
if o.keysUnescaped {
return
}
kvs := o.kvs
for i := range kvs {
kv := &kvs[i]
kv.k = unescapeStringBestEffort(kv.k)
}
o.keysUnescaped = true
}
// Len returns the number of items in the o.
func (o *Object) Len() int {
return len(o.kvs)
}
// Get returns the value for the given key in the o.
//
// Returns nil if the value for the given key isn't found.
//
// The returned value is valid until Parse is called on the Parser returned o.
func (o *Object) Get(key string) *Value {
if !o.keysUnescaped && strings.IndexByte(key, '\\') < 0 {
// Fast path - try searching for the key without object keys unescaping.
for _, kv := range o.kvs {
if kv.k == key {
return kv.v
}
}
}
// Slow path - unescape object keys.
o.unescapeKeys()
for _, kv := range o.kvs {
if kv.k == key {
return kv.v
}
}
return nil
}
// Visit calls f for each item in the o in the original order
// of the parsed JSON.
//
// f cannot hold key and/or v after returning.
func (o *Object) Visit(f func(key []byte, v *Value)) {
if o == nil {
return
}
o.unescapeKeys()
for _, kv := range o.kvs {
f(s2b(kv.k), kv.v)
}
}
// Value represents any JSON value.
//
// Call Type in order to determine the actual type of the JSON value.
//
// Value cannot be used from concurrent goroutines.
// Use per-goroutine parsers or ParserPool instead.
type Value struct {
o Object
a []*Value
s string
t Type
}
// MarshalTo appends marshaled v to dst and returns the result.
func (v *Value) MarshalTo(dst []byte) []byte {
switch v.t {
case typeRawString:
dst = append(dst, '"')
dst = append(dst, v.s...)
dst = append(dst, '"')
return dst
case TypeObject:
return v.o.MarshalTo(dst)
case TypeArray:
dst = append(dst, '[')
for i, vv := range v.a {
dst = vv.MarshalTo(dst)
if i != len(v.a)-1 {
dst = append(dst, ',')
}
}
dst = append(dst, ']')
return dst
case TypeString:
return escapeString(dst, v.s)
case TypeNumber:
return append(dst, v.s...)
case TypeTrue:
return append(dst, "true"...)
case TypeFalse:
return append(dst, "false"...)
case TypeNull:
return append(dst, "null"...)
default:
panic(fmt.Errorf("BUG: unexpected Value type: %d", v.t))
}
}
// String returns string representation of the v.
//
// The function is for debugging purposes only. It isn't optimized for speed.
// See MarshalTo instead.
//
// Don't confuse this function with StringBytes, which must be called
// for obtaining the underlying JSON string for the v.
func (v *Value) String() string {
b := v.MarshalTo(nil)
// It is safe converting b to string without allocation, since b is no longer
// reachable after this line.
return b2s(b)
}
// Type represents JSON type.
type Type int
const (
// TypeNull is JSON null.
TypeNull Type = 0
// TypeObject is JSON object type.
TypeObject Type = 1
// TypeArray is JSON array type.
TypeArray Type = 2
// TypeString is JSON string type.
TypeString Type = 3
// TypeNumber is JSON number type.
TypeNumber Type = 4
// TypeTrue is JSON true.
TypeTrue Type = 5
// TypeFalse is JSON false.
TypeFalse Type = 6
typeRawString Type = 7
)
// String returns string representation of t.
func (t Type) String() string {
switch t {
case TypeObject:
return "object"
case TypeArray:
return "array"
case TypeString:
return "string"
case TypeNumber:
return "number"
case TypeTrue:
return "true"
case TypeFalse:
return "false"
case TypeNull:
return "null"
// typeRawString is skipped intentionally,
// since it shouldn't be visible to user.
default:
panic(fmt.Errorf("BUG: unknown Value type: %d", t))
}
}
// Type returns the type of the v.
func (v *Value) Type() Type {
if v.t == typeRawString {
v.s = unescapeStringBestEffort(v.s)
v.t = TypeString
}
return v.t
}
// Exists returns true if the field exists for the given keys path.
//
// Array indexes may be represented as decimal numbers in keys.
func (v *Value) Exists(keys ...string) bool {
v = v.Get(keys...)
return v != nil
}
// Get returns value by the given keys path.
//
// Array indexes may be represented as decimal numbers in keys.
//
// nil is returned for non-existing keys path.
//
// The returned value is valid until Parse is called on the Parser returned v.
func (v *Value) Get(keys ...string) *Value {
if v == nil {
return nil
}
for _, key := range keys {
if v.t == TypeObject {
v = v.o.Get(key)
if v == nil {
return nil
}
} else if v.t == TypeArray {
n, err := strconv.Atoi(key)
if err != nil || n < 0 || n >= len(v.a) {
return nil
}
v = v.a[n]
} else {
return nil
}
}
return v
}
// GetObject returns object value by the given keys path.
//
// Array indexes may be represented as decimal numbers in keys.
//
// nil is returned for non-existing keys path or for invalid value type.
//
// The returned object is valid until Parse is called on the Parser returned v.
func (v *Value) GetObject(keys ...string) *Object {
v = v.Get(keys...)
if v == nil || v.t != TypeObject {
return nil
}
return &v.o
}
// GetArray returns array value by the given keys path.
//
// Array indexes may be represented as decimal numbers in keys.
//
// nil is returned for non-existing keys path or for invalid value type.
//
// The returned array is valid until Parse is called on the Parser returned v.
func (v *Value) GetArray(keys ...string) []*Value {
v = v.Get(keys...)
if v == nil || v.t != TypeArray {
return nil
}
return v.a
}
// GetFloat64 returns float64 value by the given keys path.
//
// Array indexes may be represented as decimal numbers in keys.
//
// 0 is returned for non-existing keys path or for invalid value type.
func (v *Value) GetFloat64(keys ...string) float64 {
v = v.Get(keys...)
if v == nil || v.Type() != TypeNumber {
return 0
}
return fastfloat.ParseBestEffort(v.s)
}
// GetInt returns int value by the given keys path.
//
// Array indexes may be represented as decimal numbers in keys.
//
// 0 is returned for non-existing keys path or for invalid value type.
func (v *Value) GetInt(keys ...string) int {
v = v.Get(keys...)
if v == nil || v.Type() != TypeNumber {
return 0
}
n := fastfloat.ParseInt64BestEffort(v.s)
nn := int(n)
if int64(nn) != n {
return 0
}
return nn
}
// GetUint returns uint value by the given keys path.
//
// Array indexes may be represented as decimal numbers in keys.
//
// 0 is returned for non-existing keys path or for invalid value type.
func (v *Value) GetUint(keys ...string) uint {
v = v.Get(keys...)
if v == nil || v.Type() != TypeNumber {
return 0
}
n := fastfloat.ParseUint64BestEffort(v.s)
nn := uint(n)
if uint64(nn) != n {
return 0
}
return nn
}
// GetInt64 returns int64 value by the given keys path.
//
// Array indexes may be represented as decimal numbers in keys.
//
// 0 is returned for non-existing keys path or for invalid value type.
func (v *Value) GetInt64(keys ...string) int64 {
v = v.Get(keys...)
if v == nil || v.Type() != TypeNumber {
return 0
}
return fastfloat.ParseInt64BestEffort(v.s)
}
// GetUint64 returns uint64 value by the given keys path.
//
// Array indexes may be represented as decimal numbers in keys.
//
// 0 is returned for non-existing keys path or for invalid value type.
func (v *Value) GetUint64(keys ...string) uint64 {
v = v.Get(keys...)
if v == nil || v.Type() != TypeNumber {
return 0
}
return fastfloat.ParseUint64BestEffort(v.s)
}
// GetStringBytes returns string value by the given keys path.
//
// Array indexes may be represented as decimal numbers in keys.
//
// nil is returned for non-existing keys path or for invalid value type.
//
// The returned string is valid until Parse is called on the Parser returned v.
func (v *Value) GetStringBytes(keys ...string) []byte {
v = v.Get(keys...)
if v == nil || v.Type() != TypeString {
return nil
}
return s2b(v.s)
}
// GetBool returns bool value by the given keys path.
//
// Array indexes may be represented as decimal numbers in keys.
//
// false is returned for non-existing keys path or for invalid value type.
func (v *Value) GetBool(keys ...string) bool {
v = v.Get(keys...)
if v != nil && v.t == TypeTrue {
return true
}
return false
}
// Object returns the underlying JSON object for the v.
//
// The returned object is valid until Parse is called on the Parser returned v.
//
// Use GetObject if you don't need error handling.
func (v *Value) Object() (*Object, error) {
if v.t != TypeObject {
return nil, fmt.Errorf("value doesn't contain object; it contains %s", v.Type())
}
return &v.o, nil
}
// Array returns the underlying JSON array for the v.
//
// The returned array is valid until Parse is called on the Parser returned v.
//
// Use GetArray if you don't need error handling.
func (v *Value) Array() ([]*Value, error) {
if v.t != TypeArray {
return nil, fmt.Errorf("value doesn't contain array; it contains %s", v.Type())
}
return v.a, nil
}
// StringBytes returns the underlying JSON string for the v.
//
// The returned string is valid until Parse is called on the Parser returned v.
//
// Use GetStringBytes if you don't need error handling.
func (v *Value) StringBytes() ([]byte, error) {
if v.Type() != TypeString {
return nil, fmt.Errorf("value doesn't contain string; it contains %s", v.Type())
}
return s2b(v.s), nil
}
// Float64 returns the underlying JSON number for the v.
//
// Use GetFloat64 if you don't need error handling.
func (v *Value) Float64() (float64, error) {
if v.Type() != TypeNumber {
return 0, fmt.Errorf("value doesn't contain number; it contains %s", v.Type())
}
return fastfloat.Parse(v.s)
}
// Int returns the underlying JSON int for the v.
//
// Use GetInt if you don't need error handling.
func (v *Value) Int() (int, error) {
if v.Type() != TypeNumber {
return 0, fmt.Errorf("value doesn't contain number; it contains %s", v.Type())
}
n, err := fastfloat.ParseInt64(v.s)
if err != nil {
return 0, err
}
nn := int(n)
if int64(nn) != n {
return 0, fmt.Errorf("number %q doesn't fit int", v.s)
}
return nn, nil
}
// Uint returns the underlying JSON uint for the v.
//
// Use GetInt if you don't need error handling.
func (v *Value) Uint() (uint, error) {
if v.Type() != TypeNumber {
return 0, fmt.Errorf("value doesn't contain number; it contains %s", v.Type())
}
n, err := fastfloat.ParseUint64(v.s)
if err != nil {
return 0, err
}
nn := uint(n)
if uint64(nn) != n {
return 0, fmt.Errorf("number %q doesn't fit uint", v.s)
}
return nn, nil
}
// Int64 returns the underlying JSON int64 for the v.
//
// Use GetInt64 if you don't need error handling.
func (v *Value) Int64() (int64, error) {
if v.Type() != TypeNumber {
return 0, fmt.Errorf("value doesn't contain number; it contains %s", v.Type())
}
return fastfloat.ParseInt64(v.s)
}
// Uint64 returns the underlying JSON uint64 for the v.
//
// Use GetInt64 if you don't need error handling.
func (v *Value) Uint64() (uint64, error) {
if v.Type() != TypeNumber {
return 0, fmt.Errorf("value doesn't contain number; it contains %s", v.Type())
}
return fastfloat.ParseUint64(v.s)
}
// Bool returns the underlying JSON bool for the v.
//
// Use GetBool if you don't need error handling.
func (v *Value) Bool() (bool, error) {
if v.t == TypeTrue {
return true, nil
}
if v.t == TypeFalse {
return false, nil
}
return false, fmt.Errorf("value doesn't contain bool; it contains %s", v.Type())
}
var (
valueTrue = &Value{t: TypeTrue}
valueFalse = &Value{t: TypeFalse}
valueNull = &Value{t: TypeNull}
)