VictoriaMetrics/lib/logstorage/values_encoder.go

package logstorage

import (
	"fmt"
	"math"
	"math/bits"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
)

// valueType is the type of values stored in every column block.
//
// It selects how the encoded values of a block must be interpreted on decoding.
type valueType byte

const (
	// valueTypeUnknown is used for determining whether the value type is unknown.
	valueTypeUnknown = valueType(0)

	// default encoding for column blocks. Strings are stored as is.
	valueTypeString = valueType(1)

	// column blocks with small number of unique values are encoded as dict.
	// Every value is stored as a single-byte index into the dict.
	valueTypeDict = valueType(2)

	// uint values up to 2^8-1 are encoded into valueTypeUint8.
	// Every value occupies a single byte.
	valueTypeUint8 = valueType(3)

	// uint values up to 2^16-1 are encoded into valueTypeUint16.
	// Every value occupies 2 bytes.
	valueTypeUint16 = valueType(4)

	// uint values up to 2^32-1 are encoded into valueTypeUint32.
	// Every value occupies 4 bytes.
	valueTypeUint32 = valueType(5)

	// uint values up to 2^64-1 are encoded into valueTypeUint64.
	// Every value occupies 8 bytes.
	valueTypeUint64 = valueType(6)

	// int values in the range [-(2^63) ... 2^63-1] are encoded into valueTypeInt64.
	// Every value occupies 8 bytes.
	//
	// Note that the numeric id of this type (10) doesn't follow the declaration order.
	valueTypeInt64 = valueType(10)

	// floating-point values are encoded into valueTypeFloat64.
	// Every value occupies 8 bytes.
	valueTypeFloat64 = valueType(7)

	// column blocks with ipv4 addresses are encoded as 4-byte strings.
	valueTypeIPv4 = valueType(8)

	// column blocks with ISO8601 timestamps are encoded into valueTypeTimestampISO8601.
	// These timestamps are commonly used by Logstash.
	// Every value occupies 8 bytes.
	valueTypeTimestampISO8601 = valueType(9)
)

// String returns a short human-readable name for t.
//
// Values outside the known set are rendered as "unknown valueType=N".
func (t valueType) String() string {
	var name string
	switch t {
	case valueTypeUnknown:
		name = "unknown"
	case valueTypeString:
		name = "string"
	case valueTypeDict:
		name = "dict"
	case valueTypeUint8:
		name = "uint8"
	case valueTypeUint16:
		name = "uint16"
	case valueTypeUint32:
		name = "uint32"
	case valueTypeUint64:
		name = "uint64"
	case valueTypeInt64:
		name = "int64"
	case valueTypeFloat64:
		name = "float64"
	case valueTypeIPv4:
		name = "ipv4"
	case valueTypeTimestampISO8601:
		name = "iso8601"
	default:
		name = fmt.Sprintf("unknown valueType=%d", t)
	}
	return name
}

// valuesEncoder holds the reusable state needed for encoding a block of string values.
//
// See the encode method for details on the supported encodings.
type valuesEncoder struct {
	// buf contains data for values.
	buf []byte

	// values contains encoded values.
	values []string
}

// reset empties ve, while keeping the allocated capacity for further re-use.
func (ve *valuesEncoder) reset() {
	// Zero the stored strings before truncating, so the slice no longer references them.
	clear(ve.values)
	ve.values = ve.values[:0]

	ve.buf = ve.buf[:0]
}

// encode encodes values to ve.values and returns the encoded value type with min/max encoded values.
//
// The encodings are tried in the following order: dict, uint, int, float64, ipv4, ISO8601 timestamp.
// If none of them fits, then values are stored as is with valueTypeString.
// Zero min/max values are returned for valueTypeString and valueTypeDict.
//
// ve.values and dict is valid until values are changed.
func (ve *valuesEncoder) encode(values []string, dict *valuesDict) (valueType, uint64, uint64) {
	ve.reset()

	if len(values) == 0 {
		return valueTypeString, 0, 0
	}

	// Try dict encoding at first, since it gives the highest speedup during querying.
	// It also usually gives the best compression, since every value is encoded as a single byte.
	var vt valueType
	ve.buf, ve.values, vt = tryDictEncoding(ve.buf[:0], ve.values[:0], values, dict)
	if vt != valueTypeUnknown {
		return vt, 0, 0
	}

	// Try the remaining typed encodings in the order of preference.
	type typedEncoder func(dstBuf []byte, dstValues, srcValues []string) ([]byte, []string, valueType, uint64, uint64)
	typedEncoders := [...]typedEncoder{
		tryUintEncoding,
		tryIntEncoding,
		tryFloat64Encoding,
		tryIPv4Encoding,
		tryTimestampISO8601Encoding,
	}
	for _, tryEncode := range typedEncoders {
		var minValue, maxValue uint64
		ve.buf, ve.values, vt, minValue, maxValue = tryEncode(ve.buf[:0], ve.values[:0], values)
		if vt != valueTypeUnknown {
			return vt, minValue, maxValue
		}
	}

	// Fall back to default encoding, e.g. leave values as is.
	ve.values = append(ve.values[:0], values...)
	return valueTypeString, 0, 0
}

// getValuesEncoder returns a valuesEncoder from the pool or a freshly allocated one if the pool is empty.
//
// Return the encoder to the pool via putValuesEncoder when it is no longer needed.
func getValuesEncoder() *valuesEncoder {
	if v := valuesEncoderPool.Get(); v != nil {
		return v.(*valuesEncoder)
	}
	return &valuesEncoder{}
}

// putValuesEncoder resets ve and returns it to the pool.
//
// ve and the data it holds mustn't be used after the call.
func putValuesEncoder(ve *valuesEncoder) {
	ve.reset()
	valuesEncoderPool.Put(ve)
}

// valuesEncoderPool holds *valuesEncoder items for re-use via getValuesEncoder and putValuesEncoder.
var valuesEncoderPool sync.Pool

// valuesDecoder decodes values encoded by valuesEncoder back into their string form.
type valuesDecoder struct {
	// buf accumulates the bytes of the decoded values; see decodeInplace.
	buf []byte
}

// reset truncates vd.buf, while keeping its capacity for further re-use.
//
// Values previously decoded via decodeInplace mustn't be used after the call.
func (vd *valuesDecoder) reset() {
	vd.buf = vd.buf[:0]
}

// decodeInplace decodes values encoded with the given vt and the given dictValues inplace.
//
// Every values[i] is replaced with its decoded string form. dictValues is used only for valueTypeDict.
//
// the decoded values remain valid until vd.reset() is called.
//
// An error is returned if vt is unknown, if some encoded value has unexpected length for the given vt
// or if a dict-encoded value refers to a missing dictValues entry.
// values may be partially decoded in this case.
func (vd *valuesDecoder) decodeInplace(values []string, vt valueType, dictValues []string) error {
	// do not reset vd.buf, since it may contain previously decoded data,
	// which must be preserved until reset() call.
	dstBuf := vd.buf

	switch vt {
	case valueTypeString:
		// nothing to do - values are already decoded.
	case valueTypeDict:
		// Copy dictValues into dstBuf, so the decoded values do not depend on dictValues lifetime.
		sb := getStringBucket()
		for _, v := range dictValues {
			dstLen := len(dstBuf)
			dstBuf = append(dstBuf, v...)
			sb.a = append(sb.a, bytesutil.ToUnsafeString(dstBuf[dstLen:]))
		}
		for i, v := range values {
			// Every dict-encoded value is a single-byte index into dictValues.
			// Verify the length before accessing v[0] in order to avoid panic on corrupted data.
			if len(v) != 1 {
				putStringBucket(sb)
				return fmt.Errorf("unexpected value length for dict; got %d; want 1", len(v))
			}
			id := int(v[0])
			if id >= len(dictValues) {
				putStringBucket(sb)
				return fmt.Errorf("unexpected dictionary id: %d; it must be smaller than %d", id, len(dictValues))
			}
			values[i] = sb.a[id]
		}
		putStringBucket(sb)
	case valueTypeUint8:
		for i, v := range values {
			if len(v) != 1 {
				return fmt.Errorf("unexpected value length for uint8; got %d; want 1", len(v))
			}
			n := unmarshalUint8(v)
			dstLen := len(dstBuf)
			dstBuf = marshalUint8String(dstBuf, n)
			values[i] = bytesutil.ToUnsafeString(dstBuf[dstLen:])
		}
	case valueTypeUint16:
		for i, v := range values {
			if len(v) != 2 {
				return fmt.Errorf("unexpected value length for uint16; got %d; want 2", len(v))
			}
			n := unmarshalUint16(v)
			dstLen := len(dstBuf)
			dstBuf = marshalUint16String(dstBuf, n)
			values[i] = bytesutil.ToUnsafeString(dstBuf[dstLen:])
		}
	case valueTypeUint32:
		for i, v := range values {
			if len(v) != 4 {
				return fmt.Errorf("unexpected value length for uint32; got %d; want 4", len(v))
			}
			n := unmarshalUint32(v)
			dstLen := len(dstBuf)
			dstBuf = marshalUint32String(dstBuf, n)
			values[i] = bytesutil.ToUnsafeString(dstBuf[dstLen:])
		}
	case valueTypeUint64:
		for i, v := range values {
			if len(v) != 8 {
				return fmt.Errorf("unexpected value length for uint64; got %d; want 8", len(v))
			}
			n := unmarshalUint64(v)
			dstLen := len(dstBuf)
			dstBuf = marshalUint64String(dstBuf, n)
			values[i] = bytesutil.ToUnsafeString(dstBuf[dstLen:])
		}
	case valueTypeInt64:
		for i, v := range values {
			if len(v) != 8 {
				return fmt.Errorf("unexpected value length for int64; got %d; want 8", len(v))
			}
			n := unmarshalInt64(v)
			dstLen := len(dstBuf)
			dstBuf = marshalInt64String(dstBuf, n)
			values[i] = bytesutil.ToUnsafeString(dstBuf[dstLen:])
		}
	case valueTypeFloat64:
		for i, v := range values {
			if len(v) != 8 {
				return fmt.Errorf("unexpected value length for float64; got %d; want 8", len(v))
			}
			f := unmarshalFloat64(v)
			dstLen := len(dstBuf)
			dstBuf = marshalFloat64String(dstBuf, f)
			values[i] = bytesutil.ToUnsafeString(dstBuf[dstLen:])
		}
	case valueTypeIPv4:
		for i, v := range values {
			if len(v) != 4 {
				return fmt.Errorf("unexpected value length for ipv4; got %d; want 4", len(v))
			}
			ip := unmarshalIPv4(v)
			dstLen := len(dstBuf)
			dstBuf = marshalIPv4String(dstBuf, ip)
			values[i] = bytesutil.ToUnsafeString(dstBuf[dstLen:])
		}
	case valueTypeTimestampISO8601:
		for i, v := range values {
			if len(v) != 8 {
				return fmt.Errorf("unexpected value length for iso8601 timestamp; got %d; want 8", len(v))
			}
			timestamp := unmarshalTimestampISO8601(v)
			dstLen := len(dstBuf)
			dstBuf = marshalTimestampISO8601String(dstBuf, timestamp)
			values[i] = bytesutil.ToUnsafeString(dstBuf[dstLen:])
		}
	default:
		return fmt.Errorf("unknown valueType=%d", vt)
	}

	vd.buf = dstBuf
	return nil
}

// getValuesDecoder returns a valuesDecoder from the pool or a freshly allocated one if the pool is empty.
//
// Return the decoder to the pool via putValuesDecoder when it is no longer needed.
func getValuesDecoder() *valuesDecoder {
	if v := valuesDecoderPool.Get(); v != nil {
		return v.(*valuesDecoder)
	}
	return &valuesDecoder{}
}

// putValuesDecoder resets vd and returns it to the pool.
//
// vd and the values decoded with it mustn't be used after the call.
func putValuesDecoder(vd *valuesDecoder) {
	vd.reset()
	valuesDecoderPool.Put(vd)
}

// valuesDecoderPool holds *valuesDecoder items for re-use via getValuesDecoder and putValuesDecoder.
var valuesDecoderPool sync.Pool

// tryTimestampISO8601Encoding tries encoding srcValues as ISO8601 timestamps.
//
// On success the 8-byte encoded timestamps are appended to dstBuf, the corresponding values are appended to dstValues
// and valueTypeTimestampISO8601 is returned together with the min and max timestamps.
// If some of srcValues cannot be parsed, then dstBuf and dstValues are returned as is together with valueTypeUnknown.
func tryTimestampISO8601Encoding(dstBuf []byte, dstValues, srcValues []string) ([]byte, []string, valueType, uint64, uint64) {
	i64s := encoding.GetInt64s(len(srcValues))
	defer encoding.PutInt64s(i64s)

	// Parse all the values at first, so nothing is appended to dst* on failure.
	timestamps := i64s.A
	var minValue, maxValue int64
	for i, v := range srcValues {
		ts, ok := tryParseTimestampISO8601(v)
		if !ok {
			return dstBuf, dstValues, valueTypeUnknown, 0, 0
		}
		timestamps[i] = ts
		switch {
		case i == 0:
			minValue, maxValue = ts, ts
		case ts < minValue:
			minValue = ts
		case ts > maxValue:
			maxValue = ts
		}
	}

	for _, ts := range timestamps {
		start := len(dstBuf)
		dstBuf = encoding.MarshalUint64(dstBuf, uint64(ts))
		dstValues = append(dstValues, bytesutil.ToUnsafeString(dstBuf[start:]))
	}
	return dstBuf, dstValues, valueTypeTimestampISO8601, uint64(minValue), uint64(maxValue)
}

// TryParseTimestampRFC3339Nano parses s as RFC3339 with optional nanoseconds part and timezone offset and returns unix timestamp in nanoseconds.
//
// If s doesn't contain timezone offset, then the local timezone is used.
//
// The fractional part of seconds may contain up to 9 decimal digits.
//
// The returned timestamp can be negative if s is smaller than 1970 year.
func TryParseTimestampRFC3339Nano(s string) (int64, bool) {
	if len(s) < len("2006-01-02T15:04:05") {
		return 0, false
	}

	secs, ok, tail := tryParseTimestampSecs(s)
	if !ok {
		return 0, false
	}
	nsecs := secs * 1e9

	// The timezone offset is located at the end of tail; the remaining prefix is the fractional part.
	offsetNsecs, fraction, ok := parseTimezoneOffset(tail)
	if !ok {
		return 0, false
	}
	nsecs -= offsetNsecs

	if len(fraction) == 0 {
		// There is no fractional part of seconds.
		return nsecs, true
	}
	if fraction[0] == '.' {
		fraction = fraction[1:]
	}
	if len(fraction) > 9 {
		return 0, false
	}
	fracValue, ok := tryParseDateUint64(fraction)
	if !ok {
		return 0, false
	}

	// Scale the fractional part to nanoseconds, e.g. ".5" is 500_000_000 nanoseconds.
	if missingDigits := 9 - len(fraction); missingDigits > 0 {
		fracValue *= uint64(math.Pow10(missingDigits))
	}
	return nsecs + int64(fracValue), true
}

// parseTimezoneOffset parses the timezone offset at the end of s.
//
// It returns the offset in nanoseconds, the part of s in front of the offset and true on success.
// The following forms are supported:
//
//   - 'Z' suffix - zero offset
//   - '+hh:mm' or '-hh:mm' suffix
//   - no suffix - the offset of the local timezone is returned
//
// On failure the unmodified s is returned together with false.
func parseTimezoneOffset(s string) (int64, string, bool) {
	if prefix, ok := strings.CutSuffix(s, "Z"); ok {
		return 0, prefix, true
	}

	n := strings.LastIndexAny(s, "+-")
	if n < 0 {
		// There is no explicit offset - fall back to the local timezone.
		return timeutil.GetLocalTimezoneOffsetNsecs(), s, true
	}

	sign, offsetStr := s[n], s[n+1:]
	if len(offsetStr) == 0 {
		return 0, s, false
	}
	offsetNsecs, ok := tryParseHHMM(offsetStr)
	if !ok {
		return 0, s, false
	}
	if sign == '-' {
		offsetNsecs = -offsetNsecs
	}
	return offsetNsecs, s[:n], true
}

// tryParseHHMM parses s in the form 'hh:mm' and returns the corresponding duration in nanoseconds.
//
// Hours up to 24 and minutes up to 60 (both inclusive) are accepted.
func tryParseHHMM(s string) (int64, bool) {
	if len(s) != len("hh:mm") {
		return 0, false
	}
	if s[2] != ':' {
		return 0, false
	}

	hours, ok := tryParseDateUint64(s[:2])
	if !ok || hours > 24 {
		return 0, false
	}
	minutes, ok := tryParseDateUint64(s[3:])
	if !ok || minutes > 60 {
		return 0, false
	}

	offsetNsecs := int64(hours)*nsecsPerHour + int64(minutes)*nsecsPerMinute
	return offsetNsecs, true
}

// tryParseTimestampISO8601 parses 'YYYY-MM-DDThh:mm:ss.mssZ' and returns unix timestamp in nanoseconds.
//
// Only the fixed-length form with milliseconds and 'Z' suffix is accepted.
//
// The returned timestamp can be negative if s is smaller than 1970 year.
func tryParseTimestampISO8601(s string) (int64, bool) {
	// Do not parse timestamps with timezone, since they cannot be converted back
	// to the same string representation in general case.
	// This may break search.
	if len(s) != len("2006-01-02T15:04:05.000Z") {
		return 0, false
	}

	secs, ok, tail := tryParseTimestampSecs(s)
	if !ok {
		return 0, false
	}

	// tail must have the form '.mssZ' now.
	if tail[0] != '.' {
		return 0, false
	}
	tail = tail[1:]

	const msecsLen = len("000")
	if tail[msecsLen] != 'Z' {
		return 0, false
	}
	msecs, ok := tryParseDateUint64(tail[:msecsLen])
	if !ok {
		return 0, false
	}

	// The length of s is verified above, so nothing may be left after 'Z'.
	if rest := tail[msecsLen+1:]; len(rest) != 0 {
		logger.Panicf("BUG: unexpected tail in timestamp: %q", rest)
	}

	return secs*1e9 + int64(msecs)*1e6, true
}

// tryParseTimestampSecs parses YYYY-MM-DDTHH:mm:ss into unix timestamp in seconds.
//
// It returns the parsed timestamp, the success flag and the unparsed tail of s after the seconds.
// On failure the returned tail is the part of s which hasn't been consumed yet.
//
// s must contain at least len("YYYY-MM-DDTHH:mm:ss") bytes, since the fields are accessed
// by fixed offsets without length checks. The callers visible in this file verify the length of s
// before the call.
//
// Only years in the range [1677 ... 2262] are accepted.
//
// NOTE(review): month, day, hour, minute and second aren't range-checked here.
// time.Date normalizes out-of-range values instead of rejecting them.
func tryParseTimestampSecs(s string) (int64, bool, string) {
	// Parse year
	if s[len("YYYY")] != '-' {
		return 0, false, s
	}
	yearStr := s[:len("YYYY")]
	n, ok := tryParseDateUint64(yearStr)
	if !ok || n < 1677 || n > 2262 {
		return 0, false, s
	}
	year := int(n)
	s = s[len("YYYY")+1:]

	// Parse month
	if s[len("MM")] != '-' {
		return 0, false, s
	}
	monthStr := s[:len("MM")]
	n, ok = tryParseDateUint64(monthStr)
	if !ok {
		return 0, false, s
	}
	month := time.Month(n)
	s = s[len("MM")+1:]

	// Parse day.
	//
	// Allow whitespace additionally to T as the delimiter after DD,
	// so SQL datetime format can be parsed additionally to RFC3339.
	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6721
	delim := s[len("DD")]
	if delim != 'T' && delim != ' ' {
		return 0, false, s
	}
	dayStr := s[:len("DD")]
	n, ok = tryParseDateUint64(dayStr)
	if !ok {
		return 0, false, s
	}
	day := int(n)
	s = s[len("DD")+1:]

	// Parse hour
	if s[len("HH")] != ':' {
		return 0, false, s
	}
	hourStr := s[:len("HH")]
	n, ok = tryParseDateUint64(hourStr)
	if !ok {
		return 0, false, s
	}
	hour := int(n)
	s = s[len("HH")+1:]

	// Parse minute
	if s[len("MM")] != ':' {
		return 0, false, s
	}
	minuteStr := s[:len("MM")]
	n, ok = tryParseDateUint64(minuteStr)
	if !ok {
		return 0, false, s
	}
	minute := int(n)
	s = s[len("MM")+1:]

	// Parse second
	//
	// There is no delimiter check after SS - the remaining tail is returned to the caller.
	secondStr := s[:len("SS")]
	n, ok = tryParseDateUint64(secondStr)
	if !ok {
		return 0, false, s
	}
	second := int(n)
	s = s[len("SS"):]

	// Make sure the timestamp can be converted to nanoseconds without int64 overflow.
	secs := time.Date(year, month, day, hour, minute, second, 0, time.UTC).Unix()
	if secs < int64(-1<<63)/1e9 || secs >= int64((1<<63)-1)/1e9 {
		// Too big or too small timestamp
		return 0, false, s
	}
	return secs, true, s
}

// tryParseUint64 parses s as uint64 value.
//
// Underscore chars in s are skipped, so "1_000" is parsed as 1000.
// Multi-char strings starting with '0' are rejected, since they cannot be converted back
// to the original string.
//
// false is returned if s isn't a valid decimal number or if it doesn't fit uint64.
func tryParseUint64(s string) (uint64, bool) {
	if len(s) == 0 || len(s) > len("18_446_744_073_709_551_615") {
		return 0, false
	}
	if s[0] == '0' && len(s) > 1 {
		// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8361
		return 0, false
	}

	const maxUint64 = 1<<64 - 1

	var result uint64
	for _, ch := range []byte(s) {
		switch {
		case ch == '_':
			// Skip digit group separators.
			continue
		case ch < '0' || ch > '9':
			return 0, false
		case result > maxUint64/10:
			// result*10 would overflow uint64.
			return 0, false
		}
		digit := uint64(ch - '0')
		result *= 10
		if result > maxUint64-digit {
			// result+digit would overflow uint64.
			return 0, false
		}
		result += digit
	}
	return result, true
}

// tryParseDateUint64 parses s (which is a part of some timestamp) as uint64 value.
//
// s must consist of 1 to 9 decimal digits. Leading zeros are allowed, e.g. "07" is parsed as 7.
// false is returned for any other input.
func tryParseDateUint64(s string) (uint64, bool) {
	if len(s) == 0 || len(s) > 9 {
		return 0, false
	}

	if len(s) == 2 {
		// fast path for two-digit number, which is used in hours, minutes and seconds
		//
		// Both chars must be verified. The byte subtraction wraps around for chars below '0',
		// so a single `> 9` check covers both sides of the digit range.
		hi, lo := s[0]-'0', s[1]-'0'
		if hi > 9 || lo > 9 {
			return 0, false
		}
		return 10*uint64(hi) + uint64(lo), true
	}

	// s contains at most 9 digits, so the result cannot overflow uint64.
	n := uint64(0)
	for i := 0; i < len(s); i++ {
		d := s[i] - '0'
		if d > 9 {
			return 0, false
		}
		n = n*10 + uint64(d)
	}
	return n, true
}

// tryParseInt64 parses s as int64 value.
//
// s may start with '-'. The rest of s is parsed with tryParseUint64.
// false is returned if s cannot be parsed or if the value doesn't fit int64.
func tryParseInt64(s string) (int64, bool) {
	if len(s) == 0 {
		return 0, false
	}

	negative := s[0] == '-'
	if negative {
		s = s[1:]
	}
	u, ok := tryParseUint64(s)
	if !ok {
		return 0, false
	}

	// absMinInt64 is the absolute value of the smallest int64.
	const absMinInt64 = 1 << 63
	switch {
	case u < absMinInt64:
		if negative {
			return -int64(u), true
		}
		return int64(u), true
	case negative && u == absMinInt64:
		// -(2^63) fits int64, while 2^63 doesn't.
		return -1 << 63, true
	default:
		return 0, false
	}
}

// tryIPv4Encoding tries encoding srcValues as IPv4 addresses.
//
// On success the 4-byte encoded addresses are appended to dstBuf, the corresponding values are appended to dstValues
// and valueTypeIPv4 is returned together with the min and max addresses.
// If some of srcValues cannot be parsed, then dstBuf and dstValues are returned as is together with valueTypeUnknown.
func tryIPv4Encoding(dstBuf []byte, dstValues, srcValues []string) ([]byte, []string, valueType, uint64, uint64) {
	u32s := encoding.GetUint32s(len(srcValues))
	defer encoding.PutUint32s(u32s)

	// Parse all the values at first, so nothing is appended to dst* on failure.
	ips := u32s.A
	var minValue, maxValue uint32
	for i, v := range srcValues {
		ip, ok := tryParseIPv4(v)
		if !ok {
			return dstBuf, dstValues, valueTypeUnknown, 0, 0
		}
		ips[i] = ip
		switch {
		case i == 0:
			minValue, maxValue = ip, ip
		case ip < minValue:
			minValue = ip
		case ip > maxValue:
			maxValue = ip
		}
	}

	for _, ip := range ips {
		start := len(dstBuf)
		dstBuf = encoding.MarshalUint32(dstBuf, ip)
		dstValues = append(dstValues, bytesutil.ToUnsafeString(dstBuf[start:]))
	}
	return dstBuf, dstValues, valueTypeIPv4, uint64(minValue), uint64(maxValue)
}

// tryParseIPv4 tries parsing ipv4 from s.
func tryParseIPv4(s string) (uint32, bool) {
	if len(s) < len("1.1.1.1") || len(s) > len("255.255.255.255") || strings.Count(s, ".") != 3 {
		// Fast path - the entry isn't IPv4
		return 0, false
	}

	var octets [4]byte
	var v uint64
	var ok bool

	// Parse octet 1
	n := strings.IndexByte(s, '.')
	if n <= 0 || n > 3 {
		return 0, false
	}
	v, ok = tryParseDateUint64(s[:n])
	if !ok || v > 255 {
		return 0, false
	}
	octets[0] = byte(v)
	s = s[n+1:]

	// Parse octet 2
	n = strings.IndexByte(s, '.')
	if n <= 0 || n > 3 {
		return 0, false
	}
	v, ok = tryParseDateUint64(s[:n])
	if !ok || v > 255 {
		return 0, false
	}
	octets[1] = byte(v)
	s = s[n+1:]

	// Parse octet 3
	n = strings.IndexByte(s, '.')
	if n <= 0 || n > 3 {
		return 0, false
	}
	v, ok = tryParseDateUint64(s[:n])
	if !ok || v > 255 {
		return 0, false
	}
	octets[2] = byte(v)
	s = s[n+1:]

	// Parse octet 4
	v, ok = tryParseDateUint64(s)
	if !ok || v > 255 {
		return 0, false
	}
	octets[3] = byte(v)

	ipv4 := encoding.UnmarshalUint32(octets[:])
	return ipv4, true
}

// tryFloat64Encoding tries encoding srcValues as float64 numbers.
//
// On success the 8-byte encoded numbers are appended to dstBuf, the corresponding values are appended to dstValues
// and valueTypeFloat64 is returned together with the IEEE 754 bits of the min and max numbers.
// If some of srcValues cannot be parsed with tryParseFloat64Exact, then dstBuf and dstValues are returned as is
// together with valueTypeUnknown.
func tryFloat64Encoding(dstBuf []byte, dstValues, srcValues []string) ([]byte, []string, valueType, uint64, uint64) {
	u64s := encoding.GetUint64s(len(srcValues))
	defer encoding.PutUint64s(u64s)

	// Parse all the values at first, so nothing is appended to dst* on failure.
	floatBits := u64s.A
	var minValue, maxValue float64
	for i, v := range srcValues {
		f, ok := tryParseFloat64Exact(v)
		if !ok {
			return dstBuf, dstValues, valueTypeUnknown, 0, 0
		}
		floatBits[i] = math.Float64bits(f)
		switch {
		case i == 0:
			minValue, maxValue = f, f
		case f < minValue:
			minValue = f
		case f > maxValue:
			maxValue = f
		}
	}

	for _, n := range floatBits {
		start := len(dstBuf)
		dstBuf = encoding.MarshalUint64(dstBuf, n)
		dstValues = append(dstValues, bytesutil.ToUnsafeString(dstBuf[start:]))
	}
	return dstBuf, dstValues, valueTypeFloat64, math.Float64bits(minValue), math.Float64bits(maxValue)
}

// tryParseFloat64Prefix tries parsing float64 number at the beginning of s and returns the remaining tail.
//
// The number consists of the longest prefix of s containing only decimal digits, dots and underscores.
// If s doesn't start with such a char, then false is returned together with the unmodified s.
func tryParseFloat64Prefix(s string) (float64, bool, string) {
	end := 0
	for end < len(s) {
		ch := s[end]
		if (ch < '0' || ch > '9') && ch != '.' && ch != '_' {
			break
		}
		end++
	}
	if end == 0 {
		return 0, false, s
	}

	f, ok := tryParseFloat64(s[:end])
	return f, ok, s[end:]
}

// tryParseFloat64 tries parsing s as float64.
//
// Only decimal digits, underscores, a leading minus and a single dot are allowed in s.
//
// The parsed result may lose precision, e.g. it may not match the original value when converting back to string.
// Use tryParseFloat64Exact when lossless parsing is needed.
func tryParseFloat64(s string) (float64, bool) {
	return tryParseFloat64Internal(s, false)
}

// tryParseFloat64Exact tries parsing s as float64.
//
// Contrary to tryParseFloat64, it rejects integers without a dot, which are greater than or equal to 2^53,
// since they cannot be represented as float64 without precision loss.
func tryParseFloat64Exact(s string) (float64, bool) {
	return tryParseFloat64Internal(s, true)
}

// tryParseFloat64Internal parses s as a decimal floating-point number in the form [-]int[.frac].
//
// If isExact is set, then integers without a dot, which are greater than or equal to 2^53, are rejected.
//
// NOTE(review): the fractional part is parsed with tryParseUint64, which rejects multi-char strings
// starting with '0', so numbers such as "1.05" aren't parsed. Confirm whether this is intended.
func tryParseFloat64Internal(s string, isExact bool) (float64, bool) {
	if len(s) == 0 || len(s) > len("-18_446_744_073_709_551_615") {
		return 0, false
	}
	// Allow only decimal digits, minus and a dot.
	// Do not allows scientific notation (for example 1.23E+05),
	// since it cannot be converted back to the same string form.

	negative := s[0] == '-'
	if negative {
		s = s[1:]
	}

	var f float64
	dotIdx := strings.IndexByte(s, '.')
	switch {
	case dotIdx < 0:
		// fast path - there are no dots
		n, ok := tryParseUint64(s)
		if !ok {
			return 0, false
		}
		if isExact && n >= (1<<53) {
			// The integer cannot be represented as float64 without precision loss.
			return 0, false
		}
		f = float64(n)
	case dotIdx == 0 || dotIdx == len(s)-1:
		// Do not allow dots at the beginning and at the end of s,
		// since they cannot be converted back to the same string form.
		return 0, false
	default:
		sInt, sFrac := s[:dotIdx], s[dotIdx+1:]
		nInt, ok := tryParseUint64(sInt)
		if !ok {
			return 0, false
		}
		nFrac, ok := tryParseUint64(sFrac)
		if !ok {
			return 0, false
		}
		// Underscores aren't digits, so they mustn't be taken into account when scaling the fractional part.
		fracDigits := len(sFrac) - strings.Count(sFrac, "_")
		f = math.FMA(float64(nFrac), math.Pow10(-fracDigits), float64(nInt))
	}

	if negative {
		f = -f
	}
	return f, true
}

// tryParseBytes parses user-readable bytes representation in s.
//
// s consists of an optional leading minus followed by one or more 'number[suffix]' pairs, which are summed.
//
// Supported suffixes:
//
//	B - for 1
//	K, KB - for 1000
//	M, MB - for 1000^2
//	G, GB - for 1000^3
//	T, TB - for 1000^4
//	Ki, KiB - for 1024
//	Mi, MiB - for 1024^2
//	Gi, GiB - for 1024^3
//	Ti, TiB - for 1024^4
//
// A number without suffix is allowed only at the end of s and it must be an integer.
// The result is limited by the max int64 value via addInt64NoOverflow.
func tryParseBytes(s string) (int64, bool) {
	if len(s) == 0 {
		return 0, false
	}

	negative := s[0] == '-'
	if negative {
		s = s[1:]
	}

	// Longer suffixes go first, so "KiB" is matched before "Ki" and "KB" is matched before "K".
	units := [...]struct {
		suffix     string
		multiplier float64
	}{
		{"KiB", 1 << 10},
		{"MiB", 1 << 20},
		{"GiB", 1 << 30},
		{"TiB", 1 << 40},
		{"Ki", 1 << 10},
		{"Mi", 1 << 20},
		{"Gi", 1 << 30},
		{"Ti", 1 << 40},
		{"KB", 1_000},
		{"MB", 1_000_000},
		{"GB", 1_000_000_000},
		{"TB", 1_000_000_000_000},
		{"B", 1},
		{"K", 1_000},
		{"M", 1_000_000},
		{"G", 1_000_000_000},
		{"T", 1_000_000_000_000},
	}

	total := int64(0)
	for len(s) > 0 {
		f, ok, tail := tryParseFloat64Prefix(s)
		if !ok {
			return 0, false
		}
		if len(tail) == 0 {
			if _, frac := math.Modf(f); frac != 0 {
				// deny floating-point numbers without any suffix.
				return 0, false
			}
			total = addInt64NoOverflow(total, f)
			break
		}

		matched := false
		for _, u := range units {
			if strings.HasPrefix(tail, u.suffix) {
				total = addInt64NoOverflow(total, f*u.multiplier)
				s = tail[len(u.suffix):]
				matched = true
				break
			}
		}
		if !matched {
			// Unknown suffix.
			return 0, false
		}
	}

	if negative {
		total = -total
	}
	return total, true
}

// addInt64NoOverflow returns the sum of n and f truncated to int64.
//
// The max int64 value is returned if n is negative, if the truncated f is negative
// or if the sum doesn't fit int64.
func addInt64NoOverflow(n int64, f float64) int64 {
	const maxInt64 = 1<<63 - 1

	x := int64(f)
	if n >= 0 && x >= 0 && x <= maxInt64-n {
		return n + x
	}
	return maxInt64
}

// tryParseIPv4Mask parses '/num' ipv4 mask and returns (1<<(32-num))
//
// num must be in the range [0 ... 32]; false is returned otherwise.
func tryParseIPv4Mask(s string) (uint64, bool) {
	if len(s) == 0 {
		return 0, false
	}
	if s[0] != '/' {
		return 0, false
	}

	maskBits, ok := tryParseUint64(s[1:])
	if !ok || maskBits > 32 {
		return 0, false
	}
	return uint64(1) << (32 - maskBits), true
}

// tryParseDuration parses the given duration in nanoseconds and returns the result.
func tryParseDuration(s string) (int64, bool) {
	if len(s) == 0 {
		return 0, false
	}
	isMinus := s[0] == '-'
	if isMinus {
		s = s[1:]
	}

	nsecs := int64(0)
	for len(s) > 0 {
		f, ok, tail := tryParseFloat64Prefix(s)
		if !ok {
			return 0, false
		}
		s = tail
		if len(s) == 0 {
			return 0, false
		}
		if len(s) >= 3 {
			if strings.HasPrefix(s, "µs") {
				nsecs = addInt64NoOverflow(nsecs, f*nsecsPerMicrosecond)
				s = s[3:]
				continue
			}
		}
		if len(s) >= 2 {
			switch {
			case strings.HasPrefix(s, "ms"):
				nsecs = addInt64NoOverflow(nsecs, f*nsecsPerMillisecond)
				s = s[2:]
				continue
			case strings.HasPrefix(s, "ns"):
				nsecs = addInt64NoOverflow(nsecs, f)
				s = s[2:]
				continue
			}
		}
		switch {
		case strings.HasPrefix(s, "y"):
			nsecs = addInt64NoOverflow(nsecs, f*nsecsPerYear)
			s = s[1:]
		case strings.HasPrefix(s, "w"):
			nsecs = addInt64NoOverflow(nsecs, f*nsecsPerWeek)
			s = s[1:]
			continue
		case strings.HasPrefix(s, "d"):
			nsecs = addInt64NoOverflow(nsecs, f*nsecsPerDay)
			s = s[1:]
			continue
		case strings.HasPrefix(s, "h"):
			nsecs = addInt64NoOverflow(nsecs, f*nsecsPerHour)
			s = s[1:]
			continue
		case strings.HasPrefix(s, "m"):
			nsecs = addInt64NoOverflow(nsecs, f*nsecsPerMinute)
			s = s[1:]
			continue
		case strings.HasPrefix(s, "s"):
			nsecs = addInt64NoOverflow(nsecs, f*nsecsPerSecond)
			s = s[1:]
			continue
		default:
			return 0, false
		}
	}

	if isMinus {
		nsecs = -nsecs
	}
	return nsecs, true
}

// marshalDurationString appends string representation of nsec duration to dst and returns the result.
//
// The duration is split into weeks, days, hours, minutes, seconds, milliseconds, microseconds
// and nanoseconds. Zero components are skipped. Zero duration is formatted as "0".
//
// NOTE(review): -nsecs overflows for the minimum int64 value - confirm callers never pass it.
func marshalDurationString(dst []byte, nsecs int64) []byte {
	if nsecs == 0 {
		return append(dst, '0')
	}

	if nsecs < 0 {
		dst = append(dst, '-')
		nsecs = -nsecs
	}
	// Durations of at least one second get their sub-minute remainder formatted
	// as a fractional number of seconds when that remainder is at least one second.
	formatFloat64Seconds := nsecs >= nsecsPerSecond

	if nsecs >= nsecsPerWeek {
		weeks := nsecs / nsecsPerWeek
		nsecs -= weeks * nsecsPerWeek
		dst = marshalUint64String(dst, uint64(weeks))
		dst = append(dst, 'w')
	}
	if nsecs >= nsecsPerDay {
		// days is smaller than 7 here, since whole weeks are already subtracted.
		days := nsecs / nsecsPerDay
		nsecs -= days * nsecsPerDay
		dst = marshalUint8String(dst, uint8(days))
		dst = append(dst, 'd')
	}
	if nsecs >= nsecsPerHour {
		// hours is smaller than 24 here.
		hours := nsecs / nsecsPerHour
		nsecs -= hours * nsecsPerHour
		dst = marshalUint8String(dst, uint8(hours))
		dst = append(dst, 'h')
	}
	if nsecs >= nsecsPerMinute {
		// minutes is smaller than 60 here.
		minutes := nsecs / nsecsPerMinute
		nsecs -= minutes * nsecsPerMinute
		dst = marshalUint8String(dst, uint8(minutes))
		dst = append(dst, 'm')
	}
	if nsecs >= nsecsPerSecond {
		if formatFloat64Seconds {
			// Format the remaining duration as fractional seconds and stop.
			seconds := float64(nsecs) / nsecsPerSecond
			dst = marshalFloat64String(dst, seconds)
			dst = append(dst, 's')
			return dst
		}
		// NOTE(review): this looks unreachable - nsecs only decreases above,
		// so nsecs >= nsecsPerSecond here implies formatFloat64Seconds is set.
		seconds := nsecs / nsecsPerSecond
		nsecs -= seconds * nsecsPerSecond
		dst = marshalUint8String(dst, uint8(seconds))
		dst = append(dst, 's')
	}
	// The remaining duration is smaller than a second here.
	if nsecs >= nsecsPerMillisecond {
		msecs := nsecs / nsecsPerMillisecond
		nsecs -= msecs * nsecsPerMillisecond
		dst = marshalUint16String(dst, uint16(msecs))
		dst = append(dst, "ms"...)
	}
	if nsecs >= nsecsPerMicrosecond {
		usecs := nsecs / nsecsPerMicrosecond
		nsecs -= usecs * nsecsPerMicrosecond
		dst = marshalUint16String(dst, uint16(usecs))
		dst = append(dst, "µs"...)
	}
	if nsecs > 0 {
		dst = marshalUint16String(dst, uint16(nsecs))
		dst = append(dst, "ns"...)
	}
	return dst
}

// The number of nanoseconds in the given time units.
//
// These are untyped constants, so they can be used in both integer and floating-point expressions.
const (
	// nsecsPerYear is the number of nanoseconds in a 365-day year.
	nsecsPerYear        = 365 * 24 * 3600 * 1e9
	nsecsPerWeek        = 7 * 24 * 3600 * 1e9
	nsecsPerDay         = 24 * 3600 * 1e9
	nsecsPerHour        = 3600 * 1e9
	nsecsPerMinute      = 60 * 1e9
	nsecsPerSecond      = 1e9
	nsecsPerMillisecond = 1e6
	nsecsPerMicrosecond = 1e3
)

// tryIntEncoding tries encoding srcValues as int64 numbers.
//
// On success the 8-byte encoded numbers are appended to dstBuf, the corresponding values are appended to dstValues
// and valueTypeInt64 is returned together with the min and max numbers converted to uint64.
// If some of srcValues cannot be parsed, then dstBuf and dstValues are returned as is together with valueTypeUnknown.
func tryIntEncoding(dstBuf []byte, dstValues, srcValues []string) ([]byte, []string, valueType, uint64, uint64) {
	i64s := encoding.GetInt64s(len(srcValues))
	defer encoding.PutInt64s(i64s)

	// Parse all the values at first, so nothing is appended to dst* on failure.
	nums := i64s.A
	var minValue, maxValue int64
	for i, v := range srcValues {
		n, ok := tryParseInt64(v)
		if !ok {
			return dstBuf, dstValues, valueTypeUnknown, 0, 0
		}
		nums[i] = n
		switch {
		case i == 0:
			minValue, maxValue = n, n
		case n < minValue:
			minValue = n
		case n > maxValue:
			maxValue = n
		}
	}

	for _, n := range nums {
		start := len(dstBuf)
		dstBuf = encoding.MarshalInt64(dstBuf, n)
		dstValues = append(dstValues, bytesutil.ToUnsafeString(dstBuf[start:]))
	}
	return dstBuf, dstValues, valueTypeInt64, uint64(minValue), uint64(maxValue)
}

// tryUintEncoding tries encoding srcValues as unsigned integers.
//
// The smallest type among uint8, uint16, uint32 and uint64, which is able to hold the max value, is used.
// On success the encoded numbers are appended to dstBuf, the corresponding values are appended to dstValues
// and the selected value type is returned together with the min and max numbers.
// If some of srcValues cannot be parsed, then dstBuf and dstValues are returned as is together with valueTypeUnknown.
func tryUintEncoding(dstBuf []byte, dstValues, srcValues []string) ([]byte, []string, valueType, uint64, uint64) {
	u64s := encoding.GetUint64s(len(srcValues))
	defer encoding.PutUint64s(u64s)

	// Parse all the values at first, so nothing is appended to dst* on failure.
	nums := u64s.A
	var minValue, maxValue uint64
	for i, v := range srcValues {
		n, ok := tryParseUint64(v)
		if !ok {
			return dstBuf, dstValues, valueTypeUnknown, 0, 0
		}
		nums[i] = n
		switch {
		case i == 0:
			minValue, maxValue = n, n
		case n < minValue:
			minValue = n
		case n > maxValue:
			maxValue = n
		}
	}

	// The number of bits needed for the max value determines the encoded size of every value.
	var vt valueType
	switch bitSize := bits.Len64(maxValue); {
	case bitSize <= 8:
		vt = valueTypeUint8
		for _, n := range nums {
			dstBuf = append(dstBuf, byte(n))
			dstValues = append(dstValues, bytesutil.ToUnsafeString(dstBuf[len(dstBuf)-1:]))
		}
	case bitSize <= 16:
		vt = valueTypeUint16
		for _, n := range nums {
			start := len(dstBuf)
			dstBuf = encoding.MarshalUint16(dstBuf, uint16(n))
			dstValues = append(dstValues, bytesutil.ToUnsafeString(dstBuf[start:]))
		}
	case bitSize <= 32:
		vt = valueTypeUint32
		for _, n := range nums {
			start := len(dstBuf)
			dstBuf = encoding.MarshalUint32(dstBuf, uint32(n))
			dstValues = append(dstValues, bytesutil.ToUnsafeString(dstBuf[start:]))
		}
	default:
		vt = valueTypeUint64
		for _, n := range nums {
			start := len(dstBuf)
			dstBuf = encoding.MarshalUint64(dstBuf, n)
			dstValues = append(dstValues, bytesutil.ToUnsafeString(dstBuf[start:]))
		}
	}
	return dstBuf, dstValues, vt, minValue, maxValue
}

// tryDictEncoding tries encoding srcValues as single-byte indexes into dict.
//
// dict is reset before the encoding. On success the encoded indexes are appended to dstBuf,
// the corresponding values are appended to dstValues and valueTypeDict is returned.
// If srcValues do not fit dict, then dict is reset and the original dstBuf and dstValues are returned
// together with valueTypeUnknown.
func tryDictEncoding(dstBuf []byte, dstValues, srcValues []string, dict *valuesDict) ([]byte, []string, valueType) {
	dict.reset()
	origBuf, origValues := dstBuf, dstValues

	for _, src := range srcValues {
		id, ok := dict.getOrAdd(src)
		if !ok {
			dict.reset()
			return origBuf, origValues, valueTypeUnknown
		}
		dstBuf = append(dstBuf, id)
		// The encoded value is the just appended byte.
		dstValues = append(dstValues, bytesutil.ToUnsafeString(dstBuf[len(dstBuf)-1:]))
	}
	return dstBuf, dstValues, valueTypeDict
}

// valuesDict is a dictionary of unique values used by valueTypeDict encoding.
type valuesDict struct {
	// values contains the dictionary entries.
	// The index of the entry is used as the encoded value; see getOrAdd.
	values []string
}

// reset removes all the entries from vd, while keeping the allocated capacity for further re-use.
func (vd *valuesDict) reset() {
	// Zero the entries before truncating, so the slice no longer references the strings.
	clear(vd.values)
	vd.values = vd.values[:0]
}

// copyFrom replaces vd contents with the entries from src.
//
// Every entry is copied via a.copyString.
func (vd *valuesDict) copyFrom(a *arena, src *valuesDict) {
	vd.reset()

	copied := vd.values
	for i := range src.values {
		copied = append(copied, a.copyString(src.values[i]))
	}
	vd.values = copied
}

// copyFromNoArena replaces vd contents with the entries from src.
//
// The strings themselves aren't copied, so vd shares their contents with src.
func (vd *valuesDict) copyFromNoArena(src *valuesDict) {
	vd.reset()

	vd.values = append(vd.values[:0], src.values...)
}

// getOrAdd returns the index of k in vd. k is added to vd if it is missing there.
//
// false is returned if k cannot be added to vd, because vd already contains maxDictLen entries
// or because the summary size of the entries together with k would exceed maxDictSizeBytes.
func (vd *valuesDict) getOrAdd(k string) (byte, bool) {
	if len(k) > maxDictSizeBytes {
		return 0, false
	}

	// Search for k, while calculating the dict size as if k was added to it.
	totalBytes := len(k)
	for id, v := range vd.values {
		if v == k {
			return byte(id), true
		}
		totalBytes += len(v)
	}

	if len(vd.values) >= maxDictLen || totalBytes > maxDictSizeBytes {
		return 0, false
	}
	vd.values = append(vd.values, k)
	return byte(len(vd.values) - 1), true
}

// marshal appends marshaled vd to dst and returns the result.
//
// The marshaled data consists of a single byte with the number of entries followed by the entries
// marshaled via marshalStrings.
//
// It panics if vd contains more than maxDictLen entries.
func (vd *valuesDict) marshal(dst []byte) []byte {
	n := len(vd.values)
	if n > maxDictLen {
		logger.Panicf("BUG: valuesDict may contain max %d items; got %d items", maxDictLen, n)
	}
	dst = append(dst, byte(n))
	return marshalStrings(dst, vd.values)
}

// unmarshalNoArena unmarshals vd from src and returns the tail of src left after the unmarshaled data.
//
// The original src is returned together with the error if src cannot be unmarshaled.
//
// vd is valid until src is changed.
func (vd *valuesDict) unmarshalNoArena(src []byte) ([]byte, error) {
	vd.reset()

	if len(src) == 0 {
		return src, fmt.Errorf("cannot umarshal dict len from 0 bytes; need at least 1 byte")
	}

	// The first byte contains the number of entries in the dict.
	dictLen := int(src[0])
	tail := src[1:]
	for i := 0; i < dictLen; i++ {
		data, nSize := encoding.UnmarshalBytes(tail)
		if nSize <= 0 {
			return src, fmt.Errorf("cannot umarshal value %d out of %d from dict", i, dictLen)
		}
		tail = tail[nSize:]

		// The entry isn't copied, so it refers to src contents.
		vd.values = append(vd.values, bytesutil.ToUnsafeString(data))
	}
	return tail, nil
}

// unmarshalUint8 returns the uint8 value stored in the first byte of v.
//
// v must contain at least one byte.
func unmarshalUint8(v string) uint8 {
	b := v[0]
	return b
}

func unmarshalUint16(v string) uint16 {
	b := bytesutil.ToUnsafeBytes(v)
	return encoding.UnmarshalUint16(b)
}

func unmarshalUint32(v string) uint32 {
	b := bytesutil.ToUnsafeBytes(v)
	return encoding.UnmarshalUint32(b)
}

func unmarshalUint64(v string) uint64 {
	b := bytesutil.ToUnsafeBytes(v)
	return encoding.UnmarshalUint64(b)
}

func unmarshalInt64(v string) int64 {
	b := bytesutil.ToUnsafeBytes(v)
	return encoding.UnmarshalInt64(b)
}

// unmarshalFloat64 returns the float64 value, whose IEEE 754 bits are stored in v as uint64.
func unmarshalFloat64(v string) float64 {
	return math.Float64frombits(unmarshalUint64(v))
}

// unmarshalIPv4 returns the IPv4 address stored in v as uint32.
//
// See marshalIPv4String for converting the returned value to string.
func unmarshalIPv4(v string) uint32 {
	return unmarshalUint32(v)
}

// unmarshalTimestampISO8601 returns the timestamp stored in v as uint64, converted to int64.
func unmarshalTimestampISO8601(v string) int64 {
	return int64(unmarshalUint64(v))
}

// marshalUint8String appends decimal string representation of n to dst and returns the result.
func marshalUint8String(dst []byte, n uint8) []byte {
	switch {
	case n >= 100:
		// Three digits: emit hundreds at first and then always two digits for the remainder.
		dst = append(dst, '0'+n/100)
		n %= 100
		return append(dst, '0'+n/10, '0'+n%10)
	case n >= 10:
		return append(dst, '0'+n/10, '0'+n%10)
	default:
		return append(dst, '0'+n)
	}
}

// marshalUint16String appends decimal string representation of n to dst and returns the result.
func marshalUint16String(dst []byte, n uint16) []byte {
	return strconv.AppendUint(dst, uint64(n), 10)
}

// marshalUint32String appends decimal string representation of n to dst and returns the result.
func marshalUint32String(dst []byte, n uint32) []byte {
	return strconv.AppendUint(dst, uint64(n), 10)
}

// marshalUint64String appends decimal string representation of n to dst and returns the result.
func marshalUint64String(dst []byte, n uint64) []byte {
	return strconv.AppendUint(dst, n, 10)
}

// marshalInt64String appends decimal string representation of n to dst and returns the result.
func marshalInt64String(dst []byte, n int64) []byte {
	return strconv.AppendInt(dst, n, 10)
}

// marshalFloat64String appends string representation of f to dst and returns the result.
//
// f is formatted without exponent, with the smallest number of digits needed
// for representing f uniquely.
func marshalFloat64String(dst []byte, f float64) []byte {
	return strconv.AppendFloat(dst, f, 'f', -1, 64)
}

// marshalIPv4String appends dotted string representation of the IPv4 address n to dst and returns the result.
//
// The most significant byte of n becomes the first octet.
func marshalIPv4String(dst []byte, n uint32) []byte {
	for shift := 24; shift >= 0; shift -= 8 {
		if shift < 24 {
			dst = append(dst, '.')
		}
		dst = marshalUint8String(dst, uint8(n>>shift))
	}
	return dst
}

// marshalTimestampISO8601String appends ISO8601-formatted nsecs to dst and returns the result.
//
// nsecs is a unix timestamp in nanoseconds. It is formatted in UTC with millisecond precision.
func marshalTimestampISO8601String(dst []byte, nsecs int64) []byte {
	t := time.Unix(0, nsecs).UTC()
	return t.AppendFormat(dst, iso8601Timestamp)
}

// iso8601Timestamp is the time layout used by marshalTimestampISO8601String.
const iso8601Timestamp = "2006-01-02T15:04:05.000Z"

// marshalTimestampRFC3339NanoString appends RFC3339Nano-formatted nsecs to dst and returns the result.
//
// nsecs is a unix timestamp in nanoseconds. It is formatted in UTC.
func marshalTimestampRFC3339NanoString(dst []byte, nsecs int64) []byte {
	t := time.Unix(0, nsecs).UTC()
	return t.AppendFormat(dst, time.RFC3339Nano)
}