2023-06-20 05:55:12 +00:00
package logstorage
import (
"fmt"
"math"
"math/bits"
"strconv"
"strings"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
// valueType is the type of values stored in every column block.
type valueType byte
const (
// valueTypeUnknown is used for determining whether the value type is unknown.
valueTypeUnknown = valueType ( 0 )
// default encoding for column blocks. Strings are stored as is.
valueTypeString = valueType ( 1 )
// column blocks with small number of unique values are encoded as dict.
valueTypeDict = valueType ( 2 )
// uint values up to 2^8-1 are encoded into valueTypeUint8.
// Every value occupies a single byte.
valueTypeUint8 = valueType ( 3 )
// uint values up to 2^16-1 are encoded into valueTypeUint16.
// Every value occupies 2 bytes.
valueTypeUint16 = valueType ( 4 )
// uint values up to 2^31-1 are encoded into valueTypeUint32.
// Every value occupies 4 bytes.
valueTypeUint32 = valueType ( 5 )
// uint values up to 2^64-1 are encoded into valueTypeUint64.
// Every value occupies 8 bytes.
valueTypeUint64 = valueType ( 6 )
// floating-point values are encoded into valueTypeFloat64.
valueTypeFloat64 = valueType ( 7 )
// column blocks with ipv4 addresses are encoded as 4-byte strings.
valueTypeIPv4 = valueType ( 8 )
// column blocks with ISO8601 timestamps are encoded into valueTypeTimestampISO8601.
// These timestamps are commonly used by Logstash.
valueTypeTimestampISO8601 = valueType ( 9 )
)
type valuesEncoder struct {
// buf contains data for values.
buf [ ] byte
// values contains encoded values.
values [ ] string
}
func ( ve * valuesEncoder ) reset ( ) {
ve . buf = ve . buf [ : 0 ]
2024-05-12 14:33:29 +00:00
clear ( ve . values )
ve . values = ve . values [ : 0 ]
2023-06-20 05:55:12 +00:00
}
// encode encodes values to ve.values and returns the encoded value type with min/max encoded values.
2024-05-13 23:49:20 +00:00
//
// ve.values and dict is valid until values are changed.
2023-06-20 05:55:12 +00:00
func ( ve * valuesEncoder ) encode ( values [ ] string , dict * valuesDict ) ( valueType , uint64 , uint64 ) {
ve . reset ( )
if len ( values ) == 0 {
return valueTypeString , 0 , 0
}
var vt valueType
var minValue , maxValue uint64
// Try dict encoding at first, since it gives the highest speedup during querying.
// It also usually gives the best compression, since every value is encoded as a single byte.
ve . buf , ve . values , vt = tryDictEncoding ( ve . buf [ : 0 ] , ve . values [ : 0 ] , values , dict )
if vt != valueTypeUnknown {
return vt , 0 , 0
}
ve . buf , ve . values , vt , minValue , maxValue = tryUintEncoding ( ve . buf [ : 0 ] , ve . values [ : 0 ] , values )
if vt != valueTypeUnknown {
return vt , minValue , maxValue
}
ve . buf , ve . values , vt , minValue , maxValue = tryFloat64Encoding ( ve . buf [ : 0 ] , ve . values [ : 0 ] , values )
if vt != valueTypeUnknown {
return vt , minValue , maxValue
}
ve . buf , ve . values , vt , minValue , maxValue = tryIPv4Encoding ( ve . buf [ : 0 ] , ve . values [ : 0 ] , values )
if vt != valueTypeUnknown {
return vt , minValue , maxValue
}
ve . buf , ve . values , vt , minValue , maxValue = tryTimestampISO8601Encoding ( ve . buf [ : 0 ] , ve . values [ : 0 ] , values )
if vt != valueTypeUnknown {
return vt , minValue , maxValue
}
// Fall back to default encoding, e.g. leave values as is.
ve . values = append ( ve . values [ : 0 ] , values ... )
return valueTypeString , 0 , 0
}
func getValuesEncoder ( ) * valuesEncoder {
v := valuesEncoderPool . Get ( )
if v == nil {
return & valuesEncoder { }
}
return v . ( * valuesEncoder )
}
func putValuesEncoder ( ve * valuesEncoder ) {
ve . reset ( )
valuesEncoderPool . Put ( ve )
}
var valuesEncoderPool sync . Pool
type valuesDecoder struct {
buf [ ] byte
}
func ( vd * valuesDecoder ) reset ( ) {
vd . buf = vd . buf [ : 0 ]
}
2024-05-12 14:33:29 +00:00
// decodeInplace decodes values encoded with the given vt and the given dictValues inplace.
2023-06-20 05:55:12 +00:00
//
// the decoded values remain valid until vd.reset() is called.
2024-05-12 14:33:29 +00:00
func ( vd * valuesDecoder ) decodeInplace ( values [ ] string , vt valueType , dictValues [ ] string ) error {
2023-06-20 05:55:12 +00:00
// do not reset vd.buf, since it may contain previously decoded data,
// which must be preserved until reset() call.
dstBuf := vd . buf
switch vt {
case valueTypeString :
// nothing to do - values are already decoded.
2024-05-12 14:33:29 +00:00
case valueTypeDict :
sb := getStringBucket ( )
for _ , v := range dictValues {
dstLen := len ( dstBuf )
dstBuf = append ( dstBuf , v ... )
sb . a = append ( sb . a , bytesutil . ToUnsafeString ( dstBuf [ dstLen : ] ) )
}
for i , v := range values {
id := int ( v [ 0 ] )
if id >= len ( dictValues ) {
return fmt . Errorf ( "unexpected dictionary id: %d; it must be smaller than %d" , id , len ( dictValues ) )
}
values [ i ] = sb . a [ id ]
}
putStringBucket ( sb )
2023-06-20 05:55:12 +00:00
case valueTypeUint8 :
for i , v := range values {
if len ( v ) != 1 {
return fmt . Errorf ( "unexpected value length for uint8; got %d; want 1" , len ( v ) )
}
2024-05-20 02:08:30 +00:00
n := unmarshalUint8 ( v )
2023-06-20 05:55:12 +00:00
dstLen := len ( dstBuf )
2024-05-20 02:08:30 +00:00
dstBuf = marshalUint8String ( dstBuf , n )
2023-06-20 05:55:12 +00:00
values [ i ] = bytesutil . ToUnsafeString ( dstBuf [ dstLen : ] )
}
case valueTypeUint16 :
for i , v := range values {
if len ( v ) != 2 {
return fmt . Errorf ( "unexpected value length for uint16; got %d; want 2" , len ( v ) )
}
2024-05-20 02:08:30 +00:00
n := unmarshalUint16 ( v )
2023-06-20 05:55:12 +00:00
dstLen := len ( dstBuf )
2024-05-20 02:08:30 +00:00
dstBuf = marshalUint16String ( dstBuf , n )
2023-06-20 05:55:12 +00:00
values [ i ] = bytesutil . ToUnsafeString ( dstBuf [ dstLen : ] )
}
case valueTypeUint32 :
for i , v := range values {
if len ( v ) != 4 {
return fmt . Errorf ( "unexpected value length for uint32; got %d; want 4" , len ( v ) )
}
2024-05-20 02:08:30 +00:00
n := unmarshalUint32 ( v )
2023-06-20 05:55:12 +00:00
dstLen := len ( dstBuf )
2024-05-20 02:08:30 +00:00
dstBuf = marshalUint32String ( dstBuf , n )
2023-06-20 05:55:12 +00:00
values [ i ] = bytesutil . ToUnsafeString ( dstBuf [ dstLen : ] )
}
case valueTypeUint64 :
for i , v := range values {
if len ( v ) != 8 {
return fmt . Errorf ( "unexpected value length for uint64; got %d; want 8" , len ( v ) )
}
2024-05-20 02:08:30 +00:00
n := unmarshalUint64 ( v )
2023-06-20 05:55:12 +00:00
dstLen := len ( dstBuf )
2024-05-20 02:08:30 +00:00
dstBuf = marshalUint64String ( dstBuf , n )
2023-06-20 05:55:12 +00:00
values [ i ] = bytesutil . ToUnsafeString ( dstBuf [ dstLen : ] )
}
2024-05-12 14:33:29 +00:00
case valueTypeFloat64 :
2023-06-20 05:55:12 +00:00
for i , v := range values {
2024-05-12 14:33:29 +00:00
if len ( v ) != 8 {
return fmt . Errorf ( "unexpected value length for uint64; got %d; want 8" , len ( v ) )
2023-06-20 05:55:12 +00:00
}
2024-05-20 02:08:30 +00:00
f := unmarshalFloat64 ( v )
2024-05-12 14:33:29 +00:00
dstLen := len ( dstBuf )
2024-05-20 02:08:30 +00:00
dstBuf = marshalFloat64String ( dstBuf , f )
2024-05-12 14:33:29 +00:00
values [ i ] = bytesutil . ToUnsafeString ( dstBuf [ dstLen : ] )
2023-06-20 05:55:12 +00:00
}
case valueTypeIPv4 :
for i , v := range values {
if len ( v ) != 4 {
return fmt . Errorf ( "unexpected value length for ipv4; got %d; want 4" , len ( v ) )
}
2024-05-20 02:08:30 +00:00
ip := unmarshalIPv4 ( v )
2023-06-20 05:55:12 +00:00
dstLen := len ( dstBuf )
2024-05-20 02:08:30 +00:00
dstBuf = marshalIPv4String ( dstBuf , ip )
2023-06-20 05:55:12 +00:00
values [ i ] = bytesutil . ToUnsafeString ( dstBuf [ dstLen : ] )
}
case valueTypeTimestampISO8601 :
for i , v := range values {
if len ( v ) != 8 {
return fmt . Errorf ( "unexpected value length for uint64; got %d; want 8" , len ( v ) )
}
2024-05-20 02:08:30 +00:00
timestamp := unmarshalTimestampISO8601 ( v )
2023-06-20 05:55:12 +00:00
dstLen := len ( dstBuf )
2024-05-20 02:08:30 +00:00
dstBuf = marshalTimestampISO8601String ( dstBuf , timestamp )
2023-06-20 05:55:12 +00:00
values [ i ] = bytesutil . ToUnsafeString ( dstBuf [ dstLen : ] )
}
default :
return fmt . Errorf ( "unknown valueType=%d" , vt )
}
vd . buf = dstBuf
return nil
}
func getValuesDecoder ( ) * valuesDecoder {
v := valuesDecoderPool . Get ( )
if v == nil {
return & valuesDecoder { }
}
return v . ( * valuesDecoder )
}
func putValuesDecoder ( vd * valuesDecoder ) {
vd . reset ( )
valuesDecoderPool . Put ( vd )
}
var valuesDecoderPool sync . Pool
func tryTimestampISO8601Encoding ( dstBuf [ ] byte , dstValues , srcValues [ ] string ) ( [ ] byte , [ ] string , valueType , uint64 , uint64 ) {
2024-05-12 14:33:29 +00:00
u64s := encoding . GetInt64s ( len ( srcValues ) )
defer encoding . PutInt64s ( u64s )
2023-06-20 05:55:12 +00:00
a := u64s . A
2024-05-12 14:33:29 +00:00
var minValue , maxValue int64
2023-06-20 05:55:12 +00:00
for i , v := range srcValues {
2024-06-25 12:08:01 +00:00
n , ok := tryParseTimestampISO8601 ( v )
2023-06-20 05:55:12 +00:00
if ! ok {
return dstBuf , dstValues , valueTypeUnknown , 0 , 0
}
a [ i ] = n
if i == 0 || n < minValue {
minValue = n
}
if i == 0 || n > maxValue {
maxValue = n
}
}
for _ , n := range a {
dstLen := len ( dstBuf )
2024-05-12 14:33:29 +00:00
dstBuf = encoding . MarshalUint64 ( dstBuf , uint64 ( n ) )
2023-06-20 05:55:12 +00:00
v := bytesutil . ToUnsafeString ( dstBuf [ dstLen : ] )
dstValues = append ( dstValues , v )
}
2024-05-12 14:33:29 +00:00
return dstBuf , dstValues , valueTypeTimestampISO8601 , uint64 ( minValue ) , uint64 ( maxValue )
2023-06-20 05:55:12 +00:00
}
2024-06-25 12:08:01 +00:00
// TryParseTimestampRFC3339Nano parses 'YYYY-MM-DDThh:mm:ss' with optional nanoseconds part and timezone offset and returns unix timestamp in nanoseconds.
2024-05-12 14:33:29 +00:00
//
// The returned timestamp can be negative if s is smaller than 1970 year.
2024-06-17 10:13:18 +00:00
func TryParseTimestampRFC3339Nano ( s string ) ( int64 , bool ) {
2024-05-12 14:33:29 +00:00
if len ( s ) < len ( "2006-01-02T15:04:05Z" ) {
return 0 , false
}
secs , ok , tail := tryParseTimestampSecs ( s )
if ! ok {
return 0 , false
}
s = tail
nsecs := secs * 1e9
2024-06-25 12:08:01 +00:00
// Parse timezone offset
n := strings . IndexAny ( s , "Z+-" )
if n < 0 {
2024-05-12 14:33:29 +00:00
return 0 , false
}
2024-06-25 12:08:01 +00:00
offsetStr := s [ n + 1 : ]
if s [ n ] != 'Z' {
isMinus := s [ n ] == '-'
if len ( offsetStr ) == 0 {
return 0 , false
}
offsetNsecs , ok := tryParseTimezoneOffset ( offsetStr )
if ! ok {
return 0 , false
}
if isMinus {
offsetNsecs = - offsetNsecs
}
nsecs -= offsetNsecs
} else {
if len ( offsetStr ) != 0 {
return 0 , false
}
}
2024-05-12 14:33:29 +00:00
s = s [ : n ]
2024-06-25 12:08:01 +00:00
// Parse optional fractional part of seconds.
2024-05-12 14:33:29 +00:00
if len ( s ) == 0 {
return nsecs , true
}
if s [ 0 ] == '.' {
s = s [ 1 : ]
}
digits := len ( s )
if digits > 9 {
return 0 , false
}
n64 , ok := tryParseUint64 ( s )
if ! ok {
return 0 , false
}
if digits < 9 {
n64 *= uint64 ( math . Pow10 ( 9 - digits ) )
}
nsecs += int64 ( n64 )
return nsecs , true
}
2024-06-25 12:08:01 +00:00
func tryParseTimezoneOffset ( offsetStr string ) ( int64 , bool ) {
n := strings . IndexByte ( offsetStr , ':' )
if n < 0 {
return 0 , false
}
hourStr := offsetStr [ : n ]
minuteStr := offsetStr [ n + 1 : ]
hours , ok := tryParseUint64 ( hourStr )
if ! ok || hours > 24 {
return 0 , false
}
minutes , ok := tryParseUint64 ( minuteStr )
if ! ok || minutes > 60 {
return 0 , false
}
return int64 ( hours ) * nsecsPerHour + int64 ( minutes ) * nsecsPerMinute , true
}
// tryParseTimestampISO8601 parses 'YYYY-MM-DDThh:mm:ss.mssZ' and returns unix timestamp in nanoseconds.
2024-05-12 14:33:29 +00:00
//
// The returned timestamp can be negative if s is smaller than 1970 year.
2024-06-25 12:08:01 +00:00
func tryParseTimestampISO8601 ( s string ) ( int64 , bool ) {
2023-06-20 05:55:12 +00:00
// Do not parse timestamps with timezone, since they cannot be converted back
// to the same string representation in general case.
// This may break search.
if len ( s ) != len ( "2006-01-02T15:04:05.000Z" ) {
return 0 , false
}
2024-05-12 14:33:29 +00:00
secs , ok , tail := tryParseTimestampSecs ( s )
if ! ok {
return 0 , false
}
s = tail
nsecs := secs * 1e9
if s [ 0 ] != '.' {
return 0 , false
}
s = s [ 1 : ]
// Parse milliseconds
tzDelimiter := s [ len ( "000" ) ]
if tzDelimiter != 'Z' {
return 0 , false
}
millisecondStr := s [ : len ( "000" ) ]
msecs , ok := tryParseUint64 ( millisecondStr )
if ! ok {
return 0 , false
}
s = s [ len ( "000" ) + 1 : ]
if len ( s ) != 0 {
logger . Panicf ( "BUG: unexpected tail in timestamp: %q" , s )
}
nsecs += int64 ( msecs ) * 1e6
return nsecs , true
}
// tryParseTimestampSecs parses YYYY-MM-DDTHH:mm:ss into unix timestamp in seconds.
func tryParseTimestampSecs ( s string ) ( int64 , bool , string ) {
2023-06-20 05:55:12 +00:00
// Parse year
if s [ len ( "YYYY" ) ] != '-' {
2024-05-12 14:33:29 +00:00
return 0 , false , s
2023-06-20 05:55:12 +00:00
}
yearStr := s [ : len ( "YYYY" ) ]
n , ok := tryParseUint64 ( yearStr )
2024-05-12 14:33:29 +00:00
if ! ok || n < 1677 || n > 2262 {
return 0 , false , s
2023-06-20 05:55:12 +00:00
}
year := int ( n )
s = s [ len ( "YYYY" ) + 1 : ]
// Parse month
if s [ len ( "MM" ) ] != '-' {
2024-05-12 14:33:29 +00:00
return 0 , false , s
2023-06-20 05:55:12 +00:00
}
monthStr := s [ : len ( "MM" ) ]
n , ok = tryParseUint64 ( monthStr )
2024-05-12 14:33:29 +00:00
if ! ok {
return 0 , false , s
2023-06-20 05:55:12 +00:00
}
month := time . Month ( n )
s = s [ len ( "MM" ) + 1 : ]
// Parse day
if s [ len ( "DD" ) ] != 'T' {
2024-05-12 14:33:29 +00:00
return 0 , false , s
2023-06-20 05:55:12 +00:00
}
dayStr := s [ : len ( "DD" ) ]
n , ok = tryParseUint64 ( dayStr )
2024-05-12 14:33:29 +00:00
if ! ok {
return 0 , false , s
2023-06-20 05:55:12 +00:00
}
day := int ( n )
s = s [ len ( "DD" ) + 1 : ]
// Parse hour
if s [ len ( "HH" ) ] != ':' {
2024-05-12 14:33:29 +00:00
return 0 , false , s
2023-06-20 05:55:12 +00:00
}
hourStr := s [ : len ( "HH" ) ]
n , ok = tryParseUint64 ( hourStr )
2024-05-12 14:33:29 +00:00
if ! ok {
return 0 , false , s
2023-06-20 05:55:12 +00:00
}
hour := int ( n )
s = s [ len ( "HH" ) + 1 : ]
// Parse minute
if s [ len ( "MM" ) ] != ':' {
2024-05-12 14:33:29 +00:00
return 0 , false , s
2023-06-20 05:55:12 +00:00
}
minuteStr := s [ : len ( "MM" ) ]
n , ok = tryParseUint64 ( minuteStr )
2024-05-12 14:33:29 +00:00
if ! ok {
return 0 , false , s
2023-06-20 05:55:12 +00:00
}
minute := int ( n )
s = s [ len ( "MM" ) + 1 : ]
// Parse second
secondStr := s [ : len ( "SS" ) ]
n , ok = tryParseUint64 ( secondStr )
2024-05-12 14:33:29 +00:00
if ! ok {
return 0 , false , s
2023-06-20 05:55:12 +00:00
}
second := int ( n )
2024-05-12 14:33:29 +00:00
s = s [ len ( "SS" ) : ]
2023-06-20 05:55:12 +00:00
2024-05-12 14:33:29 +00:00
secs := time . Date ( year , month , day , hour , minute , second , 0 , time . UTC ) . Unix ( )
if secs < int64 ( - 1 << 63 ) / 1e9 || secs >= int64 ( ( 1 << 63 ) - 1 ) / 1e9 {
// Too big or too small timestamp
return 0 , false , s
2023-06-20 05:55:12 +00:00
}
2024-05-12 14:33:29 +00:00
return secs , true , s
2023-06-20 05:55:12 +00:00
}
2024-05-12 14:33:29 +00:00
// tryParseUint64 parses s as uint64 value.
2023-06-20 05:55:12 +00:00
func tryParseUint64 ( s string ) ( uint64 , bool ) {
2024-05-12 14:33:29 +00:00
if len ( s ) == 0 || len ( s ) > len ( "18_446_744_073_709_551_615" ) {
2023-06-20 05:55:12 +00:00
return 0 , false
}
n := uint64 ( 0 )
for i := 0 ; i < len ( s ) ; i ++ {
ch := s [ i ]
2024-05-12 14:33:29 +00:00
if ch == '_' {
continue
}
2023-06-20 05:55:12 +00:00
if ch < '0' || ch > '9' {
return 0 , false
}
2024-05-12 14:33:29 +00:00
if n > ( ( 1 << 64 ) - 1 ) / 10 {
return 0 , false
}
2023-06-20 05:55:12 +00:00
n *= 10
2024-05-12 14:33:29 +00:00
d := uint64 ( ch - '0' )
if n > ( 1 << 64 ) - 1 - d {
return 0 , false
}
n += d
2023-06-20 05:55:12 +00:00
}
return n , true
}
func tryIPv4Encoding ( dstBuf [ ] byte , dstValues , srcValues [ ] string ) ( [ ] byte , [ ] string , valueType , uint64 , uint64 ) {
u32s := encoding . GetUint32s ( len ( srcValues ) )
defer encoding . PutUint32s ( u32s )
a := u32s . A
var minValue , maxValue uint32
for i , v := range srcValues {
n , ok := tryParseIPv4 ( v )
if ! ok {
return dstBuf , dstValues , valueTypeUnknown , 0 , 0
}
a [ i ] = n
if i == 0 || n < minValue {
minValue = n
}
if i == 0 || n > maxValue {
maxValue = n
}
}
for _ , n := range a {
dstLen := len ( dstBuf )
dstBuf = encoding . MarshalUint32 ( dstBuf , n )
v := bytesutil . ToUnsafeString ( dstBuf [ dstLen : ] )
dstValues = append ( dstValues , v )
}
return dstBuf , dstValues , valueTypeIPv4 , uint64 ( minValue ) , uint64 ( maxValue )
}
2024-05-12 14:33:29 +00:00
// tryParseIPv4 tries parsing ipv4 from s.
2023-06-20 05:55:12 +00:00
func tryParseIPv4 ( s string ) ( uint32 , bool ) {
if len ( s ) < len ( "1.1.1.1" ) || len ( s ) > len ( "255.255.255.255" ) || strings . Count ( s , "." ) != 3 {
// Fast path - the entry isn't IPv4
return 0 , false
}
var octets [ 4 ] byte
var v uint64
var ok bool
// Parse octet 1
n := strings . IndexByte ( s , '.' )
if n <= 0 || n > 3 {
return 0 , false
}
v , ok = tryParseUint64 ( s [ : n ] )
if ! ok || v > 255 {
return 0 , false
}
octets [ 0 ] = byte ( v )
s = s [ n + 1 : ]
// Parse octet 2
n = strings . IndexByte ( s , '.' )
if n <= 0 || n > 3 {
return 0 , false
}
v , ok = tryParseUint64 ( s [ : n ] )
if ! ok || v > 255 {
return 0 , false
}
octets [ 1 ] = byte ( v )
s = s [ n + 1 : ]
// Parse octet 3
n = strings . IndexByte ( s , '.' )
if n <= 0 || n > 3 {
return 0 , false
}
v , ok = tryParseUint64 ( s [ : n ] )
if ! ok || v > 255 {
return 0 , false
}
octets [ 2 ] = byte ( v )
s = s [ n + 1 : ]
// Parse octet 4
v , ok = tryParseUint64 ( s )
if ! ok || v > 255 {
return 0 , false
}
octets [ 3 ] = byte ( v )
ipv4 := encoding . UnmarshalUint32 ( octets [ : ] )
return ipv4 , true
}
func tryFloat64Encoding ( dstBuf [ ] byte , dstValues , srcValues [ ] string ) ( [ ] byte , [ ] string , valueType , uint64 , uint64 ) {
u64s := encoding . GetUint64s ( len ( srcValues ) )
defer encoding . PutUint64s ( u64s )
a := u64s . A
var minValue , maxValue float64
for i , v := range srcValues {
f , ok := tryParseFloat64 ( v )
if ! ok {
return dstBuf , dstValues , valueTypeUnknown , 0 , 0
}
a [ i ] = math . Float64bits ( f )
if i == 0 || f < minValue {
minValue = f
}
if i == 0 || f > maxValue {
maxValue = f
}
}
for _ , n := range a {
dstLen := len ( dstBuf )
dstBuf = encoding . MarshalUint64 ( dstBuf , n )
v := bytesutil . ToUnsafeString ( dstBuf [ dstLen : ] )
dstValues = append ( dstValues , v )
}
minValueU64 := math . Float64bits ( minValue )
maxValueU64 := math . Float64bits ( maxValue )
return dstBuf , dstValues , valueTypeFloat64 , minValueU64 , maxValueU64
}
2024-05-12 14:33:29 +00:00
// tryParseFloat64Prefix tries parsing float64 number at the beginning of s and returns the remaining tail.
func tryParseFloat64Prefix ( s string ) ( float64 , bool , string ) {
i := 0
for i < len ( s ) && ( s [ i ] >= '0' && s [ i ] <= '9' || s [ i ] == '.' || s [ i ] == '_' ) {
i ++
}
if i == 0 {
return 0 , false , s
}
f , ok := tryParseFloat64 ( s [ : i ] )
return f , ok , s [ i : ]
}
// tryParseFloat64 tries parsing s as float64.
2023-06-20 05:55:12 +00:00
func tryParseFloat64 ( s string ) ( float64 , bool ) {
if len ( s ) == 0 || len ( s ) > 20 {
return 0 , false
}
// Allow only decimal digits, minus and a dot.
// Do not allows scientific notation (for example 1.23E+05),
// since it cannot be converted back to the same string form.
minus := s [ 0 ] == '-'
if minus {
s = s [ 1 : ]
}
n := strings . IndexByte ( s , '.' )
if n < 0 {
// fast path - there are no dots
n , ok := tryParseUint64 ( s )
if ! ok {
return 0 , false
}
f := float64 ( n )
if minus {
f = - f
}
return f , true
}
if n == 0 || n == len ( s ) - 1 {
// Do not allow dots at the beginning and at the end of s,
// since they cannot be converted back to the same string form.
return 0 , false
}
sInt := s [ : n ]
sFrac := s [ n + 1 : ]
nInt , ok := tryParseUint64 ( sInt )
if ! ok {
return 0 , false
}
nFrac , ok := tryParseUint64 ( sFrac )
if ! ok {
return 0 , false
}
2024-05-12 14:33:29 +00:00
p10 := math . Pow10 ( strings . Count ( sFrac , "_" ) - len ( sFrac ) )
f := math . FMA ( float64 ( nFrac ) , p10 , float64 ( nInt ) )
2023-06-20 05:55:12 +00:00
if minus {
f = - f
}
return f , true
}
2024-05-12 14:33:29 +00:00
// tryParseBytes parses user-readable bytes representation in s.
//
// Supported suffixes:
//
// K, KB - for 1000
func tryParseBytes ( s string ) ( int64 , bool ) {
if len ( s ) == 0 {
return 0 , false
}
isMinus := s [ 0 ] == '-'
if isMinus {
s = s [ 1 : ]
}
n := int64 ( 0 )
for len ( s ) > 0 {
f , ok , tail := tryParseFloat64Prefix ( s )
if ! ok {
return 0 , false
}
if len ( tail ) == 0 {
if _ , frac := math . Modf ( f ) ; frac != 0 {
// deny floating-point numbers without any suffix.
return 0 , false
}
}
s = tail
if len ( s ) == 0 {
n += int64 ( f )
continue
}
if len ( s ) >= 3 {
switch {
case strings . HasPrefix ( s , "KiB" ) :
n += int64 ( f * ( 1 << 10 ) )
s = s [ 3 : ]
continue
case strings . HasPrefix ( s , "MiB" ) :
n += int64 ( f * ( 1 << 20 ) )
s = s [ 3 : ]
continue
case strings . HasPrefix ( s , "GiB" ) :
n += int64 ( f * ( 1 << 30 ) )
s = s [ 3 : ]
continue
case strings . HasPrefix ( s , "TiB" ) :
n += int64 ( f * ( 1 << 40 ) )
s = s [ 3 : ]
continue
}
}
if len ( s ) >= 2 {
switch {
case strings . HasPrefix ( s , "Ki" ) :
n += int64 ( f * ( 1 << 10 ) )
s = s [ 2 : ]
continue
case strings . HasPrefix ( s , "Mi" ) :
n += int64 ( f * ( 1 << 20 ) )
s = s [ 2 : ]
continue
case strings . HasPrefix ( s , "Gi" ) :
n += int64 ( f * ( 1 << 30 ) )
s = s [ 2 : ]
continue
case strings . HasPrefix ( s , "Ti" ) :
n += int64 ( f * ( 1 << 40 ) )
s = s [ 2 : ]
continue
case strings . HasPrefix ( s , "KB" ) :
n += int64 ( f * 1_000 )
s = s [ 2 : ]
continue
case strings . HasPrefix ( s , "MB" ) :
n += int64 ( f * 1_000_000 )
s = s [ 2 : ]
continue
case strings . HasPrefix ( s , "GB" ) :
n += int64 ( f * 1_000_000_000 )
s = s [ 2 : ]
continue
case strings . HasPrefix ( s , "TB" ) :
n += int64 ( f * 1_000_000_000_000 )
s = s [ 2 : ]
continue
}
}
switch {
case strings . HasPrefix ( s , "B" ) :
n += int64 ( f )
s = s [ 1 : ]
continue
case strings . HasPrefix ( s , "K" ) :
n += int64 ( f * 1_000 )
s = s [ 1 : ]
continue
case strings . HasPrefix ( s , "M" ) :
n += int64 ( f * 1_000_000 )
s = s [ 1 : ]
continue
case strings . HasPrefix ( s , "G" ) :
n += int64 ( f * 1_000_000_000 )
s = s [ 1 : ]
continue
case strings . HasPrefix ( s , "T" ) :
n += int64 ( f * 1_000_000_000_000 )
s = s [ 1 : ]
continue
}
}
if isMinus {
n = - n
}
return n , true
}
// tryParseIPv4Mask parses '/num' ipv4 mask and returns (1<<(32-num))
func tryParseIPv4Mask ( s string ) ( uint64 , bool ) {
if len ( s ) == 0 || s [ 0 ] != '/' {
return 0 , false
}
s = s [ 1 : ]
n , ok := tryParseUint64 ( s )
if ! ok || n > 32 {
return 0 , false
}
return 1 << ( 32 - uint8 ( n ) ) , true
}
// tryParseDuration parses the given duration in nanoseconds and returns the result.
func tryParseDuration ( s string ) ( int64 , bool ) {
if len ( s ) == 0 {
return 0 , false
}
isMinus := s [ 0 ] == '-'
if isMinus {
s = s [ 1 : ]
}
nsecs := int64 ( 0 )
for len ( s ) > 0 {
f , ok , tail := tryParseFloat64Prefix ( s )
if ! ok {
return 0 , false
}
s = tail
if len ( s ) == 0 {
return 0 , false
}
if len ( s ) >= 3 {
if strings . HasPrefix ( s , "µs" ) {
nsecs += int64 ( f * nsecsPerMicrosecond )
s = s [ 3 : ]
continue
}
}
if len ( s ) >= 2 {
switch {
case strings . HasPrefix ( s , "ms" ) :
nsecs += int64 ( f * nsecsPerMillisecond )
s = s [ 2 : ]
continue
case strings . HasPrefix ( s , "ns" ) :
nsecs += int64 ( f )
s = s [ 2 : ]
continue
}
}
switch {
case strings . HasPrefix ( s , "y" ) :
nsecs += int64 ( f * nsecsPerYear )
s = s [ 1 : ]
case strings . HasPrefix ( s , "w" ) :
nsecs += int64 ( f * nsecsPerWeek )
s = s [ 1 : ]
continue
case strings . HasPrefix ( s , "d" ) :
nsecs += int64 ( f * nsecsPerDay )
s = s [ 1 : ]
continue
case strings . HasPrefix ( s , "h" ) :
nsecs += int64 ( f * nsecsPerHour )
s = s [ 1 : ]
continue
case strings . HasPrefix ( s , "m" ) :
nsecs += int64 ( f * nsecsPerMinute )
s = s [ 1 : ]
continue
case strings . HasPrefix ( s , "s" ) :
nsecs += int64 ( f * nsecsPerSecond )
s = s [ 1 : ]
continue
default :
return 0 , false
}
}
if isMinus {
nsecs = - nsecs
}
return nsecs , true
}
2024-06-03 22:59:25 +00:00
// marshalDurationString appends string representation of nsec duration to dst and returns the result.
func marshalDurationString ( dst [ ] byte , nsecs int64 ) [ ] byte {
2024-05-12 14:33:29 +00:00
if nsecs == 0 {
return append ( dst , '0' )
}
if nsecs < 0 {
dst = append ( dst , '-' )
nsecs = - nsecs
}
formatFloat64Seconds := nsecs >= nsecsPerSecond
if nsecs >= nsecsPerWeek {
weeks := nsecs / nsecsPerWeek
nsecs -= weeks * nsecsPerWeek
2024-05-20 02:08:30 +00:00
dst = marshalUint64String ( dst , uint64 ( weeks ) )
2024-05-12 14:33:29 +00:00
dst = append ( dst , 'w' )
}
if nsecs >= nsecsPerDay {
days := nsecs / nsecsPerDay
nsecs -= days * nsecsPerDay
2024-05-20 02:08:30 +00:00
dst = marshalUint8String ( dst , uint8 ( days ) )
2024-05-12 14:33:29 +00:00
dst = append ( dst , 'd' )
}
if nsecs >= nsecsPerHour {
hours := nsecs / nsecsPerHour
nsecs -= hours * nsecsPerHour
2024-05-20 02:08:30 +00:00
dst = marshalUint8String ( dst , uint8 ( hours ) )
2024-05-12 14:33:29 +00:00
dst = append ( dst , 'h' )
}
if nsecs >= nsecsPerMinute {
minutes := nsecs / nsecsPerMinute
nsecs -= minutes * nsecsPerMinute
2024-05-20 02:08:30 +00:00
dst = marshalUint8String ( dst , uint8 ( minutes ) )
2024-05-12 14:33:29 +00:00
dst = append ( dst , 'm' )
}
if nsecs >= nsecsPerSecond {
if formatFloat64Seconds {
seconds := float64 ( nsecs ) / nsecsPerSecond
2024-05-20 02:08:30 +00:00
dst = marshalFloat64String ( dst , seconds )
2024-05-12 14:33:29 +00:00
dst = append ( dst , 's' )
return dst
}
seconds := nsecs / nsecsPerSecond
nsecs -= seconds * nsecsPerSecond
2024-05-20 02:08:30 +00:00
dst = marshalUint8String ( dst , uint8 ( seconds ) )
2024-05-12 14:33:29 +00:00
dst = append ( dst , 's' )
}
if nsecs >= nsecsPerMillisecond {
msecs := nsecs / nsecsPerMillisecond
nsecs -= msecs * nsecsPerMillisecond
2024-05-20 02:08:30 +00:00
dst = marshalUint16String ( dst , uint16 ( msecs ) )
2024-05-12 14:33:29 +00:00
dst = append ( dst , "ms" ... )
}
if nsecs >= nsecsPerMicrosecond {
usecs := nsecs / nsecsPerMicrosecond
nsecs -= usecs * nsecsPerMicrosecond
2024-05-20 02:08:30 +00:00
dst = marshalUint16String ( dst , uint16 ( usecs ) )
2024-05-12 14:33:29 +00:00
dst = append ( dst , "µs" ... )
}
if nsecs > 0 {
2024-05-20 02:08:30 +00:00
dst = marshalUint16String ( dst , uint16 ( nsecs ) )
2024-05-12 14:33:29 +00:00
dst = append ( dst , "ns" ... )
}
return dst
}
const (
nsecsPerYear = 365 * 24 * 3600 * 1e9
nsecsPerWeek = 7 * 24 * 3600 * 1e9
nsecsPerDay = 24 * 3600 * 1e9
nsecsPerHour = 3600 * 1e9
nsecsPerMinute = 60 * 1e9
nsecsPerSecond = 1e9
nsecsPerMillisecond = 1e6
nsecsPerMicrosecond = 1e3
)
2023-06-20 05:55:12 +00:00
func tryUintEncoding ( dstBuf [ ] byte , dstValues , srcValues [ ] string ) ( [ ] byte , [ ] string , valueType , uint64 , uint64 ) {
u64s := encoding . GetUint64s ( len ( srcValues ) )
defer encoding . PutUint64s ( u64s )
a := u64s . A
var minValue , maxValue uint64
for i , v := range srcValues {
n , ok := tryParseUint64 ( v )
if ! ok {
return dstBuf , dstValues , valueTypeUnknown , 0 , 0
}
a [ i ] = n
if i == 0 || n < minValue {
minValue = n
}
if i == 0 || n > maxValue {
maxValue = n
}
}
minBitSize := bits . Len64 ( maxValue )
if minBitSize <= 8 {
for _ , n := range a {
dstLen := len ( dstBuf )
dstBuf = append ( dstBuf , byte ( n ) )
v := bytesutil . ToUnsafeString ( dstBuf [ dstLen : ] )
dstValues = append ( dstValues , v )
}
return dstBuf , dstValues , valueTypeUint8 , minValue , maxValue
}
if minBitSize <= 16 {
for _ , n := range a {
dstLen := len ( dstBuf )
dstBuf = encoding . MarshalUint16 ( dstBuf , uint16 ( n ) )
v := bytesutil . ToUnsafeString ( dstBuf [ dstLen : ] )
dstValues = append ( dstValues , v )
}
return dstBuf , dstValues , valueTypeUint16 , minValue , maxValue
}
if minBitSize <= 32 {
for _ , n := range a {
dstLen := len ( dstBuf )
dstBuf = encoding . MarshalUint32 ( dstBuf , uint32 ( n ) )
v := bytesutil . ToUnsafeString ( dstBuf [ dstLen : ] )
dstValues = append ( dstValues , v )
}
return dstBuf , dstValues , valueTypeUint32 , minValue , maxValue
}
for _ , n := range a {
dstLen := len ( dstBuf )
dstBuf = encoding . MarshalUint64 ( dstBuf , n )
v := bytesutil . ToUnsafeString ( dstBuf [ dstLen : ] )
dstValues = append ( dstValues , v )
}
return dstBuf , dstValues , valueTypeUint64 , minValue , maxValue
}
func tryDictEncoding ( dstBuf [ ] byte , dstValues , srcValues [ ] string , dict * valuesDict ) ( [ ] byte , [ ] string , valueType ) {
dict . reset ( )
dstBufOrig := dstBuf
dstValuesOrig := dstValues
for _ , v := range srcValues {
id , ok := dict . getOrAdd ( v )
if ! ok {
dict . reset ( )
return dstBufOrig , dstValuesOrig , valueTypeUnknown
}
dstLen := len ( dstBuf )
dstBuf = append ( dstBuf , id )
v := bytesutil . ToUnsafeString ( dstBuf [ dstLen : ] )
dstValues = append ( dstValues , v )
}
return dstBuf , dstValues , valueTypeDict
}
type valuesDict struct {
values [ ] string
}
func ( vd * valuesDict ) reset ( ) {
2024-05-12 14:33:29 +00:00
clear ( vd . values )
vd . values = vd . values [ : 0 ]
2023-06-20 05:55:12 +00:00
}
2024-05-12 14:33:29 +00:00
func ( vd * valuesDict ) copyFrom ( a * arena , src * valuesDict ) {
2023-06-20 05:55:12 +00:00
vd . reset ( )
2024-05-12 14:33:29 +00:00
dstValues := vd . values
for _ , v := range src . values {
v = a . copyString ( v )
dstValues = append ( dstValues , v )
}
vd . values = dstValues
2023-06-20 05:55:12 +00:00
}
2024-05-13 23:49:20 +00:00
func ( vd * valuesDict ) copyFromNoArena ( src * valuesDict ) {
vd . reset ( )
vd . values = append ( vd . values [ : 0 ] , src . values ... )
}
2023-06-20 05:55:12 +00:00
func ( vd * valuesDict ) getOrAdd ( k string ) ( byte , bool ) {
if len ( k ) > maxDictSizeBytes {
return 0 , false
}
vs := vd . values
dictSizeBytes := 0
for i , v := range vs {
if k == v {
return byte ( i ) , true
}
dictSizeBytes += len ( v )
}
if len ( vs ) >= maxDictLen || dictSizeBytes + len ( k ) > maxDictSizeBytes {
return 0 , false
}
vs = append ( vs , k )
vd . values = vs
return byte ( len ( vs ) - 1 ) , true
}
func ( vd * valuesDict ) marshal ( dst [ ] byte ) [ ] byte {
values := vd . values
if len ( values ) > maxDictLen {
logger . Panicf ( "BUG: valuesDict may contain max %d items; got %d items" , maxDictLen , len ( values ) )
}
dst = append ( dst , byte ( len ( values ) ) )
for _ , v := range values {
dst = encoding . MarshalBytes ( dst , bytesutil . ToUnsafeBytes ( v ) )
}
return dst
}
2024-05-12 14:33:29 +00:00
// unmarshal unmarshals vd from src.
//
// vd is valid until a.reset() is called.
func ( vd * valuesDict ) unmarshal ( a * arena , src [ ] byte ) ( [ ] byte , error ) {
2023-06-20 05:55:12 +00:00
vd . reset ( )
srcOrig := src
if len ( src ) < 1 {
return srcOrig , fmt . Errorf ( "cannot umarshal dict len from 0 bytes; need at least 1 byte" )
}
dictLen := int ( src [ 0 ] )
src = src [ 1 : ]
for i := 0 ; i < dictLen ; i ++ {
2024-05-13 23:23:44 +00:00
data , nSize := encoding . UnmarshalBytes ( src )
if nSize <= 0 {
return srcOrig , fmt . Errorf ( "cannot umarshal value %d out of %d from dict" , i , dictLen )
2023-06-20 05:55:12 +00:00
}
2024-05-13 23:23:44 +00:00
src = src [ nSize : ]
2024-05-12 14:33:29 +00:00
v := a . copyBytesToString ( data )
2023-06-20 05:55:12 +00:00
vd . values = append ( vd . values , v )
}
return src , nil
}
2024-05-20 02:08:30 +00:00
func unmarshalUint8 ( v string ) uint8 {
return uint8 ( v [ 0 ] )
}
func unmarshalUint16 ( v string ) uint16 {
b := bytesutil . ToUnsafeBytes ( v )
return encoding . UnmarshalUint16 ( b )
}
func unmarshalUint32 ( v string ) uint32 {
b := bytesutil . ToUnsafeBytes ( v )
return encoding . UnmarshalUint32 ( b )
}
func unmarshalUint64 ( v string ) uint64 {
b := bytesutil . ToUnsafeBytes ( v )
return encoding . UnmarshalUint64 ( b )
}
func unmarshalFloat64 ( v string ) float64 {
n := unmarshalUint64 ( v )
return math . Float64frombits ( n )
}
func unmarshalIPv4 ( v string ) uint32 {
return unmarshalUint32 ( v )
}
func unmarshalTimestampISO8601 ( v string ) int64 {
n := unmarshalUint64 ( v )
return int64 ( n )
}
func marshalUint8String ( dst [ ] byte , n uint8 ) [ ] byte {
if n < 10 {
return append ( dst , '0' + n )
}
if n < 100 {
return append ( dst , '0' + n / 10 , '0' + n % 10 )
}
if n < 200 {
dst = append ( dst , '1' )
n -= 100
} else {
dst = append ( dst , '2' )
n -= 200
}
if n < 10 {
return append ( dst , '0' , '0' + n )
}
return append ( dst , '0' + n / 10 , '0' + n % 10 )
}
func marshalUint16String ( dst [ ] byte , n uint16 ) [ ] byte {
return marshalUint64String ( dst , uint64 ( n ) )
}
func marshalUint32String ( dst [ ] byte , n uint32 ) [ ] byte {
return marshalUint64String ( dst , uint64 ( n ) )
}
func marshalUint64String ( dst [ ] byte , n uint64 ) [ ] byte {
return strconv . AppendUint ( dst , n , 10 )
}
func marshalFloat64String ( dst [ ] byte , f float64 ) [ ] byte {
return strconv . AppendFloat ( dst , f , 'f' , - 1 , 64 )
}
func marshalIPv4String ( dst [ ] byte , n uint32 ) [ ] byte {
dst = marshalUint8String ( dst , uint8 ( n >> 24 ) )
dst = append ( dst , '.' )
dst = marshalUint8String ( dst , uint8 ( n >> 16 ) )
dst = append ( dst , '.' )
dst = marshalUint8String ( dst , uint8 ( n >> 8 ) )
dst = append ( dst , '.' )
dst = marshalUint8String ( dst , uint8 ( n ) )
return dst
}
// marshalTimestampISO8601String appends ISO8601-formatted nsecs to dst and returns the result.
func marshalTimestampISO8601String ( dst [ ] byte , nsecs int64 ) [ ] byte {
return time . Unix ( 0 , nsecs ) . UTC ( ) . AppendFormat ( dst , iso8601Timestamp )
}
const iso8601Timestamp = "2006-01-02T15:04:05.000Z"
// marshalTimestampRFC3339NanoString appends RFC3339Nano-formatted nsecs to dst and returns the result.
func marshalTimestampRFC3339NanoString ( dst [ ] byte , nsecs int64 ) [ ] byte {
return time . Unix ( 0 , nsecs ) . UTC ( ) . AppendFormat ( dst , time . RFC3339Nano )
}