lib/logstorage: automatically detect columns with int64 values and store them as packed 8-byte int64 values

Previously, columns with negative int64 values were stored either as float64 or as string,
depending on whether the negative int64 values were bigger or smaller than -2^53.
Integer values smaller than -2^53 were stored as strings, since float64 cannot
hold such values without precision loss. Now such values are stored as int64.
This should improve compression ratio and query performance over columns with negative int64 values.
This commit is contained in:
Aliaksandr Valialkin 2025-01-11 23:15:00 +01:00
parent bd00e3a735
commit df723a4870
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
42 changed files with 1229 additions and 51 deletions

View file

@ -45,6 +45,8 @@ var (
"see https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model")
u64FieldsPerLog = flag.Int("u64FieldsPerLog", 1, "The number of fields with uint64 values to generate per each log entry; "+
"see https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model")
i64FieldsPerLog = flag.Int("i64FieldsPerLog", 1, "The number of fields with int64 values to generate per each log entry; "+
"see https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model")
floatFieldsPerLog = flag.Int("floatFieldsPerLog", 1, "The number of fields with float64 values to generate per each log entry; "+
"see https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model")
ipFieldsPerLog = flag.Int("ipFieldsPerLog", 1, "The number of fields with IPv4 values to generate per each log entry; "+
@ -254,6 +256,9 @@ func generateLogsAtTimestamp(bw *bufio.Writer, workerID int, ts int64, firstStre
for j := 0; j < *u64FieldsPerLog; j++ {
fmt.Fprintf(bw, `,"u64_%d":"%d"`, j, rand.Uint64())
}
for j := 0; j < *i64FieldsPerLog; j++ {
fmt.Fprintf(bw, `,"i64_%d":"%d"`, j, int64(rand.Uint64()))
}
for j := 0; j < *floatFieldsPerLog; j++ {
fmt.Fprintf(bw, `,"float_%d":"%v"`, j, math.Round(10_000*rand.Float64())/1000)
}

View file

@ -570,6 +570,7 @@ func getNamesFromColumnHeaders(chs []columnHeader) []string {
// - valueTypeDict doesn't store anything in the bloom filter, since all the encoded values
// are available directly in the valuesDict field
// - valueTypeUint8, valueTypeUint16, valueTypeUint32 and valueTypeUint64 stores encoded uint values
// - valueTypeInt64 stores encoded int64 values
// - valueTypeFloat64 stores encoded float64 values
// - valueTypeIPv4 stores encoded into uint32 ips
// - valueTypeTimestampISO8601 stores encoded into uint64 timestamps
@ -629,20 +630,29 @@ func (ch *columnHeader) reset() {
// marshal appends marshaled ch to dst and returns the result.
func (ch *columnHeader) marshal(dst []byte) []byte {
// check minValue/maxValue
if ch.valueType == valueTypeFloat64 {
switch ch.valueType {
case valueTypeInt64:
minValue := int64(ch.minValue)
maxValue := int64(ch.maxValue)
if minValue > maxValue {
logger.Panicf("BUG: minValue=%d must be smaller than maxValue=%d for valueTypeInt64", minValue, maxValue)
}
case valueTypeFloat64:
minValue := math.Float64frombits(ch.minValue)
maxValue := math.Float64frombits(ch.maxValue)
if minValue > maxValue {
logger.Panicf("BUG: minValue=%g must be smaller than maxValue=%g for valueTypeFloat64", minValue, maxValue)
}
} else if ch.valueType == valueTypeTimestampISO8601 {
case valueTypeTimestampISO8601:
minValue := int64(ch.minValue)
maxValue := int64(ch.maxValue)
if minValue > maxValue {
logger.Panicf("BUG: minValue=%g must be smaller than maxValue=%g for valueTypeTimestampISO8601", minValue, maxValue)
logger.Panicf("BUG: minValue=%d must be smaller than maxValue=%d for valueTypeTimestampISO8601", minValue, maxValue)
}
default:
if ch.minValue > ch.maxValue {
logger.Panicf("BUG: minValue=%d must be smaller than maxValue=%d for valueType=%d", ch.minValue, ch.maxValue, ch.valueType)
}
} else if ch.minValue > ch.maxValue {
logger.Panicf("BUG: minValue=%d must be smaller than maxValue=%d for valueType=%d", ch.minValue, ch.maxValue, ch.valueType)
}
// Do not encode ch.name, since it should be encoded at columnsHeaderIndex.columnHeadersRefs
@ -673,6 +683,10 @@ func (ch *columnHeader) marshal(dst []byte) []byte {
dst = encoding.MarshalUint64(dst, ch.minValue)
dst = encoding.MarshalUint64(dst, ch.maxValue)
dst = ch.marshalValuesAndBloomFilters(dst)
case valueTypeInt64:
dst = encoding.MarshalInt64(dst, int64(ch.minValue))
dst = encoding.MarshalInt64(dst, int64(ch.maxValue))
dst = ch.marshalValuesAndBloomFilters(dst)
case valueTypeFloat64:
// float64 values are encoded as uint64 via math.Float64bits()
dst = encoding.MarshalUint64(dst, ch.minValue)
@ -809,6 +823,19 @@ func (ch *columnHeader) unmarshalNoArena(src []byte, partFormatVersion uint) ([]
return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeUint64 for column %q: %w", ch.name, err)
}
src = tail
case valueTypeInt64:
if len(src) < 16 {
return srcOrig, fmt.Errorf("cannot unmarshal min/max values at valueTypeInt64 from %d bytes for column %q; need at least 16 bytes", len(src), ch.name)
}
ch.minValue = uint64(encoding.UnmarshalInt64(src))
ch.maxValue = uint64(encoding.UnmarshalInt64(src[8:]))
src = src[16:]
tail, err := ch.unmarshalValuesAndBloomFilters(src)
if err != nil {
return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeInt64 for column %q: %w", ch.name, err)
}
src = tail
case valueTypeFloat64:
if len(src) < 16 {
return srcOrig, fmt.Errorf("cannot unmarshal min/max values at valueTypeFloat64 from %d bytes for column %q; need at least 16 bytes", len(src), ch.name)

View file

@ -519,6 +519,13 @@ func (br *blockResult) newValuesEncodedFromColumnHeader(bs *blockSearch, bm *bit
}
br.addValue(v)
})
case valueTypeInt64:
visitValuesReadonly(bs, ch, bm, func(v string) {
if len(v) != 8 {
logger.Panicf("FATAL: %s: unexpected size for int64 column %q; got %d bytes; want 8 bytes", bs.partPath(), ch.name, len(v))
}
br.addValue(v)
})
case valueTypeFloat64:
visitValuesReadonly(bs, ch, bm, func(v string) {
if len(v) != 8 {
@ -642,6 +649,8 @@ func (br *blockResult) newValuesBucketedForColumn(c *blockResultColumn, bf *bySt
return br.getBucketedUint32Values(valuesEncoded, bf)
case valueTypeUint64:
return br.getBucketedUint64Values(valuesEncoded, bf)
case valueTypeInt64:
return br.getBucketedInt64Values(valuesEncoded, bf)
case valueTypeFloat64:
return br.getBucketedFloat64Values(valuesEncoded, bf)
case valueTypeIPv4:
@ -1010,6 +1019,64 @@ func (br *blockResult) getBucketedUint64Values(valuesEncoded []string, bf *bySta
return br.valuesBuf[valuesBufLen:]
}
// getBucketedInt64Values converts the binary-encoded int64 values from valuesEncoded
// into their string form via marshalInt64String and appends the resulting strings
// to br.valuesBuf. The newly appended tail of br.valuesBuf is returned.
//
// If bf has bucket config, then every value is aligned to its bucket
// (using bf.bucketSize and bf.bucketOffset truncated to int64) before formatting.
//
// The returned strings point into br.a.b, so they are valid only while that arena is alive.
// Runs of identical results reuse the previously formatted string instead of formatting it again.
func (br *blockResult) getBucketedInt64Values(valuesEncoded []string, bf *byStatsField) []string {
	arena := br.a.b
	dst := br.valuesBuf
	dstStart := len(dst)

	// last is the most recently formatted string; it is reused for repeated values.
	var last string

	if bf.hasBucketConfig() {
		step := int64(bf.bucketSize)
		if step == 0 {
			// Avoid division by zero in the modulo below.
			step = 1
		}
		offset := int64(bf.bucketOffset)

		prevBucket := int64(0)
		for i := range valuesEncoded {
			if i > 0 && valuesEncoded[i] == valuesEncoded[i-1] {
				dst = append(dst, last)
				continue
			}

			bucket := unmarshalInt64(valuesEncoded[i])
			bucket -= offset
			bucket -= bucket % step
			bucket += offset

			// Format the bucket only if it differs from the previous one.
			if i == 0 || bucket != prevBucket {
				prevBucket = bucket
				start := len(arena)
				arena = marshalInt64String(arena, bucket)
				last = bytesutil.ToUnsafeString(arena[start:])
			}
			dst = append(dst, last)
		}
	} else {
		for i := range valuesEncoded {
			if i == 0 || valuesEncoded[i] != valuesEncoded[i-1] {
				start := len(arena)
				arena = marshalInt64String(arena, unmarshalInt64(valuesEncoded[i]))
				last = bytesutil.ToUnsafeString(arena[start:])
			}
			dst = append(dst, last)
		}
	}

	br.valuesBuf = dst
	br.a.b = arena

	return br.valuesBuf[dstStart:]
}
func (br *blockResult) getBucketedFloat64Values(valuesEncoded []string, bf *byStatsField) []string {
buf := br.a.b
valuesBuf := br.valuesBuf
@ -1820,6 +1887,9 @@ func (c *blockResultColumn) getFloatValueAtRow(br *blockResult, rowIdx int) (flo
case valueTypeUint64:
v := valuesEncoded[rowIdx]
return float64(unmarshalUint64(v)), true
case valueTypeInt64:
v := valuesEncoded[rowIdx]
return float64(unmarshalInt64(v)), true
case valueTypeFloat64:
v := valuesEncoded[rowIdx]
f := unmarshalFloat64(v)
@ -1863,6 +1933,8 @@ func (c *blockResultColumn) sumLenValues(br *blockResult) uint64 {
return c.sumLenStringValues(br)
case valueTypeUint64:
return c.sumLenStringValues(br)
case valueTypeInt64:
return c.sumLenStringValues(br)
case valueTypeFloat64:
return c.sumLenStringValues(br)
case valueTypeIPv4:
@ -1960,6 +2032,12 @@ func (c *blockResultColumn) sumValues(br *blockResult) (float64, int) {
sum += float64(unmarshalUint64(v))
}
return sum, br.rowsLen
case valueTypeInt64:
sum := float64(0)
for _, v := range c.getValuesEncoded(br) {
sum += float64(unmarshalInt64(v))
}
return sum, br.rowsLen
case valueTypeFloat64:
sum := float64(0)
for _, v := range c.getValuesEncoded(br) {

View file

@ -120,6 +120,8 @@ func (fp *filterAnyCasePhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
matchUint32ByExactValue(bs, ch, bm, phraseLowercase, tokens)
case valueTypeUint64:
matchUint64ByExactValue(bs, ch, bm, phraseLowercase, tokens)
case valueTypeInt64:
matchInt64ByExactValue(bs, ch, bm, phraseLowercase, tokens)
case valueTypeFloat64:
matchFloat64ByPhrase(bs, ch, bm, phraseLowercase, tokens)
case valueTypeIPv4:

View file

@ -622,6 +622,71 @@ func TestFilterAnyCasePhrase(t *testing.T) {
testFilterMatchForColumns(t, columns, pf, "foo", nil)
})
t.Run("int64", func(t *testing.T) {
t.Parallel()
columns := []column{
{
name: "foo",
values: []string{
"1234",
"0",
"3454",
"65536",
"-12345678901",
"1",
"2",
"3",
"4",
},
},
}
// match
pf := &filterAnyCasePhrase{
fieldName: "foo",
phrase: "1234",
}
testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
pf = &filterAnyCasePhrase{
fieldName: "foo",
phrase: "-12345678901",
}
testFilterMatchForColumns(t, columns, pf, "foo", []int{4})
pf = &filterAnyCasePhrase{
fieldName: "non-existing-column",
phrase: "",
}
testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
// mismatch
pf = &filterAnyCasePhrase{
fieldName: "foo",
phrase: "bar",
}
testFilterMatchForColumns(t, columns, pf, "foo", nil)
pf = &filterAnyCasePhrase{
fieldName: "foo",
phrase: "",
}
testFilterMatchForColumns(t, columns, pf, "foo", nil)
pf = &filterAnyCasePhrase{
fieldName: "foo",
phrase: "33",
}
testFilterMatchForColumns(t, columns, pf, "foo", nil)
pf = &filterAnyCasePhrase{
fieldName: "foo",
phrase: "12345678901234567890",
}
testFilterMatchForColumns(t, columns, pf, "foo", nil)
})
t.Run("float64", func(t *testing.T) {
t.Parallel()

View file

@ -121,6 +121,8 @@ func (fp *filterAnyCasePrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
matchUint32ByPrefix(bs, ch, bm, prefixLowercase)
case valueTypeUint64:
matchUint64ByPrefix(bs, ch, bm, prefixLowercase)
case valueTypeInt64:
matchInt64ByPrefix(bs, ch, bm, prefixLowercase)
case valueTypeFloat64:
matchFloat64ByPrefix(bs, ch, bm, prefixLowercase, tokens)
case valueTypeIPv4:

View file

@ -658,6 +658,71 @@ func TestFilterAnyCasePrefix(t *testing.T) {
testFilterMatchForColumns(t, columns, fp, "foo", nil)
})
t.Run("int64", func(t *testing.T) {
t.Parallel()
columns := []column{
{
name: "foo",
values: []string{
"1234",
"0",
"3454",
"65536",
"-12345678901",
"1",
"2",
"3",
"4",
},
},
}
// match
fp := &filterAnyCasePrefix{
fieldName: "foo",
prefix: "1234",
}
testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 4})
fp = &filterAnyCasePrefix{
fieldName: "foo",
prefix: "",
}
testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
fp = &filterAnyCasePrefix{
fieldName: "foo",
prefix: "-12345678901",
}
testFilterMatchForColumns(t, columns, fp, "foo", []int{4})
// mismatch
fp = &filterAnyCasePrefix{
fieldName: "foo",
prefix: "bar",
}
testFilterMatchForColumns(t, columns, fp, "foo", nil)
fp = &filterAnyCasePrefix{
fieldName: "foo",
prefix: "33",
}
testFilterMatchForColumns(t, columns, fp, "foo", nil)
fp = &filterAnyCasePrefix{
fieldName: "foo",
prefix: "12345678901234567890",
}
testFilterMatchForColumns(t, columns, fp, "foo", nil)
fp = &filterAnyCasePrefix{
fieldName: "non-existing-column",
prefix: "",
}
testFilterMatchForColumns(t, columns, fp, "foo", nil)
})
t.Run("float64", func(t *testing.T) {
t.Parallel()

View file

@ -85,6 +85,8 @@ func (fr *filterDayRange) applyToBlockResult(br *blockResult, bm *bitmap) {
bm.resetBits()
case valueTypeUint64:
bm.resetBits()
case valueTypeInt64:
bm.resetBits()
case valueTypeFloat64:
bm.resetBits()
case valueTypeIPv4:

View file

@ -125,6 +125,17 @@ func (fe *filterExact) applyToBlockResult(br *blockResult, bm *bitmap) {
n := unmarshalUint64(valuesEncoded[idx])
return n == nNeeded
})
case valueTypeInt64:
nNeeded, ok := tryParseInt64(value)
if !ok {
bm.resetBits()
return
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
n := unmarshalInt64(valuesEncoded[idx])
return n == nNeeded
})
case valueTypeFloat64:
fNeeded, ok := tryParseFloat64Exact(value)
if !ok {
@ -208,6 +219,8 @@ func (fe *filterExact) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
matchUint32ByExactValue(bs, ch, bm, value, tokens)
case valueTypeUint64:
matchUint64ByExactValue(bs, ch, bm, value, tokens)
case valueTypeInt64:
matchInt64ByExactValue(bs, ch, bm, value, tokens)
case valueTypeFloat64:
matchFloat64ByExactValue(bs, ch, bm, value, tokens)
case valueTypeIPv4:
@ -327,6 +340,18 @@ func matchUint64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phra
bbPool.Put(bb)
}
// matchInt64ByExactValue narrows bm to the rows of the int64 column ch matching phrase.
//
// All the bits in bm are reset if phrase cannot be parsed as int64
// or if the parsed number is outside the [ch.minValue, ch.maxValue] range.
// Otherwise the number is marshaled into its binary form and the matching
// is delegated to matchBinaryValue together with the given tokens.
func matchInt64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
	needle, ok := tryParseInt64(phrase)
	if !ok {
		bm.resetBits()
		return
	}
	if needle < int64(ch.minValue) || needle > int64(ch.maxValue) {
		// The needed value cannot be present in the block.
		bm.resetBits()
		return
	}

	encoded := bbPool.Get()
	encoded.B = encoding.MarshalInt64(encoded.B, needle)
	matchBinaryValue(bs, ch, bm, encoded.B, tokens)
	bbPool.Put(encoded)
}
func matchBinaryValue(bs *blockSearch, ch *columnHeader, bm *bitmap, binValue []byte, tokens []uint64) {
if !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()

View file

@ -84,6 +84,8 @@ func (fep *filterExactPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
matchUint32ByExactPrefix(bs, ch, bm, prefix, tokens)
case valueTypeUint64:
matchUint64ByExactPrefix(bs, ch, bm, prefix, tokens)
case valueTypeInt64:
matchInt64ByExactPrefix(bs, ch, bm, prefix, tokens)
case valueTypeFloat64:
matchFloat64ByExactPrefix(bs, ch, bm, prefix, tokens)
case valueTypeIPv4:
@ -222,6 +224,33 @@ func matchUint64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pre
bbPool.Put(bb)
}
// matchInt64ByExactPrefix narrows bm to the rows of the int64 column ch
// whose string representation starts with the given prefix.
//
// An empty prefix matches all the values, so bm is left untouched in this case.
// All the bits in bm are reset when the prefix cannot match any int64 value
// stored in the block.
func matchInt64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
	if prefix == "" {
		// An empty prefix matches all the values
		return
	}
	if len(tokens) > 0 {
		// Non-empty tokens means that the prefix contains at least two tokens.
		// Multiple tokens cannot match any int64 value.
		bm.resetBits()
		return
	}
	if prefix != "-" {
		n, ok := tryParseInt64(prefix)
		if !ok {
			bm.resetBits()
			return
		}
		// The prefix may contain only a part of the number, so the block can be skipped
		// only when every number starting with the prefix is outside [minValue, maxValue]:
		//
		//   - numbers starting with a negative prefix such as `-12` are smaller or equal to -12,
		//     so they cannot match if the prefix is smaller than minValue;
		//   - numbers starting with a non-negative prefix such as `12` are bigger or equal to 12,
		//     so they cannot match if the prefix is bigger than maxValue.
		//
		// Comparing a non-negative prefix with minValue (or a negative prefix with maxValue)
		// would be incorrect. For example, the prefix `1` must match `100`
		// in a block with values in the range [100, 150].
		if prefix[0] == '-' {
			if n < int64(ch.minValue) {
				bm.resetBits()
				return
			}
		} else if n > int64(ch.maxValue) {
			bm.resetBits()
			return
		}
	}

	bb := bbPool.Get()
	visitValues(bs, ch, bm, func(v string) bool {
		s := toInt64String(bs, bb, v)
		return matchExactPrefix(s, prefix)
	})
	bbPool.Put(bb)
}
func matchMinMaxExactPrefix(ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) bool {
if prefix == "" {
// An empty prefix matches all the values

View file

@ -430,6 +430,73 @@ func TestFilterExactPrefix(t *testing.T) {
testFilterMatchForColumns(t, columns, fep, "foo", nil)
})
t.Run("int64", func(t *testing.T) {
t.Parallel()
columns := []column{
{
name: "foo",
values: []string{
"123",
"12",
"32",
"0",
"0",
"-12",
"1",
"-2",
"3",
"123456789012",
"5",
},
},
}
// match
fep := &filterExactPrefix{
fieldName: "foo",
prefix: "12",
}
testFilterMatchForColumns(t, columns, fep, "foo", []int{0, 1, 9})
fep = &filterExactPrefix{
fieldName: "foo",
prefix: "-12",
}
testFilterMatchForColumns(t, columns, fep, "foo", []int{5})
fep = &filterExactPrefix{
fieldName: "foo",
prefix: "-",
}
testFilterMatchForColumns(t, columns, fep, "foo", []int{5, 7})
fep = &filterExactPrefix{
fieldName: "foo",
prefix: "",
}
testFilterMatchForColumns(t, columns, fep, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
// mismatch
fep = &filterExactPrefix{
fieldName: "foo",
prefix: "bar",
}
testFilterMatchForColumns(t, columns, fep, "foo", nil)
fep = &filterExactPrefix{
fieldName: "foo",
prefix: "1234567890123",
}
testFilterMatchForColumns(t, columns, fep, "foo", nil)
fep = &filterExactPrefix{
fieldName: "foo",
prefix: "7",
}
testFilterMatchForColumns(t, columns, fep, "foo", nil)
})
t.Run("float64", func(t *testing.T) {
t.Parallel()

View file

@ -412,6 +412,67 @@ func TestFilterExact(t *testing.T) {
testFilterMatchForColumns(t, columns, fe, "foo", nil)
})
t.Run("int64", func(t *testing.T) {
t.Parallel()
columns := []column{
{
name: "foo",
values: []string{
"123",
"12",
"32",
"0",
"0",
"-12",
"12345678901",
"2",
"3",
"4",
"5",
},
},
}
// match
fe := &filterExact{
fieldName: "foo",
value: "12",
}
testFilterMatchForColumns(t, columns, fe, "foo", []int{1})
fe = &filterExact{
fieldName: "foo",
value: "-12",
}
testFilterMatchForColumns(t, columns, fe, "foo", []int{5})
fe = &filterExact{
fieldName: "non-existing-column",
value: "",
}
testFilterMatchForColumns(t, columns, fe, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
// mismatch
fe = &filterExact{
fieldName: "foo",
value: "bar",
}
testFilterMatchForColumns(t, columns, fe, "foo", nil)
fe = &filterExact{
fieldName: "foo",
value: "",
}
testFilterMatchForColumns(t, columns, fe, "foo", nil)
fe = &filterExact{
fieldName: "foo",
value: "33",
}
testFilterMatchForColumns(t, columns, fe, "foo", nil)
})
t.Run("float64", func(t *testing.T) {
t.Parallel()

View file

@ -48,6 +48,9 @@ type filterIn struct {
uint64ValuesOnce sync.Once
uint64Values map[string]struct{}
int64ValuesOnce sync.Once
int64Values map[string]struct{}
float64ValuesOnce sync.Once
float64Values map[string]struct{}
@ -185,6 +188,11 @@ func (fi *filterIn) getUint64Values() map[string]struct{} {
return fi.uint64Values
}
// getInt64Values returns the set built by initInt64Values.
//
// The set is initialized at most once via fi.int64ValuesOnce on the first call.
func (fi *filterIn) getInt64Values() map[string]struct{} {
	once := &fi.int64ValuesOnce
	once.Do(fi.initInt64Values)

	return fi.int64Values
}
func (fi *filterIn) initUint64Values() {
values := fi.values
m := make(map[string]struct{}, len(values))
@ -202,6 +210,23 @@ func (fi *filterIn) initUint64Values() {
fi.uint64Values = m
}
// initInt64Values fills fi.int64Values with binary-encoded forms of fi.values.
//
// Every value from fi.values which can be parsed as int64 is marshaled
// with encoding.MarshalInt64 and is used as a key in the resulting set.
// Values that cannot be parsed as int64 are skipped.
func (fi *filterIn) initInt64Values() {
	valuesLen := len(fi.values)
	set := make(map[string]struct{}, valuesLen)

	// All the keys share a single backing buffer with 8 bytes reserved per value.
	arena := make([]byte, 0, valuesLen*8)
	for _, raw := range fi.values {
		if parsed, ok := tryParseInt64(raw); ok {
			start := len(arena)
			arena = encoding.MarshalInt64(arena, parsed)
			key := bytesutil.ToUnsafeString(arena[start:])
			set[key] = struct{}{}
		}
	}

	fi.int64Values = set
}
func (fi *filterIn) getFloat64Values() map[string]struct{} {
fi.float64ValuesOnce.Do(fi.initFloat64Values)
return fi.float64Values
@ -320,6 +345,9 @@ func (fi *filterIn) applyToBlockResult(br *blockResult, bm *bitmap) {
case valueTypeUint64:
binValues := fi.getUint64Values()
matchColumnByBinValues(br, bm, c, binValues)
case valueTypeInt64:
binValues := fi.getInt64Values()
matchColumnByBinValues(br, bm, c, binValues)
case valueTypeFloat64:
binValues := fi.getFloat64Values()
matchColumnByBinValues(br, bm, c, binValues)
@ -407,6 +435,9 @@ func (fi *filterIn) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
case valueTypeUint64:
binValues := fi.getUint64Values()
matchAnyValue(bs, ch, bm, binValues, commonTokens, tokenSets)
case valueTypeInt64:
binValues := fi.getInt64Values()
matchAnyValue(bs, ch, bm, binValues, commonTokens, tokenSets)
case valueTypeFloat64:
binValues := fi.getFloat64Values()
matchAnyValue(bs, ch, bm, binValues, commonTokens, tokenSets)

View file

@ -504,6 +504,67 @@ func TestFilterIn(t *testing.T) {
testFilterMatchForColumns(t, columns, fi, "foo", nil)
})
t.Run("int64", func(t *testing.T) {
t.Parallel()
columns := []column{
{
name: "foo",
values: []string{
"123",
"12",
"-32",
"0",
"0",
"12",
"12345678901",
"2",
"3",
"4",
"5",
},
},
}
// match
fi := &filterIn{
fieldName: "foo",
values: []string{"12", "-32"},
}
testFilterMatchForColumns(t, columns, fi, "foo", []int{1, 2, 5})
fi = &filterIn{
fieldName: "foo",
values: []string{"0"},
}
testFilterMatchForColumns(t, columns, fi, "foo", []int{3, 4})
fi = &filterIn{
fieldName: "non-existing-column",
values: []string{""},
}
testFilterMatchForColumns(t, columns, fi, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
// mismatch
fi = &filterIn{
fieldName: "foo",
values: []string{"bar"},
}
testFilterMatchForColumns(t, columns, fi, "foo", nil)
fi = &filterIn{
fieldName: "foo",
values: []string{},
}
testFilterMatchForColumns(t, columns, fi, "foo", nil)
fi = &filterIn{
fieldName: "foo",
values: []string{"33"},
}
testFilterMatchForColumns(t, columns, fi, "foo", nil)
})
t.Run("float64", func(t *testing.T) {
t.Parallel()

View file

@ -77,6 +77,8 @@ func (fr *filterIPv4Range) applyToBlockResult(br *blockResult, bm *bitmap) {
bm.resetBits()
case valueTypeUint64:
bm.resetBits()
case valueTypeInt64:
bm.resetBits()
case valueTypeFloat64:
bm.resetBits()
case valueTypeIPv4:
@ -131,6 +133,8 @@ func (fr *filterIPv4Range) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
bm.resetBits()
case valueTypeUint64:
bm.resetBits()
case valueTypeInt64:
bm.resetBits()
case valueTypeFloat64:
bm.resetBits()
case valueTypeIPv4:

View file

@ -310,6 +310,37 @@ func TestFilterIPv4Range(t *testing.T) {
testFilterMatchForColumns(t, columns, fr, "foo", nil)
})
t.Run("int64", func(t *testing.T) {
t.Parallel()
columns := []column{
{
name: "foo",
values: []string{
"123",
"12",
"32",
"0",
"0",
"-12345678901",
"1",
"2",
"3",
"4",
"5",
},
},
}
// mismatch
fr := &filterIPv4Range{
fieldName: "foo",
minValue: 0,
maxValue: 0xffffffff,
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
})
t.Run("float64", func(t *testing.T) {
t.Parallel()

View file

@ -88,6 +88,12 @@ func (fr *filterLenRange) applyToBlockResult(br *blockResult, bm *bitmap) {
return
}
matchColumnByLenRange(br, bm, c, minLen, maxLen)
case valueTypeInt64:
if minLen > 21 || maxLen == 0 {
bm.resetBits()
return
}
matchColumnByLenRange(br, bm, c, minLen, maxLen)
case valueTypeFloat64:
if minLen > 24 || maxLen == 0 {
bm.resetBits()
@ -156,6 +162,8 @@ func (fr *filterLenRange) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
matchUint32ByLenRange(bs, ch, bm, minLen, maxLen)
case valueTypeUint64:
matchUint64ByLenRange(bs, ch, bm, minLen, maxLen)
case valueTypeInt64:
matchInt64ByLenRange(bs, ch, bm, minLen, maxLen)
case valueTypeFloat64:
matchFloat64ByLenRange(bs, ch, bm, minLen, maxLen)
case valueTypeIPv4:
@ -293,6 +301,33 @@ func matchUint64ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen
bbPool.Put(bb)
}
// matchInt64ByLenRange narrows bm to the rows of the int64 column ch
// whose string representation has length in the range [minLen, maxLen].
//
// All the bits in bm are reset if no value in the block can have the needed length.
func matchInt64ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
	if minLen > 21 || maxLen == 0 {
		bm.resetBits()
		return
	}

	bb := bbPool.Get()

	// Any value in the block lies between ch.minValue and ch.maxValue,
	// so its string representation cannot be longer than the longest of these two.
	bb.B = marshalInt64String(bb.B[:0], int64(ch.minValue))
	maxvLen := len(bb.B)
	bb.B = marshalInt64String(bb.B[:0], int64(ch.maxValue))
	if len(bb.B) > maxvLen {
		maxvLen = len(bb.B)
	}
	if uint64(maxvLen) < minLen {
		// Return bb to the pool before the early exit, so it can be reused.
		bbPool.Put(bb)
		bm.resetBits()
		return
	}

	visitValues(bs, ch, bm, func(v string) bool {
		s := toInt64String(bs, bb, v)
		return matchLenRange(s, minLen, maxLen)
	})
	bbPool.Put(bb)
}
func matchLenRange(s string, minLen, maxLen uint64) bool {
sLen := uint64(utf8.RuneCountInString(s))
return sLen >= minLen && sLen <= maxLen

View file

@ -344,6 +344,52 @@ func TestFilterLenRange(t *testing.T) {
testFilterMatchForColumns(t, columns, fr, "foo", nil)
})
t.Run("int64", func(t *testing.T) {
t.Parallel()
columns := []column{
{
name: "foo",
values: []string{
"123456789012",
"12",
"32",
"0",
"0",
"12",
"-1",
"2",
"3",
"4",
"5",
},
},
}
// match
fr := &filterLenRange{
fieldName: "foo",
minLen: 2,
maxLen: 2,
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{1, 2, 5, 6})
// mismatch
fr = &filterLenRange{
fieldName: "foo",
minLen: 0,
maxLen: 0,
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
fr = &filterLenRange{
fieldName: "foo",
minLen: 20,
maxLen: 20,
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
})
t.Run("float64", func(t *testing.T) {
t.Parallel()

View file

@ -95,6 +95,8 @@ func (fp *filterPhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
matchUint32ByExactValue(bs, ch, bm, phrase, tokens)
case valueTypeUint64:
matchUint64ByExactValue(bs, ch, bm, phrase, tokens)
case valueTypeInt64:
matchInt64ByExactValue(bs, ch, bm, phrase, tokens)
case valueTypeFloat64:
matchFloat64ByPhrase(bs, ch, bm, phrase, tokens)
case valueTypeIPv4:
@ -401,6 +403,13 @@ func applyToBlockResultGeneric(br *blockResult, bm *bitmap, fieldName, phrase st
return
}
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
case valueTypeInt64:
_, ok := tryParseInt64(phrase)
if !ok {
bm.resetBits()
return
}
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
case valueTypeFloat64:
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
case valueTypeIPv4:

View file

@ -627,6 +627,71 @@ func TestFilterPhrase(t *testing.T) {
testFilterMatchForColumns(t, columns, pf, "foo", nil)
})
t.Run("int64", func(t *testing.T) {
t.Parallel()
columns := []column{
{
name: "foo",
values: []string{
"1234",
"0",
"3454",
"65536",
"-12345678901",
"1",
"2",
"3",
"4",
},
},
}
// match
pf := &filterPhrase{
fieldName: "foo",
phrase: "1234",
}
testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
pf = &filterPhrase{
fieldName: "foo",
phrase: "-12345678901",
}
testFilterMatchForColumns(t, columns, pf, "foo", []int{4})
pf = &filterPhrase{
fieldName: "non-existing-column",
phrase: "",
}
testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
// mismatch
pf = &filterPhrase{
fieldName: "foo",
phrase: "bar",
}
testFilterMatchForColumns(t, columns, pf, "foo", nil)
pf = &filterPhrase{
fieldName: "foo",
phrase: "",
}
testFilterMatchForColumns(t, columns, pf, "foo", nil)
pf = &filterPhrase{
fieldName: "foo",
phrase: "33",
}
testFilterMatchForColumns(t, columns, pf, "foo", nil)
pf = &filterPhrase{
fieldName: "foo",
phrase: "12345678901234567890",
}
testFilterMatchForColumns(t, columns, pf, "foo", nil)
})
t.Run("float64", func(t *testing.T) {
t.Parallel()

View file

@ -90,6 +90,8 @@ func (fp *filterPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
matchUint32ByPrefix(bs, ch, bm, prefix)
case valueTypeUint64:
matchUint64ByPrefix(bs, ch, bm, prefix)
case valueTypeInt64:
matchInt64ByPrefix(bs, ch, bm, prefix)
case valueTypeFloat64:
matchFloat64ByPrefix(bs, ch, bm, prefix, tokens)
case valueTypeIPv4:
@ -286,6 +288,31 @@ func matchUint64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix s
bbPool.Put(bb)
}
// matchInt64ByPrefix narrows bm to the rows of the int64 column ch
// whose string representation matches the given prefix according to matchPrefix.
//
// An empty prefix matches all the values, so bm is left untouched in this case.
// All the bits in bm are reset when the prefix cannot match any int64 value
// stored in the block.
func matchInt64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string) {
	if prefix == "" {
		// Fast path - all the int64 values match an empty prefix aka `*`
		return
	}
	// The prefix may contain a part of the number.
	// For example, `foo:12*` must match `12` and `123`.
	// This means we cannot search in binary representation of numbers.
	// Instead, we need searching for the whole prefix in string representation of numbers :(
	if prefix != "-" {
		n, ok := tryParseInt64(prefix)
		if !ok {
			bm.resetBits()
			return
		}
		// A number containing the digits of the prefix has the magnitude bigger or equal to |n|.
		// Note that the prefix `12` may match both `123` and `-123`.
		// So the block can be skipped only if it contains neither values <= -|n| nor values >= |n|.
		// Checking for minValue <= n <= maxValue would be incorrect. For example, the prefix `1`
		// must match `100` in a block with values in the range [100, 150].
		negMagnitude := n
		if negMagnitude > 0 {
			// The negation of a positive int64 cannot overflow.
			negMagnitude = -negMagnitude
		}
		mayMatchNegative := int64(ch.minValue) <= negMagnitude
		// -negMagnitude stays negative only for the minimum int64 value,
		// which has no positive counterpart.
		mayMatchPositive := -negMagnitude >= 0 && int64(ch.maxValue) >= -negMagnitude
		if !mayMatchNegative && !mayMatchPositive {
			bm.resetBits()
			return
		}
	}
	// There is no need in matching against bloom filters, since tokens is empty.
	bb := bbPool.Get()
	visitValues(bs, ch, bm, func(v string) bool {
		s := toInt64String(bs, bb, v)
		return matchPrefix(s, prefix)
	})
	bbPool.Put(bb)
}
func matchPrefix(s, prefix string) bool {
if len(prefix) == 0 {
// Special case - empty prefix matches any string.
@ -368,3 +395,12 @@ func toUint64String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string
bb.B = marshalUint64String(bb.B[:0], n)
return bytesutil.ToUnsafeString(bb.B)
}
// toInt64String converts the binary-encoded int64 value v into its string form.
//
// The result is written into bb.B, overwriting its previous contents,
// so the returned string is valid only until the next modification of bb.
//
// It panics if v isn't 8 bytes long.
func toInt64String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
	if got := len(v); got != 8 {
		logger.Panicf("FATAL: %s: unexpected length for binary representation of int64 number; got %d; want 8", bs.partPath(), got)
	}

	bb.B = marshalInt64String(bb.B[:0], unmarshalInt64(v))
	return bytesutil.ToUnsafeString(bb.B)
}

View file

@ -123,6 +123,18 @@ func (fr *filterRange) applyToBlockResult(br *blockResult, bm *bitmap) {
n := unmarshalUint64(v)
return n >= minValueUint && n <= maxValueUint
})
case valueTypeInt64:
minValueInt, maxValueInt := toInt64Range(minValue, maxValue)
if minValueInt > int64(c.maxValue) || maxValueInt < int64(c.minValue) {
bm.resetBits()
return
}
valuesEncoded := c.getValuesEncoded(br)
bm.forEachSetBit(func(idx int) bool {
v := valuesEncoded[idx]
n := unmarshalInt64(v)
return n >= minValueInt && n <= maxValueInt
})
case valueTypeFloat64:
if minValue > math.Float64frombits(c.maxValue) || maxValue < math.Float64frombits(c.minValue) {
bm.resetBits()
@ -202,6 +214,8 @@ func (fr *filterRange) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
matchUint32ByRange(bs, ch, bm, minValue, maxValue)
case valueTypeUint64:
matchUint64ByRange(bs, ch, bm, minValue, maxValue)
case valueTypeInt64:
matchInt64ByRange(bs, ch, bm, minValue, maxValue)
case valueTypeFloat64:
matchFloat64ByRange(bs, ch, bm, minValue, maxValue)
case valueTypeIPv4:
@ -316,6 +330,23 @@ func matchUint64ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue,
bbPool.Put(bb)
}
// matchInt64ByRange narrows bm to the rows of the int64 column ch
// with values in the integer range obtained from [minValue, maxValue] via toInt64Range.
//
// All the bits in bm are reset if the range doesn't intersect [ch.minValue, ch.maxValue].
func matchInt64ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue float64) {
	minValueInt, maxValueInt := toInt64Range(minValue, maxValue)
	if minValueInt > int64(ch.maxValue) || maxValueInt < int64(ch.minValue) {
		bm.resetBits()
		return
	}

	// The values are compared in their binary form, so there is no need in a scratch buffer
	// for converting them to strings.
	visitValues(bs, ch, bm, func(v string) bool {
		if len(v) != 8 {
			logger.Panicf("FATAL: %s: unexpected length for binary representation of int64 number; got %d; want 8", bs.partPath(), len(v))
		}
		n := unmarshalInt64(v)
		return n >= minValueInt && n <= maxValueInt
	})
}
func matchTimestampISO8601ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue float64) {
minValueInt, maxValueInt := toInt64Range(minValue, maxValue)
if maxValue < 0 || minValueInt > int64(ch.maxValue) || maxValueInt < int64(ch.minValue) {

View file

@ -493,6 +493,80 @@ func TestFilterRange(t *testing.T) {
testFilterMatchForColumns(t, columns, fr, "foo", nil)
})
t.Run("int64", func(t *testing.T) {
t.Parallel()
columns := []column{
{
name: "foo",
values: []string{
"123",
"12",
"-32",
"0",
"0",
"12345678901",
"1",
"2",
"3",
"4",
"5",
},
},
}
// match
fr := &filterRange{
fieldName: "foo",
minValue: -inf,
maxValue: 3,
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{2, 3, 4, 6, 7, 8})
fr = &filterRange{
fieldName: "foo",
minValue: -10,
maxValue: 2.9,
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{3, 4, 6, 7})
fr = &filterRange{
fieldName: "foo",
minValue: -1e18,
maxValue: 2.9,
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{2, 3, 4, 6, 7})
fr = &filterRange{
fieldName: "foo",
minValue: 1000,
maxValue: inf,
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{5})
// mismatch
fr = &filterRange{
fieldName: "foo",
minValue: -1,
maxValue: -0.1,
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
fr = &filterRange{
fieldName: "foo",
minValue: 0.1,
maxValue: 0.9,
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
fr = &filterRange{
fieldName: "foo",
minValue: 2.9,
maxValue: 0.1,
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
})
t.Run("float64", func(t *testing.T) {
t.Parallel()

View file

@ -111,6 +111,8 @@ func (fr *filterRegexp) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
matchUint32ByRegexp(bs, ch, bm, re, tokens)
case valueTypeUint64:
matchUint64ByRegexp(bs, ch, bm, re, tokens)
case valueTypeInt64:
matchInt64ByRegexp(bs, ch, bm, re, tokens)
case valueTypeFloat64:
matchFloat64ByRegexp(bs, ch, bm, re, tokens)
case valueTypeIPv4:
@ -235,3 +237,16 @@ func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *rege
})
bbPool.Put(bb)
}
// matchInt64ByRegexp applies the regexp re to the values of the int64 column ch.
//
// Every visited value is converted to its string form via toInt64String
// before it is matched against re. If the bloom filter check for tokens fails,
// then the regexp isn't evaluated at all and bm is reset.
func matchInt64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
	if matchBloomFilterAllTokens(bs, ch, tokens) {
		// bb is a scratch buffer shared by all the conversions below.
		bb := bbPool.Get()
		visitValues(bs, ch, bm, func(v string) bool {
			return re.MatchString(toInt64String(bs, bb, v))
		})
		bbPool.Put(bb)
		return
	}

	// At least one of the tokens is missing in the column according to the bloom filter.
	bm.resetBits()
}

View file

@ -121,6 +121,8 @@ func (fs *filterSequence) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
matchUint32BySequence(bs, ch, bm, phrases, tokens)
case valueTypeUint64:
matchUint64BySequence(bs, ch, bm, phrases, tokens)
case valueTypeInt64:
matchInt64BySequence(bs, ch, bm, phrases, tokens)
case valueTypeFloat64:
matchFloat64BySequence(bs, ch, bm, phrases, tokens)
case valueTypeIPv4:
@ -245,6 +247,14 @@ func matchUint64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase
matchUint64ByExactValue(bs, ch, bm, phrases[0], tokens)
}
// matchInt64BySequence applies the sequence of phrases to the int64 column ch.
//
// A sequence with more than one phrase resets bm without inspecting the values.
// A single phrase is delegated to matchInt64ByExactValue.
//
// NOTE(review): phrases[0] is accessed unconditionally for len(phrases) <= 1,
// so the caller is presumably expected to handle empty phrases before
// calling this function - confirm against applyToBlockSearch.
func matchInt64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
	if len(phrases) <= 1 {
		matchInt64ByExactValue(bs, ch, bm, phrases[0], tokens)
		return
	}
	bm.resetBits()
}
func matchSequence(s string, phrases []string) bool {
for _, phrase := range phrases {
n := getPhrasePos(s, phrase)

View file

@ -581,6 +581,85 @@ func TestFilterSequence(t *testing.T) {
testFilterMatchForColumns(t, columns, fs, "foo", nil)
})
t.Run("int64", func(t *testing.T) {
t.Parallel()
columns := []column{
{
name: "foo",
values: []string{
"123",
"12",
"-32",
"0",
"0",
"12",
"12345678901",
"2",
"3",
"4",
"5",
},
},
}
// match
fs := &filterSequence{
fieldName: "foo",
phrases: []string{"12"},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{1, 5})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"-32"},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{2})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{""},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
fs = &filterSequence{
fieldName: "non-existing-column",
phrases: []string{""},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
// mismatch
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"bar"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"", "bar"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"1234"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"1234", "567"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
})
t.Run("float64", func(t *testing.T) {
t.Parallel()

View file

@ -95,6 +95,8 @@ func (fs *filterStream) applyToBlockResult(br *blockResult, bm *bitmap) {
bm.resetBits()
case valueTypeUint64:
bm.resetBits()
case valueTypeInt64:
bm.resetBits()
case valueTypeFloat64:
bm.resetBits()
case valueTypeIPv4:

View file

@ -112,6 +112,8 @@ func (fs *filterStreamID) applyToBlockResult(br *blockResult, bm *bitmap) {
bm.resetBits()
case valueTypeUint64:
bm.resetBits()
case valueTypeInt64:
bm.resetBits()
case valueTypeFloat64:
bm.resetBits()
case valueTypeIPv4:

View file

@ -82,6 +82,8 @@ func (fr *filterStringRange) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
matchUint32ByStringRange(bs, ch, bm, minValue, maxValue)
case valueTypeUint64:
matchUint64ByStringRange(bs, ch, bm, minValue, maxValue)
case valueTypeInt64:
matchInt64ByStringRange(bs, ch, bm, minValue, maxValue)
case valueTypeFloat64:
matchFloat64ByStringRange(bs, ch, bm, minValue, maxValue)
case valueTypeIPv4:
@ -206,6 +208,21 @@ func matchUint64ByStringRange(bs *blockSearch, ch *columnHeader, bm *bitmap, min
bbPool.Put(bb)
}
func matchStringRange(s, minValue, maxValue string) bool {
return !lessString(s, minValue) && lessString(s, maxValue)
// matchInt64ByStringRange applies the string range [minValue, maxValue)
// to the values of the int64 column ch.
//
// Every visited value is converted to its decimal string form via toInt64String
// and then compared to minValue and maxValue as a plain string
// with matchStringRange.
func matchInt64ByStringRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue string) {
	// Fast path: the decimal string form of any int64 starts either with '-'
	// followed by a digit or with a digit. So it is always bigger than "-"
	// and always smaller than ":" (':' is the byte that follows '9').
	//
	// Do not compare minValue to "9" and maxValue to "0" here, since this drops
	// valid matches under plain string comparison:
	//   - "95" belongs to ["91", "99"), while "91" > "9";
	//   - "-32" belongs to ["-1", "-5"), while "-5" < "0".
	if minValue >= ":" || maxValue <= "-" {
		bm.resetBits()
		return
	}

	// bb is a scratch buffer shared by all the conversions below.
	bb := bbPool.Get()
	visitValues(bs, ch, bm, func(v string) bool {
		s := toInt64String(bs, bb, v)
		return matchStringRange(s, minValue, maxValue)
	})
	bbPool.Put(bb)
}
func matchStringRange(s, minValue, maxValue string) bool {
// Do not use lessString() here, since string_range() filter
// works on plain strings without additional magic.
return s >= minValue && s < maxValue
}

View file

@ -212,8 +212,8 @@ func TestFilterStringRange(t *testing.T) {
// match
fr := &filterStringRange{
fieldName: "foo",
minValue: "33",
maxValue: "500",
minValue: "122",
maxValue: "125",
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0})
@ -265,8 +265,8 @@ func TestFilterStringRange(t *testing.T) {
// match
fr := &filterStringRange{
fieldName: "foo",
minValue: "33",
maxValue: "555",
minValue: "122",
maxValue: "125",
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0})
@ -318,8 +318,8 @@ func TestFilterStringRange(t *testing.T) {
// match
fr := &filterStringRange{
fieldName: "foo",
minValue: "33",
maxValue: "555",
minValue: "122",
maxValue: "125",
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0})
@ -371,10 +371,63 @@ func TestFilterStringRange(t *testing.T) {
// match
fr := &filterStringRange{
fieldName: "foo",
minValue: "33",
maxValue: "5555",
minValue: "122",
maxValue: "125",
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0})
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 5})
// mismatch
fr = &filterStringRange{
fieldName: "foo",
minValue: "a",
maxValue: "b",
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
fr = &filterStringRange{
fieldName: "foo",
minValue: "100",
maxValue: "101",
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
fr = &filterStringRange{
fieldName: "foo",
minValue: "5",
maxValue: "33",
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
})
t.Run("int64", func(t *testing.T) {
t.Parallel()
columns := []column{
{
name: "foo",
values: []string{
"123",
"12",
"-32",
"0",
"0",
"12345678901",
"1",
"2",
"3",
"4",
"5",
},
},
}
// match
fr := &filterStringRange{
fieldName: "foo",
minValue: "122",
maxValue: "125",
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 5})
// mismatch
fr = &filterStringRange{
@ -424,10 +477,10 @@ func TestFilterStringRange(t *testing.T) {
// match
fr := &filterStringRange{
fieldName: "foo",
minValue: "33",
maxValue: "555",
minValue: "122",
maxValue: "125",
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0})
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 5})
// mismatch
fr = &filterStringRange{

View file

@ -79,6 +79,8 @@ func (ft *filterTime) applyToBlockResult(br *blockResult, bm *bitmap) {
bm.resetBits()
case valueTypeUint64:
bm.resetBits()
case valueTypeInt64:
bm.resetBits()
case valueTypeFloat64:
bm.resetBits()
case valueTypeIPv4:

View file

@ -87,6 +87,8 @@ func (fr *filterWeekRange) applyToBlockResult(br *blockResult, bm *bitmap) {
bm.resetBits()
case valueTypeUint64:
bm.resetBits()
case valueTypeInt64:
bm.resetBits()
case valueTypeFloat64:
bm.resetBits()
case valueTypeIPv4:

View file

@ -892,31 +892,6 @@ func parseBySortFields(lex *lexer) ([]*bySortField, error) {
}
}
// tryParseInt64 parses s as a decimal int64 value with optional leading '-'.
//
// The digits are parsed by tryParseUint64. The second returned value is false
// if s is empty, if the digits cannot be parsed or if the result doesn't fit int64.
func tryParseInt64(s string) (int64, bool) {
	if s == "" {
		return 0, false
	}

	negative := s[0] == '-'
	if negative {
		s = s[1:]
	}

	abs, ok := tryParseUint64(s)
	switch {
	case !ok:
		return 0, false
	case negative:
		// The absolute value of the smallest int64 is 2^63.
		if abs > -math.MinInt64 {
			return 0, false
		}
		return -int64(abs), true
	case abs > math.MaxInt64:
		return 0, false
	}
	return int64(abs), true
}
func marshalJSONKeyValue(dst []byte, k, v string) []byte {
dst = quicktemplate.AppendJSONString(dst, k, true)
dst = append(dst, ':')

View file

@ -75,7 +75,8 @@ func (scp *statsCountProcessor) updateStatsForAllRows(br *blockResult) int {
}
}
return 0
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeInt64,
valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
scp.rowsCount += uint64(br.rowsLen)
return 0
default:
@ -119,7 +120,8 @@ func (scp *statsCountProcessor) updateStatsForAllRows(br *blockResult) int {
dictIdx := valuesEncoded[i][0]
return c.dictValues[dictIdx] == ""
})
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeInt64,
valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
scp.rowsCount += uint64(br.rowsLen)
return 0
default:
@ -167,7 +169,8 @@ func (scp *statsCountProcessor) updateStatsForRow(br *blockResult, rowIdx int) i
scp.rowsCount++
}
return 0
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeInt64,
valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
scp.rowsCount++
return 0
default:

View file

@ -77,7 +77,8 @@ func (scp *statsCountEmptyProcessor) updateStatsForAllRows(br *blockResult) int
}
}
return 0
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeInt64,
valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
return 0
default:
logger.Panicf("BUG: unknown valueType=%d", c.valueType)
@ -116,7 +117,8 @@ func (scp *statsCountEmptyProcessor) updateStatsForAllRows(br *blockResult) int
dictIdx := valuesEncoded[i][0]
return c.dictValues[dictIdx] == ""
})
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeInt64,
valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
return 0
default:
logger.Panicf("BUG: unknown valueType=%d", c.valueType)
@ -165,7 +167,8 @@ func (scp *statsCountEmptyProcessor) updateStatsForRow(br *blockResult, rowIdx i
scp.rowsCount++
}
return 0
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeInt64,
valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
return 0
default:
logger.Panicf("BUG: unknown valueType=%d", c.valueType)

View file

@ -123,6 +123,11 @@ func (smp *statsMaxProcessor) updateStateForColumn(br *blockResult, c *blockResu
bb.B = marshalUint64String(bb.B[:0], c.maxValue)
smp.updateStateBytes(bb.B)
bbPool.Put(bb)
case valueTypeInt64:
bb := bbPool.Get()
bb.B = marshalInt64String(bb.B[:0], int64(c.maxValue))
smp.updateStateBytes(bb.B)
bbPool.Put(bb)
case valueTypeFloat64:
f := math.Float64frombits(c.maxValue)
bb := bbPool.Get()

View file

@ -125,6 +125,11 @@ func (smp *statsMinProcessor) updateStateForColumn(br *blockResult, c *blockResu
bb.B = marshalUint64String(bb.B[:0], c.minValue)
smp.updateStateBytes(bb.B)
bbPool.Put(bb)
case valueTypeInt64:
bb := bbPool.Get()
bb.B = marshalInt64String(bb.B[:0], int64(c.minValue))
smp.updateStateBytes(bb.B)
bbPool.Put(bb)
case valueTypeFloat64:
f := math.Float64frombits(c.minValue)
bb := bbPool.Get()

View file

@ -150,6 +150,14 @@ func (sqp *statsQuantileProcessor) updateStateForColumn(br *blockResult, c *bloc
stateSizeIncrease += h.update(bytesutil.ToUnsafeString(bb.B))
}
bbPool.Put(bb)
case valueTypeInt64:
bb := bbPool.Get()
for _, v := range c.getValuesEncoded(br) {
n := unmarshalInt64(v)
bb.B = marshalInt64String(bb.B[:0], n)
stateSizeIncrease += h.update(bytesutil.ToUnsafeString(bb.B))
}
bbPool.Put(bb)
case valueTypeFloat64:
bb := bbPool.Get()
for _, v := range c.getValuesEncoded(br) {

View file

@ -90,6 +90,11 @@ func (smp *statsRowMaxProcessor) updateStatsForAllRows(br *blockResult) int {
bb.B = marshalUint64String(bb.B[:0], c.maxValue)
needUpdateState = smp.needUpdateStateBytes(bb.B)
bbPool.Put(bb)
case valueTypeInt64:
bb := bbPool.Get()
bb.B = marshalInt64String(bb.B[:0], int64(c.maxValue))
needUpdateState = smp.needUpdateStateBytes(bb.B)
bbPool.Put(bb)
case valueTypeFloat64:
f := math.Float64frombits(c.maxValue)
bb := bbPool.Get()

View file

@ -90,6 +90,11 @@ func (smp *statsRowMinProcessor) updateStatsForAllRows(br *blockResult) int {
bb.B = marshalUint64String(bb.B[:0], c.minValue)
needUpdateState = smp.needUpdateStateBytes(bb.B)
bbPool.Put(bb)
case valueTypeInt64:
bb := bbPool.Get()
bb.B = marshalInt64String(bb.B[:0], int64(c.minValue))
needUpdateState = smp.needUpdateStateBytes(bb.B)
bbPool.Put(bb)
case valueTypeFloat64:
f := math.Float64frombits(c.minValue)
bb := bbPool.Get()

View file

@ -44,6 +44,9 @@ const (
// Every value occupies 8 bytes.
valueTypeUint64 = valueType(6)
// int values in the range [-(2^63) ... 2^63-1] are encoded into valueTypeInt64.
valueTypeInt64 = valueType(10)
// floating-point values are encoded into valueTypeFloat64.
valueTypeFloat64 = valueType(7)
@ -71,6 +74,8 @@ func (t valueType) String() string {
return "uint32"
case valueTypeUint64:
return "uint64"
case valueTypeInt64:
return "int64"
case valueTypeFloat64:
return "float64"
case valueTypeIPv4:
@ -122,6 +127,11 @@ func (ve *valuesEncoder) encode(values []string, dict *valuesDict) (valueType, u
return vt, minValue, maxValue
}
ve.buf, ve.values, vt, minValue, maxValue = tryIntEncoding(ve.buf[:0], ve.values[:0], values)
if vt != valueTypeUnknown {
return vt, minValue, maxValue
}
ve.buf, ve.values, vt, minValue, maxValue = tryFloat64Encoding(ve.buf[:0], ve.values[:0], values)
if vt != valueTypeUnknown {
return vt, minValue, maxValue
@ -231,6 +241,16 @@ func (vd *valuesDecoder) decodeInplace(values []string, vt valueType, dictValues
dstBuf = marshalUint64String(dstBuf, n)
values[i] = bytesutil.ToUnsafeString(dstBuf[dstLen:])
}
case valueTypeInt64:
for i, v := range values {
if len(v) != 8 {
return fmt.Errorf("unexpected value length for int64; got %d; want 8", len(v))
}
n := unmarshalInt64(v)
dstLen := len(dstBuf)
dstBuf = marshalInt64String(dstBuf, n)
values[i] = bytesutil.ToUnsafeString(dstBuf[dstLen:])
}
case valueTypeFloat64:
for i, v := range values {
if len(v) != 8 {
@ -550,6 +570,32 @@ func tryParseUint64(s string) (uint64, bool) {
return n, true
}
// tryParseInt64 parses s as int64 value.
//
// s may start with '-'. The remaining part is parsed by tryParseUint64.
// The second returned value is false if s is empty, if it cannot be parsed
// or if the parsed value is outside the range [-(2^63) ... 2^63-1].
func tryParseInt64(s string) (int64, bool) {
	if s == "" {
		return 0, false
	}

	negative := s[0] == '-'
	if negative {
		s = s[1:]
	}

	abs, ok := tryParseUint64(s)
	if !ok {
		return 0, false
	}

	// minInt64Abs is the absolute value of the smallest int64.
	const minInt64Abs = 1 << 63
	switch {
	case abs < minInt64Abs:
		if negative {
			return -int64(abs), true
		}
		return int64(abs), true
	case negative && abs == minInt64Abs:
		// The smallest int64 has no positive counterpart, so it is returned as is.
		return -1 << 63, true
	default:
		return 0, false
	}
}
func tryIPv4Encoding(dstBuf []byte, dstValues, srcValues []string) ([]byte, []string, valueType, uint64, uint64) {
u32s := encoding.GetUint32s(len(srcValues))
defer encoding.PutUint32s(u32s)
@ -710,7 +756,7 @@ func tryParseFloat64Internal(s string, isExact bool) (float64, bool) {
if !ok {
return 0, false
}
if isExact && n >= (1 << 53) {
if isExact && n >= (1<<53) {
// The integer cannot be represented as float64 without precision loss.
return 0, false
}
@ -1034,6 +1080,33 @@ const (
nsecsPerMicrosecond = 1e3
)
// tryIntEncoding tries encoding srcValues as int64 values.
//
// If at least one of srcValues cannot be parsed by tryParseInt64, then dstBuf and dstValues
// are returned as is together with valueTypeUnknown and zero min and max values.
//
// Otherwise every parsed value is marshaled into dstBuf with encoding.MarshalInt64,
// the marshaled values are appended to dstValues as unsafe strings pointing to dstBuf,
// and valueTypeInt64 is returned together with the minimum and the maximum parsed values
// converted from int64 to uint64.
func tryIntEncoding(dstBuf []byte, dstValues, srcValues []string) ([]byte, []string, valueType, uint64, uint64) {
	i64s := encoding.GetInt64s(len(srcValues))
	defer encoding.PutInt64s(i64s)
	parsed := i64s.A

	// Parse all the values before touching dstBuf and dstValues,
	// so they remain unchanged if some value isn't int64.
	var lo, hi int64
	for idx, s := range srcValues {
		n, ok := tryParseInt64(s)
		if !ok {
			return dstBuf, dstValues, valueTypeUnknown, 0, 0
		}
		parsed[idx] = n
		switch {
		case idx == 0:
			lo, hi = n, n
		case n < lo:
			lo = n
		case n > hi:
			hi = n
		}
	}

	for _, n := range parsed {
		start := len(dstBuf)
		dstBuf = encoding.MarshalInt64(dstBuf, n)
		dstValues = append(dstValues, bytesutil.ToUnsafeString(dstBuf[start:]))
	}
	return dstBuf, dstValues, valueTypeInt64, uint64(lo), uint64(hi)
}
func tryUintEncoding(dstBuf []byte, dstValues, srcValues []string) ([]byte, []string, valueType, uint64, uint64) {
u64s := encoding.GetUint64s(len(srcValues))
defer encoding.PutUint64s(u64s)
@ -1210,6 +1283,11 @@ func unmarshalUint64(v string) uint64 {
return encoding.UnmarshalUint64(b)
}
func unmarshalInt64(v string) int64 {
b := bytesutil.ToUnsafeBytes(v)
return encoding.UnmarshalInt64(b)
}
func unmarshalFloat64(v string) float64 {
n := unmarshalUint64(v)
return math.Float64frombits(n)
@ -1257,6 +1335,10 @@ func marshalUint64String(dst []byte, n uint64) []byte {
return strconv.AppendUint(dst, n, 10)
}
// marshalInt64String appends decimal string representation of n to dst
// and returns the result.
func marshalInt64String(dst []byte, n int64) []byte {
	const base = 10
	dst = strconv.AppendInt(dst, n, base)
	return dst
}
// marshalFloat64String appends string representation of f to dst
// and returns the result.
//
// The value is formatted without exponent, using the smallest number
// of digits needed for representing f exactly.
func marshalFloat64String(dst []byte, f float64) []byte {
	const (
		format    = 'f'
		precision = -1
		bitSize   = 64
	)
	dst = strconv.AppendFloat(dst, f, format, precision, bitSize)
	return dst
}

View file

@ -714,6 +714,60 @@ func TestTryParseUint64_Failure(t *testing.T) {
// invalid value
f("foo")
f("1.2")
f("1e3")
}
// TestTryParseInt64_Success verifies that tryParseInt64 successfully parses
// valid int64 values and returns the expected results.
func TestTryParseInt64_Success(t *testing.T) {
	f := func(s string, resultExpected int64) {
		t.Helper()

		switch result, ok := tryParseInt64(s); {
		case !ok:
			t.Fatalf("cannot parse %q", s)
		case result != resultExpected:
			t.Fatalf("unexpected value; got %d; want %d", result, resultExpected)
		}
	}

	// zero with and without the minus sign
	f("0", 0)
	f("-0", 0)

	// ordinary positive and negative values
	f("123", 123)
	f("-123", -123)
	f("1345678901234567890", 1345678901234567890)

	// a value with underscore delimiters between digits
	f("-1_345_678_901_234_567_890", -1345678901234567890)

	// the maximum possible value
	f("9223372036854775807", 9223372036854775807)

	// the minimum possible value
	f("-9223372036854775808", -9223372036854775808)
}
// TestTryParseInt64_Failure verifies that tryParseInt64 rejects empty,
// out-of-range and non-integer values.
func TestTryParseInt64_Failure(t *testing.T) {
	f := func(s string) {
		t.Helper()

		if _, ok := tryParseInt64(s); ok {
			t.Fatalf("expecting error when parsing %q", s)
		}
	}

	// empty value
	f("")

	// too big value - it exceeds the maximum int64 by one
	f("9223372036854775808")

	// too small value - it is below the minimum int64 by one
	f("-9223372036854775809")

	// invalid value
	f("foo")
	f("1.2")
	f("1e3")
}
func TestMarshalUint8String(t *testing.T) {