mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-02-19 15:30:17 +00:00
lib/logstorage: automatically detect columns with int64 values and store them as packed 8-byte int64 values
Previously columns with negative int64 values were stored either as float64 or string depending on whether the negative int64 values are bigger or smaller than -2^53. If the integer values are smaller than -2^53, then they are stored as string, since float64 cannot hold such values without precision loss. Now such values are stored as int64. This should improve compression ratio and query performance over columns with negative int64 values.
This commit is contained in:
parent
bd00e3a735
commit
df723a4870
42 changed files with 1229 additions and 51 deletions
app/vlogsgenerator
lib/logstorage
block_header.go block_result.go filter_any_case_phrase.go filter_any_case_phrase_test.go filter_any_case_prefix.go filter_any_case_prefix_test.go filter_day_range.go filter_exact.go filter_exact_prefix.go filter_exact_prefix_test.go filter_exact_test.go filter_in.go filter_in_test.go filter_ipv4_range.go filter_ipv4_range_test.go filter_len_range.go filter_len_range_test.go filter_phrase.go filter_phrase_test.go filter_prefix.go filter_range.go filter_range_test.go filter_regexp.go filter_sequence.go filter_sequence_test.go filter_stream.go filter_stream_id.go filter_string_range.go filter_string_range_test.go filter_time.go filter_week_range.go pipe_sort.go stats_count.go stats_count_empty.go stats_max.go stats_min.go stats_quantile.go stats_row_max.go stats_row_min.go values_encoder.go values_encoder_test.go
|
@ -45,6 +45,8 @@ var (
|
|||
"see https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model")
|
||||
u64FieldsPerLog = flag.Int("u64FieldsPerLog", 1, "The number of fields with uint64 values to generate per each log entry; "+
|
||||
"see https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model")
|
||||
i64FieldsPerLog = flag.Int("i64FieldsPerLog", 1, "The number of fields with int64 values to generate per each log entry; "+
|
||||
"see https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model")
|
||||
floatFieldsPerLog = flag.Int("floatFieldsPerLog", 1, "The number of fields with float64 values to generate per each log entry; "+
|
||||
"see https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model")
|
||||
ipFieldsPerLog = flag.Int("ipFieldsPerLog", 1, "The number of fields with IPv4 values to generate per each log entry; "+
|
||||
|
@ -254,6 +256,9 @@ func generateLogsAtTimestamp(bw *bufio.Writer, workerID int, ts int64, firstStre
|
|||
for j := 0; j < *u64FieldsPerLog; j++ {
|
||||
fmt.Fprintf(bw, `,"u64_%d":"%d"`, j, rand.Uint64())
|
||||
}
|
||||
for j := 0; j < *i64FieldsPerLog; j++ {
|
||||
fmt.Fprintf(bw, `,"i64_%d":"%d"`, j, int64(rand.Uint64()))
|
||||
}
|
||||
for j := 0; j < *floatFieldsPerLog; j++ {
|
||||
fmt.Fprintf(bw, `,"float_%d":"%v"`, j, math.Round(10_000*rand.Float64())/1000)
|
||||
}
|
||||
|
|
|
@ -570,6 +570,7 @@ func getNamesFromColumnHeaders(chs []columnHeader) []string {
|
|||
// - valueTypeDict doesn't store anything in the bloom filter, since all the encoded values
|
||||
// are available directly in the valuesDict field
|
||||
// - valueTypeUint8, valueTypeUint16, valueTypeUint32 and valueTypeUint64 stores encoded uint values
|
||||
// - valueTypeInt64 stores encoded int64 values
|
||||
// - valueTypeFloat64 stores encoded float64 values
|
||||
// - valueTypeIPv4 stores encoded into uint32 ips
|
||||
// - valueTypeTimestampISO8601 stores encoded into uint64 timestamps
|
||||
|
@ -629,20 +630,29 @@ func (ch *columnHeader) reset() {
|
|||
// marshal appends marshaled ch to dst and returns the result.
|
||||
func (ch *columnHeader) marshal(dst []byte) []byte {
|
||||
// check minValue/maxValue
|
||||
if ch.valueType == valueTypeFloat64 {
|
||||
switch ch.valueType {
|
||||
case valueTypeInt64:
|
||||
minValue := int64(ch.minValue)
|
||||
maxValue := int64(ch.maxValue)
|
||||
if minValue > maxValue {
|
||||
logger.Panicf("BUG: minValue=%d must be smaller than maxValue=%d for valueTypeInt64", minValue, maxValue)
|
||||
}
|
||||
case valueTypeFloat64:
|
||||
minValue := math.Float64frombits(ch.minValue)
|
||||
maxValue := math.Float64frombits(ch.maxValue)
|
||||
if minValue > maxValue {
|
||||
logger.Panicf("BUG: minValue=%g must be smaller than maxValue=%g for valueTypeFloat64", minValue, maxValue)
|
||||
}
|
||||
} else if ch.valueType == valueTypeTimestampISO8601 {
|
||||
case valueTypeTimestampISO8601:
|
||||
minValue := int64(ch.minValue)
|
||||
maxValue := int64(ch.maxValue)
|
||||
if minValue > maxValue {
|
||||
logger.Panicf("BUG: minValue=%g must be smaller than maxValue=%g for valueTypeTimestampISO8601", minValue, maxValue)
|
||||
logger.Panicf("BUG: minValue=%d must be smaller than maxValue=%d for valueTypeTimestampISO8601", minValue, maxValue)
|
||||
}
|
||||
default:
|
||||
if ch.minValue > ch.maxValue {
|
||||
logger.Panicf("BUG: minValue=%d must be smaller than maxValue=%d for valueType=%d", ch.minValue, ch.maxValue, ch.valueType)
|
||||
}
|
||||
} else if ch.minValue > ch.maxValue {
|
||||
logger.Panicf("BUG: minValue=%d must be smaller than maxValue=%d for valueType=%d", ch.minValue, ch.maxValue, ch.valueType)
|
||||
}
|
||||
|
||||
// Do not encode ch.name, since it should be encoded at columnsHeaderIndex.columnHeadersRefs
|
||||
|
@ -673,6 +683,10 @@ func (ch *columnHeader) marshal(dst []byte) []byte {
|
|||
dst = encoding.MarshalUint64(dst, ch.minValue)
|
||||
dst = encoding.MarshalUint64(dst, ch.maxValue)
|
||||
dst = ch.marshalValuesAndBloomFilters(dst)
|
||||
case valueTypeInt64:
|
||||
dst = encoding.MarshalInt64(dst, int64(ch.minValue))
|
||||
dst = encoding.MarshalInt64(dst, int64(ch.maxValue))
|
||||
dst = ch.marshalValuesAndBloomFilters(dst)
|
||||
case valueTypeFloat64:
|
||||
// float64 values are encoded as uint64 via math.Float64bits()
|
||||
dst = encoding.MarshalUint64(dst, ch.minValue)
|
||||
|
@ -809,6 +823,19 @@ func (ch *columnHeader) unmarshalNoArena(src []byte, partFormatVersion uint) ([]
|
|||
return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeUint64 for column %q: %w", ch.name, err)
|
||||
}
|
||||
src = tail
|
||||
case valueTypeInt64:
|
||||
if len(src) < 16 {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal min/max values at valueTypeInt64 from %d bytes for column %q; need at least 16 bytes", len(src), ch.name)
|
||||
}
|
||||
ch.minValue = uint64(encoding.UnmarshalInt64(src))
|
||||
ch.maxValue = uint64(encoding.UnmarshalInt64(src[8:]))
|
||||
src = src[16:]
|
||||
|
||||
tail, err := ch.unmarshalValuesAndBloomFilters(src)
|
||||
if err != nil {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal values and bloom filters at valueTypeInt64 for column %q: %w", ch.name, err)
|
||||
}
|
||||
src = tail
|
||||
case valueTypeFloat64:
|
||||
if len(src) < 16 {
|
||||
return srcOrig, fmt.Errorf("cannot unmarshal min/max values at valueTypeFloat64 from %d bytes for column %q; need at least 16 bytes", len(src), ch.name)
|
||||
|
|
|
@ -519,6 +519,13 @@ func (br *blockResult) newValuesEncodedFromColumnHeader(bs *blockSearch, bm *bit
|
|||
}
|
||||
br.addValue(v)
|
||||
})
|
||||
case valueTypeInt64:
|
||||
visitValuesReadonly(bs, ch, bm, func(v string) {
|
||||
if len(v) != 8 {
|
||||
logger.Panicf("FATAL: %s: unexpected size for int64 column %q; got %d bytes; want 8 bytes", bs.partPath(), ch.name, len(v))
|
||||
}
|
||||
br.addValue(v)
|
||||
})
|
||||
case valueTypeFloat64:
|
||||
visitValuesReadonly(bs, ch, bm, func(v string) {
|
||||
if len(v) != 8 {
|
||||
|
@ -642,6 +649,8 @@ func (br *blockResult) newValuesBucketedForColumn(c *blockResultColumn, bf *bySt
|
|||
return br.getBucketedUint32Values(valuesEncoded, bf)
|
||||
case valueTypeUint64:
|
||||
return br.getBucketedUint64Values(valuesEncoded, bf)
|
||||
case valueTypeInt64:
|
||||
return br.getBucketedInt64Values(valuesEncoded, bf)
|
||||
case valueTypeFloat64:
|
||||
return br.getBucketedFloat64Values(valuesEncoded, bf)
|
||||
case valueTypeIPv4:
|
||||
|
@ -1010,6 +1019,64 @@ func (br *blockResult) getBucketedUint64Values(valuesEncoded []string, bf *bySta
|
|||
return br.valuesBuf[valuesBufLen:]
|
||||
}
|
||||
|
||||
func (br *blockResult) getBucketedInt64Values(valuesEncoded []string, bf *byStatsField) []string {
|
||||
buf := br.a.b
|
||||
valuesBuf := br.valuesBuf
|
||||
valuesBufLen := len(valuesBuf)
|
||||
|
||||
var s string
|
||||
|
||||
if !bf.hasBucketConfig() {
|
||||
for i, v := range valuesEncoded {
|
||||
if i > 0 && valuesEncoded[i-1] == valuesEncoded[i] {
|
||||
valuesBuf = append(valuesBuf, s)
|
||||
continue
|
||||
}
|
||||
|
||||
n := unmarshalInt64(v)
|
||||
bufLen := len(buf)
|
||||
buf = marshalInt64String(buf, n)
|
||||
s = bytesutil.ToUnsafeString(buf[bufLen:])
|
||||
valuesBuf = append(valuesBuf, s)
|
||||
}
|
||||
} else {
|
||||
bucketSizeInt := int64(bf.bucketSize)
|
||||
if bucketSizeInt == 0 {
|
||||
bucketSizeInt = 1
|
||||
}
|
||||
bucketOffsetInt := int64(bf.bucketOffset)
|
||||
|
||||
nPrev := int64(0)
|
||||
for i, v := range valuesEncoded {
|
||||
if i > 0 && valuesEncoded[i-1] == valuesEncoded[i] {
|
||||
valuesBuf = append(valuesBuf, s)
|
||||
continue
|
||||
}
|
||||
|
||||
n := unmarshalInt64(v)
|
||||
n -= bucketOffsetInt
|
||||
n -= n % bucketSizeInt
|
||||
n += bucketOffsetInt
|
||||
|
||||
if i > 0 && nPrev == n {
|
||||
valuesBuf = append(valuesBuf, s)
|
||||
continue
|
||||
}
|
||||
nPrev = n
|
||||
|
||||
bufLen := len(buf)
|
||||
buf = marshalInt64String(buf, n)
|
||||
s = bytesutil.ToUnsafeString(buf[bufLen:])
|
||||
valuesBuf = append(valuesBuf, s)
|
||||
}
|
||||
}
|
||||
|
||||
br.valuesBuf = valuesBuf
|
||||
br.a.b = buf
|
||||
|
||||
return br.valuesBuf[valuesBufLen:]
|
||||
}
|
||||
|
||||
func (br *blockResult) getBucketedFloat64Values(valuesEncoded []string, bf *byStatsField) []string {
|
||||
buf := br.a.b
|
||||
valuesBuf := br.valuesBuf
|
||||
|
@ -1820,6 +1887,9 @@ func (c *blockResultColumn) getFloatValueAtRow(br *blockResult, rowIdx int) (flo
|
|||
case valueTypeUint64:
|
||||
v := valuesEncoded[rowIdx]
|
||||
return float64(unmarshalUint64(v)), true
|
||||
case valueTypeInt64:
|
||||
v := valuesEncoded[rowIdx]
|
||||
return float64(unmarshalInt64(v)), true
|
||||
case valueTypeFloat64:
|
||||
v := valuesEncoded[rowIdx]
|
||||
f := unmarshalFloat64(v)
|
||||
|
@ -1863,6 +1933,8 @@ func (c *blockResultColumn) sumLenValues(br *blockResult) uint64 {
|
|||
return c.sumLenStringValues(br)
|
||||
case valueTypeUint64:
|
||||
return c.sumLenStringValues(br)
|
||||
case valueTypeInt64:
|
||||
return c.sumLenStringValues(br)
|
||||
case valueTypeFloat64:
|
||||
return c.sumLenStringValues(br)
|
||||
case valueTypeIPv4:
|
||||
|
@ -1960,6 +2032,12 @@ func (c *blockResultColumn) sumValues(br *blockResult) (float64, int) {
|
|||
sum += float64(unmarshalUint64(v))
|
||||
}
|
||||
return sum, br.rowsLen
|
||||
case valueTypeInt64:
|
||||
sum := float64(0)
|
||||
for _, v := range c.getValuesEncoded(br) {
|
||||
sum += float64(unmarshalInt64(v))
|
||||
}
|
||||
return sum, br.rowsLen
|
||||
case valueTypeFloat64:
|
||||
sum := float64(0)
|
||||
for _, v := range c.getValuesEncoded(br) {
|
||||
|
|
|
@ -120,6 +120,8 @@ func (fp *filterAnyCasePhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
|||
matchUint32ByExactValue(bs, ch, bm, phraseLowercase, tokens)
|
||||
case valueTypeUint64:
|
||||
matchUint64ByExactValue(bs, ch, bm, phraseLowercase, tokens)
|
||||
case valueTypeInt64:
|
||||
matchInt64ByExactValue(bs, ch, bm, phraseLowercase, tokens)
|
||||
case valueTypeFloat64:
|
||||
matchFloat64ByPhrase(bs, ch, bm, phraseLowercase, tokens)
|
||||
case valueTypeIPv4:
|
||||
|
|
|
@ -622,6 +622,71 @@ func TestFilterAnyCasePhrase(t *testing.T) {
|
|||
testFilterMatchForColumns(t, columns, pf, "foo", nil)
|
||||
})
|
||||
|
||||
t.Run("int64", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
columns := []column{
|
||||
{
|
||||
name: "foo",
|
||||
values: []string{
|
||||
"1234",
|
||||
"0",
|
||||
"3454",
|
||||
"65536",
|
||||
"-12345678901",
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// match
|
||||
pf := &filterAnyCasePhrase{
|
||||
fieldName: "foo",
|
||||
phrase: "1234",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
|
||||
|
||||
pf = &filterAnyCasePhrase{
|
||||
fieldName: "foo",
|
||||
phrase: "-12345678901",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, pf, "foo", []int{4})
|
||||
|
||||
pf = &filterAnyCasePhrase{
|
||||
fieldName: "non-existing-column",
|
||||
phrase: "",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
|
||||
|
||||
// mismatch
|
||||
pf = &filterAnyCasePhrase{
|
||||
fieldName: "foo",
|
||||
phrase: "bar",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, pf, "foo", nil)
|
||||
|
||||
pf = &filterAnyCasePhrase{
|
||||
fieldName: "foo",
|
||||
phrase: "",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, pf, "foo", nil)
|
||||
|
||||
pf = &filterAnyCasePhrase{
|
||||
fieldName: "foo",
|
||||
phrase: "33",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, pf, "foo", nil)
|
||||
|
||||
pf = &filterAnyCasePhrase{
|
||||
fieldName: "foo",
|
||||
phrase: "12345678901234567890",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, pf, "foo", nil)
|
||||
})
|
||||
|
||||
t.Run("float64", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
|
|
@ -121,6 +121,8 @@ func (fp *filterAnyCasePrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
|||
matchUint32ByPrefix(bs, ch, bm, prefixLowercase)
|
||||
case valueTypeUint64:
|
||||
matchUint64ByPrefix(bs, ch, bm, prefixLowercase)
|
||||
case valueTypeInt64:
|
||||
matchInt64ByPrefix(bs, ch, bm, prefixLowercase)
|
||||
case valueTypeFloat64:
|
||||
matchFloat64ByPrefix(bs, ch, bm, prefixLowercase, tokens)
|
||||
case valueTypeIPv4:
|
||||
|
|
|
@ -658,6 +658,71 @@ func TestFilterAnyCasePrefix(t *testing.T) {
|
|||
testFilterMatchForColumns(t, columns, fp, "foo", nil)
|
||||
})
|
||||
|
||||
t.Run("int64", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
columns := []column{
|
||||
{
|
||||
name: "foo",
|
||||
values: []string{
|
||||
"1234",
|
||||
"0",
|
||||
"3454",
|
||||
"65536",
|
||||
"-12345678901",
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// match
|
||||
fp := &filterAnyCasePrefix{
|
||||
fieldName: "foo",
|
||||
prefix: "1234",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 4})
|
||||
|
||||
fp = &filterAnyCasePrefix{
|
||||
fieldName: "foo",
|
||||
prefix: "",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
|
||||
|
||||
fp = &filterAnyCasePrefix{
|
||||
fieldName: "foo",
|
||||
prefix: "-12345678901",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fp, "foo", []int{4})
|
||||
|
||||
// mismatch
|
||||
fp = &filterAnyCasePrefix{
|
||||
fieldName: "foo",
|
||||
prefix: "bar",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fp, "foo", nil)
|
||||
|
||||
fp = &filterAnyCasePrefix{
|
||||
fieldName: "foo",
|
||||
prefix: "33",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fp, "foo", nil)
|
||||
|
||||
fp = &filterAnyCasePrefix{
|
||||
fieldName: "foo",
|
||||
prefix: "12345678901234567890",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fp, "foo", nil)
|
||||
|
||||
fp = &filterAnyCasePrefix{
|
||||
fieldName: "non-existing-column",
|
||||
prefix: "",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fp, "foo", nil)
|
||||
})
|
||||
|
||||
t.Run("float64", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
|
|
@ -85,6 +85,8 @@ func (fr *filterDayRange) applyToBlockResult(br *blockResult, bm *bitmap) {
|
|||
bm.resetBits()
|
||||
case valueTypeUint64:
|
||||
bm.resetBits()
|
||||
case valueTypeInt64:
|
||||
bm.resetBits()
|
||||
case valueTypeFloat64:
|
||||
bm.resetBits()
|
||||
case valueTypeIPv4:
|
||||
|
|
|
@ -125,6 +125,17 @@ func (fe *filterExact) applyToBlockResult(br *blockResult, bm *bitmap) {
|
|||
n := unmarshalUint64(valuesEncoded[idx])
|
||||
return n == nNeeded
|
||||
})
|
||||
case valueTypeInt64:
|
||||
nNeeded, ok := tryParseInt64(value)
|
||||
if !ok {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
n := unmarshalInt64(valuesEncoded[idx])
|
||||
return n == nNeeded
|
||||
})
|
||||
case valueTypeFloat64:
|
||||
fNeeded, ok := tryParseFloat64Exact(value)
|
||||
if !ok {
|
||||
|
@ -208,6 +219,8 @@ func (fe *filterExact) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
|||
matchUint32ByExactValue(bs, ch, bm, value, tokens)
|
||||
case valueTypeUint64:
|
||||
matchUint64ByExactValue(bs, ch, bm, value, tokens)
|
||||
case valueTypeInt64:
|
||||
matchInt64ByExactValue(bs, ch, bm, value, tokens)
|
||||
case valueTypeFloat64:
|
||||
matchFloat64ByExactValue(bs, ch, bm, value, tokens)
|
||||
case valueTypeIPv4:
|
||||
|
@ -327,6 +340,18 @@ func matchUint64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phra
|
|||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchInt64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
|
||||
n, ok := tryParseInt64(phrase)
|
||||
if !ok || n < int64(ch.minValue) || n > int64(ch.maxValue) {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
bb := bbPool.Get()
|
||||
bb.B = encoding.MarshalInt64(bb.B, n)
|
||||
matchBinaryValue(bs, ch, bm, bb.B, tokens)
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchBinaryValue(bs *blockSearch, ch *columnHeader, bm *bitmap, binValue []byte, tokens []uint64) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
|
|
|
@ -84,6 +84,8 @@ func (fep *filterExactPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
|||
matchUint32ByExactPrefix(bs, ch, bm, prefix, tokens)
|
||||
case valueTypeUint64:
|
||||
matchUint64ByExactPrefix(bs, ch, bm, prefix, tokens)
|
||||
case valueTypeInt64:
|
||||
matchInt64ByExactPrefix(bs, ch, bm, prefix, tokens)
|
||||
case valueTypeFloat64:
|
||||
matchFloat64ByExactPrefix(bs, ch, bm, prefix, tokens)
|
||||
case valueTypeIPv4:
|
||||
|
@ -222,6 +224,33 @@ func matchUint64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pre
|
|||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchInt64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
|
||||
if prefix == "" {
|
||||
// An empty prefix matches all the values
|
||||
return
|
||||
}
|
||||
if len(tokens) > 0 {
|
||||
// Non-empty tokens means that the prefix contains at least two tokens.
|
||||
// Multiple tokens cannot match any uint value.
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
if prefix != "-" {
|
||||
n, ok := tryParseInt64(prefix)
|
||||
if !ok || n > int64(ch.maxValue) || n < int64(ch.minValue) {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toInt64String(bs, bb, v)
|
||||
return matchExactPrefix(s, prefix)
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchMinMaxExactPrefix(ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) bool {
|
||||
if prefix == "" {
|
||||
// An empty prefix matches all the values
|
||||
|
|
|
@ -430,6 +430,73 @@ func TestFilterExactPrefix(t *testing.T) {
|
|||
testFilterMatchForColumns(t, columns, fep, "foo", nil)
|
||||
})
|
||||
|
||||
t.Run("int64", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
columns := []column{
|
||||
{
|
||||
name: "foo",
|
||||
values: []string{
|
||||
"123",
|
||||
"12",
|
||||
"32",
|
||||
"0",
|
||||
"0",
|
||||
"-12",
|
||||
"1",
|
||||
"-2",
|
||||
"3",
|
||||
"123456789012",
|
||||
"5",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// match
|
||||
fep := &filterExactPrefix{
|
||||
fieldName: "foo",
|
||||
prefix: "12",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fep, "foo", []int{0, 1, 9})
|
||||
|
||||
fep = &filterExactPrefix{
|
||||
fieldName: "foo",
|
||||
prefix: "-12",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fep, "foo", []int{5})
|
||||
|
||||
fep = &filterExactPrefix{
|
||||
fieldName: "foo",
|
||||
prefix: "-",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fep, "foo", []int{5, 7})
|
||||
|
||||
fep = &filterExactPrefix{
|
||||
fieldName: "foo",
|
||||
prefix: "",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fep, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
|
||||
|
||||
// mismatch
|
||||
fep = &filterExactPrefix{
|
||||
fieldName: "foo",
|
||||
prefix: "bar",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fep, "foo", nil)
|
||||
|
||||
fep = &filterExactPrefix{
|
||||
fieldName: "foo",
|
||||
prefix: "1234567890123",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fep, "foo", nil)
|
||||
|
||||
fep = &filterExactPrefix{
|
||||
fieldName: "foo",
|
||||
prefix: "7",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fep, "foo", nil)
|
||||
})
|
||||
|
||||
t.Run("float64", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
|
|
@ -412,6 +412,67 @@ func TestFilterExact(t *testing.T) {
|
|||
testFilterMatchForColumns(t, columns, fe, "foo", nil)
|
||||
})
|
||||
|
||||
t.Run("int64", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
columns := []column{
|
||||
{
|
||||
name: "foo",
|
||||
values: []string{
|
||||
"123",
|
||||
"12",
|
||||
"32",
|
||||
"0",
|
||||
"0",
|
||||
"-12",
|
||||
"12345678901",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// match
|
||||
fe := &filterExact{
|
||||
fieldName: "foo",
|
||||
value: "12",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fe, "foo", []int{1})
|
||||
|
||||
fe = &filterExact{
|
||||
fieldName: "foo",
|
||||
value: "-12",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fe, "foo", []int{5})
|
||||
|
||||
fe = &filterExact{
|
||||
fieldName: "non-existing-column",
|
||||
value: "",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fe, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
|
||||
|
||||
// mismatch
|
||||
fe = &filterExact{
|
||||
fieldName: "foo",
|
||||
value: "bar",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fe, "foo", nil)
|
||||
|
||||
fe = &filterExact{
|
||||
fieldName: "foo",
|
||||
value: "",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fe, "foo", nil)
|
||||
|
||||
fe = &filterExact{
|
||||
fieldName: "foo",
|
||||
value: "33",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fe, "foo", nil)
|
||||
})
|
||||
|
||||
t.Run("float64", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
|
|
@ -48,6 +48,9 @@ type filterIn struct {
|
|||
uint64ValuesOnce sync.Once
|
||||
uint64Values map[string]struct{}
|
||||
|
||||
int64ValuesOnce sync.Once
|
||||
int64Values map[string]struct{}
|
||||
|
||||
float64ValuesOnce sync.Once
|
||||
float64Values map[string]struct{}
|
||||
|
||||
|
@ -185,6 +188,11 @@ func (fi *filterIn) getUint64Values() map[string]struct{} {
|
|||
return fi.uint64Values
|
||||
}
|
||||
|
||||
func (fi *filterIn) getInt64Values() map[string]struct{} {
|
||||
fi.int64ValuesOnce.Do(fi.initInt64Values)
|
||||
return fi.int64Values
|
||||
}
|
||||
|
||||
func (fi *filterIn) initUint64Values() {
|
||||
values := fi.values
|
||||
m := make(map[string]struct{}, len(values))
|
||||
|
@ -202,6 +210,23 @@ func (fi *filterIn) initUint64Values() {
|
|||
fi.uint64Values = m
|
||||
}
|
||||
|
||||
func (fi *filterIn) initInt64Values() {
|
||||
values := fi.values
|
||||
m := make(map[string]struct{}, len(values))
|
||||
buf := make([]byte, 0, len(values)*8)
|
||||
for _, v := range values {
|
||||
n, ok := tryParseInt64(v)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
bufLen := len(buf)
|
||||
buf = encoding.MarshalInt64(buf, n)
|
||||
s := bytesutil.ToUnsafeString(buf[bufLen:])
|
||||
m[s] = struct{}{}
|
||||
}
|
||||
fi.int64Values = m
|
||||
}
|
||||
|
||||
func (fi *filterIn) getFloat64Values() map[string]struct{} {
|
||||
fi.float64ValuesOnce.Do(fi.initFloat64Values)
|
||||
return fi.float64Values
|
||||
|
@ -320,6 +345,9 @@ func (fi *filterIn) applyToBlockResult(br *blockResult, bm *bitmap) {
|
|||
case valueTypeUint64:
|
||||
binValues := fi.getUint64Values()
|
||||
matchColumnByBinValues(br, bm, c, binValues)
|
||||
case valueTypeInt64:
|
||||
binValues := fi.getInt64Values()
|
||||
matchColumnByBinValues(br, bm, c, binValues)
|
||||
case valueTypeFloat64:
|
||||
binValues := fi.getFloat64Values()
|
||||
matchColumnByBinValues(br, bm, c, binValues)
|
||||
|
@ -407,6 +435,9 @@ func (fi *filterIn) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
|||
case valueTypeUint64:
|
||||
binValues := fi.getUint64Values()
|
||||
matchAnyValue(bs, ch, bm, binValues, commonTokens, tokenSets)
|
||||
case valueTypeInt64:
|
||||
binValues := fi.getInt64Values()
|
||||
matchAnyValue(bs, ch, bm, binValues, commonTokens, tokenSets)
|
||||
case valueTypeFloat64:
|
||||
binValues := fi.getFloat64Values()
|
||||
matchAnyValue(bs, ch, bm, binValues, commonTokens, tokenSets)
|
||||
|
|
|
@ -504,6 +504,67 @@ func TestFilterIn(t *testing.T) {
|
|||
testFilterMatchForColumns(t, columns, fi, "foo", nil)
|
||||
})
|
||||
|
||||
t.Run("int64", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
columns := []column{
|
||||
{
|
||||
name: "foo",
|
||||
values: []string{
|
||||
"123",
|
||||
"12",
|
||||
"-32",
|
||||
"0",
|
||||
"0",
|
||||
"12",
|
||||
"12345678901",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// match
|
||||
fi := &filterIn{
|
||||
fieldName: "foo",
|
||||
values: []string{"12", "-32"},
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fi, "foo", []int{1, 2, 5})
|
||||
|
||||
fi = &filterIn{
|
||||
fieldName: "foo",
|
||||
values: []string{"0"},
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fi, "foo", []int{3, 4})
|
||||
|
||||
fi = &filterIn{
|
||||
fieldName: "non-existing-column",
|
||||
values: []string{""},
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fi, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
|
||||
|
||||
// mismatch
|
||||
fi = &filterIn{
|
||||
fieldName: "foo",
|
||||
values: []string{"bar"},
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fi, "foo", nil)
|
||||
|
||||
fi = &filterIn{
|
||||
fieldName: "foo",
|
||||
values: []string{},
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fi, "foo", nil)
|
||||
|
||||
fi = &filterIn{
|
||||
fieldName: "foo",
|
||||
values: []string{"33"},
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fi, "foo", nil)
|
||||
})
|
||||
|
||||
t.Run("float64", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
|
|
@ -77,6 +77,8 @@ func (fr *filterIPv4Range) applyToBlockResult(br *blockResult, bm *bitmap) {
|
|||
bm.resetBits()
|
||||
case valueTypeUint64:
|
||||
bm.resetBits()
|
||||
case valueTypeInt64:
|
||||
bm.resetBits()
|
||||
case valueTypeFloat64:
|
||||
bm.resetBits()
|
||||
case valueTypeIPv4:
|
||||
|
@ -131,6 +133,8 @@ func (fr *filterIPv4Range) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
|||
bm.resetBits()
|
||||
case valueTypeUint64:
|
||||
bm.resetBits()
|
||||
case valueTypeInt64:
|
||||
bm.resetBits()
|
||||
case valueTypeFloat64:
|
||||
bm.resetBits()
|
||||
case valueTypeIPv4:
|
||||
|
|
|
@ -310,6 +310,37 @@ func TestFilterIPv4Range(t *testing.T) {
|
|||
testFilterMatchForColumns(t, columns, fr, "foo", nil)
|
||||
})
|
||||
|
||||
t.Run("int64", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
columns := []column{
|
||||
{
|
||||
name: "foo",
|
||||
values: []string{
|
||||
"123",
|
||||
"12",
|
||||
"32",
|
||||
"0",
|
||||
"0",
|
||||
"-12345678901",
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// mismatch
|
||||
fr := &filterIPv4Range{
|
||||
fieldName: "foo",
|
||||
minValue: 0,
|
||||
maxValue: 0xffffffff,
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fr, "foo", nil)
|
||||
})
|
||||
|
||||
t.Run("float64", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
|
|
@ -88,6 +88,12 @@ func (fr *filterLenRange) applyToBlockResult(br *blockResult, bm *bitmap) {
|
|||
return
|
||||
}
|
||||
matchColumnByLenRange(br, bm, c, minLen, maxLen)
|
||||
case valueTypeInt64:
|
||||
if minLen > 21 || maxLen == 0 {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
matchColumnByLenRange(br, bm, c, minLen, maxLen)
|
||||
case valueTypeFloat64:
|
||||
if minLen > 24 || maxLen == 0 {
|
||||
bm.resetBits()
|
||||
|
@ -156,6 +162,8 @@ func (fr *filterLenRange) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
|||
matchUint32ByLenRange(bs, ch, bm, minLen, maxLen)
|
||||
case valueTypeUint64:
|
||||
matchUint64ByLenRange(bs, ch, bm, minLen, maxLen)
|
||||
case valueTypeInt64:
|
||||
matchInt64ByLenRange(bs, ch, bm, minLen, maxLen)
|
||||
case valueTypeFloat64:
|
||||
matchFloat64ByLenRange(bs, ch, bm, minLen, maxLen)
|
||||
case valueTypeIPv4:
|
||||
|
@ -293,6 +301,33 @@ func matchUint64ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen
|
|||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchInt64ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
|
||||
if minLen > 21 || maxLen == 0 {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
|
||||
bb := bbPool.Get()
|
||||
|
||||
bb.B = marshalInt64String(bb.B[:0], int64(ch.minValue))
|
||||
maxvLen := len(bb.B)
|
||||
bb.B = marshalInt64String(bb.B[:0], int64(ch.maxValue))
|
||||
if len(bb.B) > maxvLen {
|
||||
maxvLen = len(bb.B)
|
||||
}
|
||||
if uint64(maxvLen) < minLen {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toInt64String(bs, bb, v)
|
||||
return matchLenRange(s, minLen, maxLen)
|
||||
})
|
||||
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchLenRange(s string, minLen, maxLen uint64) bool {
|
||||
sLen := uint64(utf8.RuneCountInString(s))
|
||||
return sLen >= minLen && sLen <= maxLen
|
||||
|
|
|
@ -344,6 +344,52 @@ func TestFilterLenRange(t *testing.T) {
|
|||
testFilterMatchForColumns(t, columns, fr, "foo", nil)
|
||||
})
|
||||
|
||||
t.Run("int64", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
columns := []column{
|
||||
{
|
||||
name: "foo",
|
||||
values: []string{
|
||||
"123456789012",
|
||||
"12",
|
||||
"32",
|
||||
"0",
|
||||
"0",
|
||||
"12",
|
||||
"-1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// match
|
||||
fr := &filterLenRange{
|
||||
fieldName: "foo",
|
||||
minLen: 2,
|
||||
maxLen: 2,
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fr, "foo", []int{1, 2, 5, 6})
|
||||
|
||||
// mismatch
|
||||
fr = &filterLenRange{
|
||||
fieldName: "foo",
|
||||
minLen: 0,
|
||||
maxLen: 0,
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fr, "foo", nil)
|
||||
|
||||
fr = &filterLenRange{
|
||||
fieldName: "foo",
|
||||
minLen: 20,
|
||||
maxLen: 20,
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fr, "foo", nil)
|
||||
})
|
||||
|
||||
t.Run("float64", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
|
|
@ -95,6 +95,8 @@ func (fp *filterPhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
|||
matchUint32ByExactValue(bs, ch, bm, phrase, tokens)
|
||||
case valueTypeUint64:
|
||||
matchUint64ByExactValue(bs, ch, bm, phrase, tokens)
|
||||
case valueTypeInt64:
|
||||
matchInt64ByExactValue(bs, ch, bm, phrase, tokens)
|
||||
case valueTypeFloat64:
|
||||
matchFloat64ByPhrase(bs, ch, bm, phrase, tokens)
|
||||
case valueTypeIPv4:
|
||||
|
@ -401,6 +403,13 @@ func applyToBlockResultGeneric(br *blockResult, bm *bitmap, fieldName, phrase st
|
|||
return
|
||||
}
|
||||
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
|
||||
case valueTypeInt64:
|
||||
_, ok := tryParseInt64(phrase)
|
||||
if !ok {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
|
||||
case valueTypeFloat64:
|
||||
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
|
||||
case valueTypeIPv4:
|
||||
|
|
|
@ -627,6 +627,71 @@ func TestFilterPhrase(t *testing.T) {
|
|||
testFilterMatchForColumns(t, columns, pf, "foo", nil)
|
||||
})
|
||||
|
||||
t.Run("int64", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
columns := []column{
|
||||
{
|
||||
name: "foo",
|
||||
values: []string{
|
||||
"1234",
|
||||
"0",
|
||||
"3454",
|
||||
"65536",
|
||||
"-12345678901",
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// match
|
||||
pf := &filterPhrase{
|
||||
fieldName: "foo",
|
||||
phrase: "1234",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, pf, "foo", []int{0})
|
||||
|
||||
pf = &filterPhrase{
|
||||
fieldName: "foo",
|
||||
phrase: "-12345678901",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, pf, "foo", []int{4})
|
||||
|
||||
pf = &filterPhrase{
|
||||
fieldName: "non-existing-column",
|
||||
phrase: "",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
|
||||
|
||||
// mismatch
|
||||
pf = &filterPhrase{
|
||||
fieldName: "foo",
|
||||
phrase: "bar",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, pf, "foo", nil)
|
||||
|
||||
pf = &filterPhrase{
|
||||
fieldName: "foo",
|
||||
phrase: "",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, pf, "foo", nil)
|
||||
|
||||
pf = &filterPhrase{
|
||||
fieldName: "foo",
|
||||
phrase: "33",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, pf, "foo", nil)
|
||||
|
||||
pf = &filterPhrase{
|
||||
fieldName: "foo",
|
||||
phrase: "12345678901234567890",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, pf, "foo", nil)
|
||||
})
|
||||
|
||||
t.Run("float64", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
|
|
@ -90,6 +90,8 @@ func (fp *filterPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
|||
matchUint32ByPrefix(bs, ch, bm, prefix)
|
||||
case valueTypeUint64:
|
||||
matchUint64ByPrefix(bs, ch, bm, prefix)
|
||||
case valueTypeInt64:
|
||||
matchInt64ByPrefix(bs, ch, bm, prefix)
|
||||
case valueTypeFloat64:
|
||||
matchFloat64ByPrefix(bs, ch, bm, prefix, tokens)
|
||||
case valueTypeIPv4:
|
||||
|
@ -286,6 +288,31 @@ func matchUint64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix s
|
|||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchInt64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string) {
|
||||
if prefix == "" {
|
||||
// Fast path - all the int64 values match an empty prefix aka `*`
|
||||
return
|
||||
}
|
||||
// The prefix may contain a part of the number.
|
||||
// For example, `foo:12*` must match `12` and `123`.
|
||||
// This means we cannot search in binary representation of numbers.
|
||||
// Instead, we need searching for the whole prefix in string representation of numbers :(
|
||||
if prefix != "-" {
|
||||
n, ok := tryParseInt64(prefix)
|
||||
if !ok || n < int64(ch.minValue) || n > int64(ch.maxValue) {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
}
|
||||
// There is no need in matching against bloom filters, since tokens is empty.
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toInt64String(bs, bb, v)
|
||||
return matchPrefix(s, prefix)
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchPrefix(s, prefix string) bool {
|
||||
if len(prefix) == 0 {
|
||||
// Special case - empty prefix matches any string.
|
||||
|
@ -368,3 +395,12 @@ func toUint64String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string
|
|||
bb.B = marshalUint64String(bb.B[:0], n)
|
||||
return bytesutil.ToUnsafeString(bb.B)
|
||||
}
|
||||
|
||||
func toInt64String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
|
||||
if len(v) != 8 {
|
||||
logger.Panicf("FATAL: %s: unexpected length for binary representation of int64 number; got %d; want 8", bs.partPath(), len(v))
|
||||
}
|
||||
n := unmarshalInt64(v)
|
||||
bb.B = marshalInt64String(bb.B[:0], n)
|
||||
return bytesutil.ToUnsafeString(bb.B)
|
||||
}
|
||||
|
|
|
@ -123,6 +123,18 @@ func (fr *filterRange) applyToBlockResult(br *blockResult, bm *bitmap) {
|
|||
n := unmarshalUint64(v)
|
||||
return n >= minValueUint && n <= maxValueUint
|
||||
})
|
||||
case valueTypeInt64:
|
||||
minValueInt, maxValueInt := toInt64Range(minValue, maxValue)
|
||||
if minValueInt > int64(c.maxValue) || maxValueInt < int64(c.minValue) {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
v := valuesEncoded[idx]
|
||||
n := unmarshalInt64(v)
|
||||
return n >= minValueInt && n <= maxValueInt
|
||||
})
|
||||
case valueTypeFloat64:
|
||||
if minValue > math.Float64frombits(c.maxValue) || maxValue < math.Float64frombits(c.minValue) {
|
||||
bm.resetBits()
|
||||
|
@ -202,6 +214,8 @@ func (fr *filterRange) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
|||
matchUint32ByRange(bs, ch, bm, minValue, maxValue)
|
||||
case valueTypeUint64:
|
||||
matchUint64ByRange(bs, ch, bm, minValue, maxValue)
|
||||
case valueTypeInt64:
|
||||
matchInt64ByRange(bs, ch, bm, minValue, maxValue)
|
||||
case valueTypeFloat64:
|
||||
matchFloat64ByRange(bs, ch, bm, minValue, maxValue)
|
||||
case valueTypeIPv4:
|
||||
|
@ -316,6 +330,23 @@ func matchUint64ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue,
|
|||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchInt64ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue float64) {
|
||||
minValueInt, maxValueInt := toInt64Range(minValue, maxValue)
|
||||
if minValueInt > int64(ch.maxValue) || maxValueInt < int64(ch.minValue) {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
if len(v) != 8 {
|
||||
logger.Panicf("FATAL: %s: unexpected length for binary representation of int64 number; got %d; want 8", bs.partPath(), len(v))
|
||||
}
|
||||
n := unmarshalInt64(v)
|
||||
return n >= minValueInt && n <= maxValueInt
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchTimestampISO8601ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue float64) {
|
||||
minValueInt, maxValueInt := toInt64Range(minValue, maxValue)
|
||||
if maxValue < 0 || minValueInt > int64(ch.maxValue) || maxValueInt < int64(ch.minValue) {
|
||||
|
|
|
@ -493,6 +493,80 @@ func TestFilterRange(t *testing.T) {
|
|||
testFilterMatchForColumns(t, columns, fr, "foo", nil)
|
||||
})
|
||||
|
||||
t.Run("int64", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
columns := []column{
|
||||
{
|
||||
name: "foo",
|
||||
values: []string{
|
||||
"123",
|
||||
"12",
|
||||
"-32",
|
||||
"0",
|
||||
"0",
|
||||
"12345678901",
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// match
|
||||
fr := &filterRange{
|
||||
fieldName: "foo",
|
||||
minValue: -inf,
|
||||
maxValue: 3,
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fr, "foo", []int{2, 3, 4, 6, 7, 8})
|
||||
|
||||
fr = &filterRange{
|
||||
fieldName: "foo",
|
||||
minValue: -10,
|
||||
maxValue: 2.9,
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fr, "foo", []int{3, 4, 6, 7})
|
||||
|
||||
fr = &filterRange{
|
||||
fieldName: "foo",
|
||||
minValue: -1e18,
|
||||
maxValue: 2.9,
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fr, "foo", []int{2, 3, 4, 6, 7})
|
||||
|
||||
fr = &filterRange{
|
||||
fieldName: "foo",
|
||||
minValue: 1000,
|
||||
maxValue: inf,
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fr, "foo", []int{5})
|
||||
|
||||
// mismatch
|
||||
fr = &filterRange{
|
||||
fieldName: "foo",
|
||||
minValue: -1,
|
||||
maxValue: -0.1,
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fr, "foo", nil)
|
||||
|
||||
fr = &filterRange{
|
||||
fieldName: "foo",
|
||||
minValue: 0.1,
|
||||
maxValue: 0.9,
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fr, "foo", nil)
|
||||
|
||||
fr = &filterRange{
|
||||
fieldName: "foo",
|
||||
minValue: 2.9,
|
||||
maxValue: 0.1,
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fr, "foo", nil)
|
||||
})
|
||||
|
||||
t.Run("float64", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
|
|
@ -111,6 +111,8 @@ func (fr *filterRegexp) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
|||
matchUint32ByRegexp(bs, ch, bm, re, tokens)
|
||||
case valueTypeUint64:
|
||||
matchUint64ByRegexp(bs, ch, bm, re, tokens)
|
||||
case valueTypeInt64:
|
||||
matchInt64ByRegexp(bs, ch, bm, re, tokens)
|
||||
case valueTypeFloat64:
|
||||
matchFloat64ByRegexp(bs, ch, bm, re, tokens)
|
||||
case valueTypeIPv4:
|
||||
|
@ -235,3 +237,16 @@ func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *rege
|
|||
})
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchInt64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toInt64String(bs, bb, v)
|
||||
return re.MatchString(s)
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
|
|
@ -121,6 +121,8 @@ func (fs *filterSequence) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
|||
matchUint32BySequence(bs, ch, bm, phrases, tokens)
|
||||
case valueTypeUint64:
|
||||
matchUint64BySequence(bs, ch, bm, phrases, tokens)
|
||||
case valueTypeInt64:
|
||||
matchInt64BySequence(bs, ch, bm, phrases, tokens)
|
||||
case valueTypeFloat64:
|
||||
matchFloat64BySequence(bs, ch, bm, phrases, tokens)
|
||||
case valueTypeIPv4:
|
||||
|
@ -245,6 +247,14 @@ func matchUint64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase
|
|||
matchUint64ByExactValue(bs, ch, bm, phrases[0], tokens)
|
||||
}
|
||||
|
||||
func matchInt64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
|
||||
if len(phrases) > 1 {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
matchInt64ByExactValue(bs, ch, bm, phrases[0], tokens)
|
||||
}
|
||||
|
||||
func matchSequence(s string, phrases []string) bool {
|
||||
for _, phrase := range phrases {
|
||||
n := getPhrasePos(s, phrase)
|
||||
|
|
|
@ -581,6 +581,85 @@ func TestFilterSequence(t *testing.T) {
|
|||
testFilterMatchForColumns(t, columns, fs, "foo", nil)
|
||||
})
|
||||
|
||||
t.Run("int64", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
columns := []column{
|
||||
{
|
||||
name: "foo",
|
||||
values: []string{
|
||||
"123",
|
||||
"12",
|
||||
"-32",
|
||||
"0",
|
||||
"0",
|
||||
"12",
|
||||
"12345678901",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// match
|
||||
fs := &filterSequence{
|
||||
fieldName: "foo",
|
||||
phrases: []string{"12"},
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fs, "foo", []int{1, 5})
|
||||
|
||||
fs = &filterSequence{
|
||||
fieldName: "foo",
|
||||
phrases: []string{"-32"},
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fs, "foo", []int{2})
|
||||
|
||||
fs = &filterSequence{
|
||||
fieldName: "foo",
|
||||
phrases: []string{},
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
|
||||
|
||||
fs = &filterSequence{
|
||||
fieldName: "foo",
|
||||
phrases: []string{""},
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
|
||||
|
||||
fs = &filterSequence{
|
||||
fieldName: "non-existing-column",
|
||||
phrases: []string{""},
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
|
||||
|
||||
// mismatch
|
||||
fs = &filterSequence{
|
||||
fieldName: "foo",
|
||||
phrases: []string{"bar"},
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fs, "foo", nil)
|
||||
|
||||
fs = &filterSequence{
|
||||
fieldName: "foo",
|
||||
phrases: []string{"", "bar"},
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fs, "foo", nil)
|
||||
|
||||
fs = &filterSequence{
|
||||
fieldName: "foo",
|
||||
phrases: []string{"1234"},
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fs, "foo", nil)
|
||||
|
||||
fs = &filterSequence{
|
||||
fieldName: "foo",
|
||||
phrases: []string{"1234", "567"},
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fs, "foo", nil)
|
||||
})
|
||||
|
||||
t.Run("float64", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
|
|
@ -95,6 +95,8 @@ func (fs *filterStream) applyToBlockResult(br *blockResult, bm *bitmap) {
|
|||
bm.resetBits()
|
||||
case valueTypeUint64:
|
||||
bm.resetBits()
|
||||
case valueTypeInt64:
|
||||
bm.resetBits()
|
||||
case valueTypeFloat64:
|
||||
bm.resetBits()
|
||||
case valueTypeIPv4:
|
||||
|
|
|
@ -112,6 +112,8 @@ func (fs *filterStreamID) applyToBlockResult(br *blockResult, bm *bitmap) {
|
|||
bm.resetBits()
|
||||
case valueTypeUint64:
|
||||
bm.resetBits()
|
||||
case valueTypeInt64:
|
||||
bm.resetBits()
|
||||
case valueTypeFloat64:
|
||||
bm.resetBits()
|
||||
case valueTypeIPv4:
|
||||
|
|
|
@ -82,6 +82,8 @@ func (fr *filterStringRange) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
|||
matchUint32ByStringRange(bs, ch, bm, minValue, maxValue)
|
||||
case valueTypeUint64:
|
||||
matchUint64ByStringRange(bs, ch, bm, minValue, maxValue)
|
||||
case valueTypeInt64:
|
||||
matchInt64ByStringRange(bs, ch, bm, minValue, maxValue)
|
||||
case valueTypeFloat64:
|
||||
matchFloat64ByStringRange(bs, ch, bm, minValue, maxValue)
|
||||
case valueTypeIPv4:
|
||||
|
@ -206,6 +208,21 @@ func matchUint64ByStringRange(bs *blockSearch, ch *columnHeader, bm *bitmap, min
|
|||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchStringRange(s, minValue, maxValue string) bool {
|
||||
return !lessString(s, minValue) && lessString(s, maxValue)
|
||||
func matchInt64ByStringRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue string) {
|
||||
if minValue != "-" && minValue > "9" || maxValue != "-" && maxValue < "0" {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toInt64String(bs, bb, v)
|
||||
return matchStringRange(s, minValue, maxValue)
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
// matchStringRange returns true if s belongs to the half-open range [minValue ... maxValue).
func matchStringRange(s, minValue, maxValue string) bool {
	// Plain string comparison is used on purpose instead of lessString(),
	// since string_range() filter works on plain strings without additional magic.
	if s < minValue {
		return false
	}
	return s < maxValue
}
|
||||
|
|
|
@ -212,8 +212,8 @@ func TestFilterStringRange(t *testing.T) {
|
|||
// match
|
||||
fr := &filterStringRange{
|
||||
fieldName: "foo",
|
||||
minValue: "33",
|
||||
maxValue: "500",
|
||||
minValue: "122",
|
||||
maxValue: "125",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fr, "foo", []int{0})
|
||||
|
||||
|
@ -265,8 +265,8 @@ func TestFilterStringRange(t *testing.T) {
|
|||
// match
|
||||
fr := &filterStringRange{
|
||||
fieldName: "foo",
|
||||
minValue: "33",
|
||||
maxValue: "555",
|
||||
minValue: "122",
|
||||
maxValue: "125",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fr, "foo", []int{0})
|
||||
|
||||
|
@ -318,8 +318,8 @@ func TestFilterStringRange(t *testing.T) {
|
|||
// match
|
||||
fr := &filterStringRange{
|
||||
fieldName: "foo",
|
||||
minValue: "33",
|
||||
maxValue: "555",
|
||||
minValue: "122",
|
||||
maxValue: "125",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fr, "foo", []int{0})
|
||||
|
||||
|
@ -371,10 +371,63 @@ func TestFilterStringRange(t *testing.T) {
|
|||
// match
|
||||
fr := &filterStringRange{
|
||||
fieldName: "foo",
|
||||
minValue: "33",
|
||||
maxValue: "5555",
|
||||
minValue: "122",
|
||||
maxValue: "125",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fr, "foo", []int{0})
|
||||
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 5})
|
||||
|
||||
// mismatch
|
||||
fr = &filterStringRange{
|
||||
fieldName: "foo",
|
||||
minValue: "a",
|
||||
maxValue: "b",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fr, "foo", nil)
|
||||
|
||||
fr = &filterStringRange{
|
||||
fieldName: "foo",
|
||||
minValue: "100",
|
||||
maxValue: "101",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fr, "foo", nil)
|
||||
|
||||
fr = &filterStringRange{
|
||||
fieldName: "foo",
|
||||
minValue: "5",
|
||||
maxValue: "33",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fr, "foo", nil)
|
||||
})
|
||||
|
||||
t.Run("int64", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
columns := []column{
|
||||
{
|
||||
name: "foo",
|
||||
values: []string{
|
||||
"123",
|
||||
"12",
|
||||
"-32",
|
||||
"0",
|
||||
"0",
|
||||
"12345678901",
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// match
|
||||
fr := &filterStringRange{
|
||||
fieldName: "foo",
|
||||
minValue: "122",
|
||||
maxValue: "125",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 5})
|
||||
|
||||
// mismatch
|
||||
fr = &filterStringRange{
|
||||
|
@ -424,10 +477,10 @@ func TestFilterStringRange(t *testing.T) {
|
|||
// match
|
||||
fr := &filterStringRange{
|
||||
fieldName: "foo",
|
||||
minValue: "33",
|
||||
maxValue: "555",
|
||||
minValue: "122",
|
||||
maxValue: "125",
|
||||
}
|
||||
testFilterMatchForColumns(t, columns, fr, "foo", []int{0})
|
||||
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 5})
|
||||
|
||||
// mismatch
|
||||
fr = &filterStringRange{
|
||||
|
|
|
@ -79,6 +79,8 @@ func (ft *filterTime) applyToBlockResult(br *blockResult, bm *bitmap) {
|
|||
bm.resetBits()
|
||||
case valueTypeUint64:
|
||||
bm.resetBits()
|
||||
case valueTypeInt64:
|
||||
bm.resetBits()
|
||||
case valueTypeFloat64:
|
||||
bm.resetBits()
|
||||
case valueTypeIPv4:
|
||||
|
|
|
@ -87,6 +87,8 @@ func (fr *filterWeekRange) applyToBlockResult(br *blockResult, bm *bitmap) {
|
|||
bm.resetBits()
|
||||
case valueTypeUint64:
|
||||
bm.resetBits()
|
||||
case valueTypeInt64:
|
||||
bm.resetBits()
|
||||
case valueTypeFloat64:
|
||||
bm.resetBits()
|
||||
case valueTypeIPv4:
|
||||
|
|
|
@ -892,31 +892,6 @@ func parseBySortFields(lex *lexer) ([]*bySortField, error) {
|
|||
}
|
||||
}
|
||||
|
||||
func tryParseInt64(s string) (int64, bool) {
|
||||
if len(s) == 0 {
|
||||
return 0, false
|
||||
}
|
||||
|
||||
isMinus := s[0] == '-'
|
||||
if isMinus {
|
||||
s = s[1:]
|
||||
}
|
||||
u64, ok := tryParseUint64(s)
|
||||
if !ok {
|
||||
return 0, false
|
||||
}
|
||||
if !isMinus {
|
||||
if u64 > math.MaxInt64 {
|
||||
return 0, false
|
||||
}
|
||||
return int64(u64), true
|
||||
}
|
||||
if u64 > -math.MinInt64 {
|
||||
return 0, false
|
||||
}
|
||||
return -int64(u64), true
|
||||
}
|
||||
|
||||
func marshalJSONKeyValue(dst []byte, k, v string) []byte {
|
||||
dst = quicktemplate.AppendJSONString(dst, k, true)
|
||||
dst = append(dst, ':')
|
||||
|
|
|
@ -75,7 +75,8 @@ func (scp *statsCountProcessor) updateStatsForAllRows(br *blockResult) int {
|
|||
}
|
||||
}
|
||||
return 0
|
||||
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
|
||||
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeInt64,
|
||||
valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
|
||||
scp.rowsCount += uint64(br.rowsLen)
|
||||
return 0
|
||||
default:
|
||||
|
@ -119,7 +120,8 @@ func (scp *statsCountProcessor) updateStatsForAllRows(br *blockResult) int {
|
|||
dictIdx := valuesEncoded[i][0]
|
||||
return c.dictValues[dictIdx] == ""
|
||||
})
|
||||
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
|
||||
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeInt64,
|
||||
valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
|
||||
scp.rowsCount += uint64(br.rowsLen)
|
||||
return 0
|
||||
default:
|
||||
|
@ -167,7 +169,8 @@ func (scp *statsCountProcessor) updateStatsForRow(br *blockResult, rowIdx int) i
|
|||
scp.rowsCount++
|
||||
}
|
||||
return 0
|
||||
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
|
||||
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeInt64,
|
||||
valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
|
||||
scp.rowsCount++
|
||||
return 0
|
||||
default:
|
||||
|
|
|
@ -77,7 +77,8 @@ func (scp *statsCountEmptyProcessor) updateStatsForAllRows(br *blockResult) int
|
|||
}
|
||||
}
|
||||
return 0
|
||||
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
|
||||
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeInt64,
|
||||
valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
|
||||
return 0
|
||||
default:
|
||||
logger.Panicf("BUG: unknown valueType=%d", c.valueType)
|
||||
|
@ -116,7 +117,8 @@ func (scp *statsCountEmptyProcessor) updateStatsForAllRows(br *blockResult) int
|
|||
dictIdx := valuesEncoded[i][0]
|
||||
return c.dictValues[dictIdx] == ""
|
||||
})
|
||||
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
|
||||
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeInt64,
|
||||
valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
|
||||
return 0
|
||||
default:
|
||||
logger.Panicf("BUG: unknown valueType=%d", c.valueType)
|
||||
|
@ -165,7 +167,8 @@ func (scp *statsCountEmptyProcessor) updateStatsForRow(br *blockResult, rowIdx i
|
|||
scp.rowsCount++
|
||||
}
|
||||
return 0
|
||||
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
|
||||
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeInt64,
|
||||
valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
|
||||
return 0
|
||||
default:
|
||||
logger.Panicf("BUG: unknown valueType=%d", c.valueType)
|
||||
|
|
|
@ -123,6 +123,11 @@ func (smp *statsMaxProcessor) updateStateForColumn(br *blockResult, c *blockResu
|
|||
bb.B = marshalUint64String(bb.B[:0], c.maxValue)
|
||||
smp.updateStateBytes(bb.B)
|
||||
bbPool.Put(bb)
|
||||
case valueTypeInt64:
|
||||
bb := bbPool.Get()
|
||||
bb.B = marshalInt64String(bb.B[:0], int64(c.maxValue))
|
||||
smp.updateStateBytes(bb.B)
|
||||
bbPool.Put(bb)
|
||||
case valueTypeFloat64:
|
||||
f := math.Float64frombits(c.maxValue)
|
||||
bb := bbPool.Get()
|
||||
|
|
|
@ -125,6 +125,11 @@ func (smp *statsMinProcessor) updateStateForColumn(br *blockResult, c *blockResu
|
|||
bb.B = marshalUint64String(bb.B[:0], c.minValue)
|
||||
smp.updateStateBytes(bb.B)
|
||||
bbPool.Put(bb)
|
||||
case valueTypeInt64:
|
||||
bb := bbPool.Get()
|
||||
bb.B = marshalInt64String(bb.B[:0], int64(c.minValue))
|
||||
smp.updateStateBytes(bb.B)
|
||||
bbPool.Put(bb)
|
||||
case valueTypeFloat64:
|
||||
f := math.Float64frombits(c.minValue)
|
||||
bb := bbPool.Get()
|
||||
|
|
|
@ -150,6 +150,14 @@ func (sqp *statsQuantileProcessor) updateStateForColumn(br *blockResult, c *bloc
|
|||
stateSizeIncrease += h.update(bytesutil.ToUnsafeString(bb.B))
|
||||
}
|
||||
bbPool.Put(bb)
|
||||
case valueTypeInt64:
|
||||
bb := bbPool.Get()
|
||||
for _, v := range c.getValuesEncoded(br) {
|
||||
n := unmarshalInt64(v)
|
||||
bb.B = marshalInt64String(bb.B[:0], n)
|
||||
stateSizeIncrease += h.update(bytesutil.ToUnsafeString(bb.B))
|
||||
}
|
||||
bbPool.Put(bb)
|
||||
case valueTypeFloat64:
|
||||
bb := bbPool.Get()
|
||||
for _, v := range c.getValuesEncoded(br) {
|
||||
|
|
|
@ -90,6 +90,11 @@ func (smp *statsRowMaxProcessor) updateStatsForAllRows(br *blockResult) int {
|
|||
bb.B = marshalUint64String(bb.B[:0], c.maxValue)
|
||||
needUpdateState = smp.needUpdateStateBytes(bb.B)
|
||||
bbPool.Put(bb)
|
||||
case valueTypeInt64:
|
||||
bb := bbPool.Get()
|
||||
bb.B = marshalInt64String(bb.B[:0], int64(c.maxValue))
|
||||
needUpdateState = smp.needUpdateStateBytes(bb.B)
|
||||
bbPool.Put(bb)
|
||||
case valueTypeFloat64:
|
||||
f := math.Float64frombits(c.maxValue)
|
||||
bb := bbPool.Get()
|
||||
|
|
|
@ -90,6 +90,11 @@ func (smp *statsRowMinProcessor) updateStatsForAllRows(br *blockResult) int {
|
|||
bb.B = marshalUint64String(bb.B[:0], c.minValue)
|
||||
needUpdateState = smp.needUpdateStateBytes(bb.B)
|
||||
bbPool.Put(bb)
|
||||
case valueTypeInt64:
|
||||
bb := bbPool.Get()
|
||||
bb.B = marshalInt64String(bb.B[:0], int64(c.minValue))
|
||||
needUpdateState = smp.needUpdateStateBytes(bb.B)
|
||||
bbPool.Put(bb)
|
||||
case valueTypeFloat64:
|
||||
f := math.Float64frombits(c.minValue)
|
||||
bb := bbPool.Get()
|
||||
|
|
|
@ -44,6 +44,9 @@ const (
|
|||
// Every value occupies 8 bytes.
|
||||
valueTypeUint64 = valueType(6)
|
||||
|
||||
// int values in the range [-(2^63) ... 2^63-1] are encoded into valueTypeInt64.
// Every value occupies 8 bytes.
|
||||
valueTypeInt64 = valueType(10)
|
||||
|
||||
// floating-point values are encoded into valueTypeFloat64.
|
||||
valueTypeFloat64 = valueType(7)
|
||||
|
||||
|
@ -71,6 +74,8 @@ func (t valueType) String() string {
|
|||
return "uint32"
|
||||
case valueTypeUint64:
|
||||
return "uint64"
|
||||
case valueTypeInt64:
|
||||
return "int64"
|
||||
case valueTypeFloat64:
|
||||
return "float64"
|
||||
case valueTypeIPv4:
|
||||
|
@ -122,6 +127,11 @@ func (ve *valuesEncoder) encode(values []string, dict *valuesDict) (valueType, u
|
|||
return vt, minValue, maxValue
|
||||
}
|
||||
|
||||
ve.buf, ve.values, vt, minValue, maxValue = tryIntEncoding(ve.buf[:0], ve.values[:0], values)
|
||||
if vt != valueTypeUnknown {
|
||||
return vt, minValue, maxValue
|
||||
}
|
||||
|
||||
ve.buf, ve.values, vt, minValue, maxValue = tryFloat64Encoding(ve.buf[:0], ve.values[:0], values)
|
||||
if vt != valueTypeUnknown {
|
||||
return vt, minValue, maxValue
|
||||
|
@ -231,6 +241,16 @@ func (vd *valuesDecoder) decodeInplace(values []string, vt valueType, dictValues
|
|||
dstBuf = marshalUint64String(dstBuf, n)
|
||||
values[i] = bytesutil.ToUnsafeString(dstBuf[dstLen:])
|
||||
}
|
||||
case valueTypeInt64:
|
||||
for i, v := range values {
|
||||
if len(v) != 8 {
|
||||
return fmt.Errorf("unexpected value length for int64; got %d; want 8", len(v))
|
||||
}
|
||||
n := unmarshalInt64(v)
|
||||
dstLen := len(dstBuf)
|
||||
dstBuf = marshalInt64String(dstBuf, n)
|
||||
values[i] = bytesutil.ToUnsafeString(dstBuf[dstLen:])
|
||||
}
|
||||
case valueTypeFloat64:
|
||||
for i, v := range values {
|
||||
if len(v) != 8 {
|
||||
|
@ -550,6 +570,32 @@ func tryParseUint64(s string) (uint64, bool) {
|
|||
return n, true
|
||||
}
|
||||
|
||||
// tryParseInt64 parses s as int64 value.
|
||||
func tryParseInt64(s string) (int64, bool) {
|
||||
if len(s) == 0 {
|
||||
return 0, false
|
||||
}
|
||||
isMinus := s[0] == '-'
|
||||
if isMinus {
|
||||
s = s[1:]
|
||||
}
|
||||
n, ok := tryParseUint64(s)
|
||||
if !ok {
|
||||
return 0, false
|
||||
}
|
||||
if n >= 1<<63 {
|
||||
if isMinus && n == 1<<63 {
|
||||
return -1 << 63, true
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
ni := int64(n)
|
||||
if isMinus {
|
||||
ni = -ni
|
||||
}
|
||||
return ni, true
|
||||
}
|
||||
|
||||
func tryIPv4Encoding(dstBuf []byte, dstValues, srcValues []string) ([]byte, []string, valueType, uint64, uint64) {
|
||||
u32s := encoding.GetUint32s(len(srcValues))
|
||||
defer encoding.PutUint32s(u32s)
|
||||
|
@ -710,7 +756,7 @@ func tryParseFloat64Internal(s string, isExact bool) (float64, bool) {
|
|||
if !ok {
|
||||
return 0, false
|
||||
}
|
||||
if isExact && n >= (1 << 53) {
|
||||
if isExact && n >= (1<<53) {
|
||||
// The integer cannot be represented as float64 without precision loss.
|
||||
return 0, false
|
||||
}
|
||||
|
@ -1034,6 +1080,33 @@ const (
|
|||
nsecsPerMicrosecond = 1e3
|
||||
)
|
||||
|
||||
func tryIntEncoding(dstBuf []byte, dstValues, srcValues []string) ([]byte, []string, valueType, uint64, uint64) {
|
||||
i64s := encoding.GetInt64s(len(srcValues))
|
||||
defer encoding.PutInt64s(i64s)
|
||||
a := i64s.A
|
||||
var minValue, maxValue int64
|
||||
for i, v := range srcValues {
|
||||
n, ok := tryParseInt64(v)
|
||||
if !ok {
|
||||
return dstBuf, dstValues, valueTypeUnknown, 0, 0
|
||||
}
|
||||
a[i] = n
|
||||
if i == 0 || n < minValue {
|
||||
minValue = n
|
||||
}
|
||||
if i == 0 || n > maxValue {
|
||||
maxValue = n
|
||||
}
|
||||
}
|
||||
for _, n := range a {
|
||||
dstLen := len(dstBuf)
|
||||
dstBuf = encoding.MarshalInt64(dstBuf, n)
|
||||
v := bytesutil.ToUnsafeString(dstBuf[dstLen:])
|
||||
dstValues = append(dstValues, v)
|
||||
}
|
||||
return dstBuf, dstValues, valueTypeInt64, uint64(minValue), uint64(maxValue)
|
||||
}
|
||||
|
||||
func tryUintEncoding(dstBuf []byte, dstValues, srcValues []string) ([]byte, []string, valueType, uint64, uint64) {
|
||||
u64s := encoding.GetUint64s(len(srcValues))
|
||||
defer encoding.PutUint64s(u64s)
|
||||
|
@ -1210,6 +1283,11 @@ func unmarshalUint64(v string) uint64 {
|
|||
return encoding.UnmarshalUint64(b)
|
||||
}
|
||||
|
||||
func unmarshalInt64(v string) int64 {
|
||||
b := bytesutil.ToUnsafeBytes(v)
|
||||
return encoding.UnmarshalInt64(b)
|
||||
}
|
||||
|
||||
func unmarshalFloat64(v string) float64 {
|
||||
n := unmarshalUint64(v)
|
||||
return math.Float64frombits(n)
|
||||
|
@ -1257,6 +1335,10 @@ func marshalUint64String(dst []byte, n uint64) []byte {
|
|||
return strconv.AppendUint(dst, n, 10)
|
||||
}
|
||||
|
||||
// marshalInt64String appends the decimal string representation of n to dst and returns the result.
func marshalInt64String(dst []byte, n int64) []byte {
	const base = 10
	dst = strconv.AppendInt(dst, n, base)
	return dst
}
|
||||
|
||||
func marshalFloat64String(dst []byte, f float64) []byte {
|
||||
return strconv.AppendFloat(dst, f, 'f', -1, 64)
|
||||
}
|
||||
|
|
|
@ -714,6 +714,60 @@ func TestTryParseUint64_Failure(t *testing.T) {
|
|||
|
||||
// invalid value
|
||||
f("foo")
|
||||
f("1.2")
|
||||
f("1e3")
|
||||
}
|
||||
|
||||
func TestTryParseInt64_Success(t *testing.T) {
|
||||
f := func(s string, resultExpected int64) {
|
||||
t.Helper()
|
||||
|
||||
result, ok := tryParseInt64(s)
|
||||
if !ok {
|
||||
t.Fatalf("cannot parse %q", s)
|
||||
}
|
||||
if result != resultExpected {
|
||||
t.Fatalf("unexpected value; got %d; want %d", result, resultExpected)
|
||||
}
|
||||
}
|
||||
|
||||
f("0", 0)
|
||||
f("-0", 0)
|
||||
f("123", 123)
|
||||
f("-123", -123)
|
||||
f("1345678901234567890", 1345678901234567890)
|
||||
f("-1_345_678_901_234_567_890", -1345678901234567890)
|
||||
|
||||
// the maximum possible value
|
||||
f("9223372036854775807", 9223372036854775807)
|
||||
|
||||
// the minimum possible value
|
||||
f("-9223372036854775808", -9223372036854775808)
|
||||
}
|
||||
|
||||
func TestTryParseInt64_Failure(t *testing.T) {
|
||||
f := func(s string) {
|
||||
t.Helper()
|
||||
|
||||
_, ok := tryParseInt64(s)
|
||||
if ok {
|
||||
t.Fatalf("expecting error when parsing %q", s)
|
||||
}
|
||||
}
|
||||
|
||||
// empty value
|
||||
f("")
|
||||
|
||||
// too big value
|
||||
f("9223372036854775808")
|
||||
|
||||
// too small value
|
||||
f("-9223372036854775809")
|
||||
|
||||
// invalid value
|
||||
f("foo")
|
||||
f("1.2")
|
||||
f("1e3")
|
||||
}
|
||||
|
||||
func TestMarshalUint8String(t *testing.T) {
|
||||
|
|
Loading…
Reference in a new issue