mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-31 15:06:26 +00:00
wip
This commit is contained in:
parent
4e9790bc6a
commit
6b63f65baf
5 changed files with 233 additions and 138 deletions
|
@ -1182,12 +1182,18 @@ and then by [`_time`](https://docs.victoriametrics.com/victorialogs/keyconcepts/
|
||||||
_time:5m | sort by (_stream, _time)
|
_time:5m | sort by (_stream, _time)
|
||||||
```
|
```
|
||||||
|
|
||||||
Add `desc` after the given log field in order to sort in reverse order. For example, the folliwng query sorts log fields in reverse order of `request_duration_seconds` field:
|
Add `desc` after the given log field in order to sort in reverse order of this field. For example, the following query sorts logs in reverse order of the `request_duration_seconds` field:
|
||||||
|
|
||||||
```logsql
|
```logsql
|
||||||
_time:5m | sort by (request_duration_seconds desc)
|
_time:5m | sort by (request_duration_seconds desc)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
The reverse order can be applied globally via `desc` keyword after `by(...)` clause:
|
||||||
|
|
||||||
|
```logsql
|
||||||
|
_time:5m | sort by (foo, bar) desc
|
||||||
|
```
|
||||||
|
|
||||||
Note that sorting a large number of logs can be slow and can consume a lot of additional memory.
|
Note that sorting a large number of logs can be slow and can consume a lot of additional memory.
|
||||||
It is recommended to limit the number of logs before sorting with the following approaches:
|
It is recommended to limit the number of logs before sorting with the following approaches:
|
||||||
|
|
||||||
|
|
|
@ -94,10 +94,14 @@ func (br *blockResult) cloneValues(values []string) []string {
|
||||||
valuesBufLen := len(valuesBuf)
|
valuesBufLen := len(valuesBuf)
|
||||||
|
|
||||||
for _, v := range values {
|
for _, v := range values {
|
||||||
|
if len(valuesBuf) > 0 && v == valuesBuf[len(valuesBuf)-1] {
|
||||||
|
valuesBuf = append(valuesBuf, v)
|
||||||
|
} else {
|
||||||
bufLen := len(buf)
|
bufLen := len(buf)
|
||||||
buf = append(buf, v...)
|
buf = append(buf, v...)
|
||||||
valuesBuf = append(valuesBuf, bytesutil.ToUnsafeString(buf[bufLen:]))
|
valuesBuf = append(valuesBuf, bytesutil.ToUnsafeString(buf[bufLen:]))
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
br.valuesBuf = valuesBuf
|
br.valuesBuf = valuesBuf
|
||||||
br.buf = buf
|
br.buf = buf
|
||||||
|
@ -149,14 +153,16 @@ func (br *blockResult) setResultColumns(rcs []resultColumn) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (br *blockResult) fetchAllColumns(bs *blockSearch, bm *bitmap) {
|
func (br *blockResult) fetchAllColumns(bs *blockSearch, bm *bitmap) {
|
||||||
|
// Add _time column
|
||||||
|
br.addTimeColumn()
|
||||||
|
|
||||||
|
// Add _stream column
|
||||||
if !br.addStreamColumn(bs) {
|
if !br.addStreamColumn(bs) {
|
||||||
// Skip the current block, since the associated stream tags are missing.
|
// Skip the current block, since the associated stream tags are missing.
|
||||||
br.reset()
|
br.reset()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
br.addTimeColumn()
|
|
||||||
|
|
||||||
// Add _msg column
|
// Add _msg column
|
||||||
v := bs.csh.getConstColumnValue("_msg")
|
v := bs.csh.getConstColumnValue("_msg")
|
||||||
if v != "" {
|
if v != "" {
|
||||||
|
@ -246,10 +252,13 @@ func (br *blockResult) addColumn(bs *blockSearch, ch *columnHeader, bm *bitmap)
|
||||||
var dictValues []string
|
var dictValues []string
|
||||||
|
|
||||||
// appendValue stores v in valuesBuf. Stored strings are backed by buf:
// a new value is copied into buf first, and the string placed into valuesBuf
// points at that copy.
//
// When v equals the previously stored value, the previously stored string is
// appended again instead of copying the bytes once more. It is important to
// append the stored string and not v itself: v has not been copied into buf,
// so retaining it would keep a reference to memory that appendValue does not
// manage. This matches what resultColumn.addValue does for repeated values.
appendValue := func(v string) {
	if n := len(valuesBuf); n > 0 && v == valuesBuf[n-1] {
		// Fast path - reuse the already copied string for the repeated value.
		valuesBuf = append(valuesBuf, valuesBuf[n-1])
		return
	}

	// Slow path - copy v into buf and reference the copy.
	bufLen := len(buf)
	buf = append(buf, v...)
	valuesBuf = append(valuesBuf, bytesutil.ToUnsafeString(buf[bufLen:]))
}
|
||||||
|
|
||||||
switch ch.valueType {
|
switch ch.valueType {
|
||||||
|
@ -1679,9 +1688,15 @@ func (rc *resultColumn) resetKeepName() {
|
||||||
|
|
||||||
// addValue adds the given values v to rc.
|
// addValue adds the given values v to rc.
|
||||||
func (rc *resultColumn) addValue(v string) {
|
func (rc *resultColumn) addValue(v string) {
|
||||||
|
values := rc.values
|
||||||
|
if len(values) > 0 && string(v) == values[len(values)-1] {
|
||||||
|
rc.values = append(rc.values, values[len(values)-1])
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
bufLen := len(rc.buf)
|
bufLen := len(rc.buf)
|
||||||
rc.buf = append(rc.buf, v...)
|
rc.buf = append(rc.buf, v...)
|
||||||
rc.values = append(rc.values, bytesutil.ToUnsafeString(rc.buf[bufLen:]))
|
rc.values = append(values, bytesutil.ToUnsafeString(rc.buf[bufLen:]))
|
||||||
}
|
}
|
||||||
|
|
||||||
var nan = math.NaN()
|
var nan = math.NaN()
|
||||||
|
|
|
@ -207,18 +207,19 @@ func (q *Query) getNeededColumns() []string {
|
||||||
|
|
||||||
pipes := q.pipes
|
pipes := q.pipes
|
||||||
for i := len(pipes) - 1; i >= 0; i-- {
|
for i := len(pipes) - 1; i >= 0; i-- {
|
||||||
neededFields, m := pipes[i].getNeededFields()
|
neededFields, mapping := pipes[i].getNeededFields()
|
||||||
neededFields = normalizeFields(neededFields)
|
neededFields = normalizeFields(neededFields)
|
||||||
|
|
||||||
referredFields := make(map[string]int)
|
referredFields := make(map[string]int)
|
||||||
for _, a := range m {
|
for _, inFields := range mapping {
|
||||||
for _, f := range a {
|
for _, f := range inFields {
|
||||||
referredFields[f]++
|
referredFields[f]++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for k := range dropFields {
|
for k := range dropFields {
|
||||||
for _, f := range m[k] {
|
inFields := mapping[k]
|
||||||
|
for _, f := range inFields {
|
||||||
referredFields[f]--
|
referredFields[f]--
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -228,7 +229,7 @@ func (q *Query) getNeededColumns() []string {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
dropFieldsNext := make(map[string]struct{})
|
dropFieldsNext := make(map[string]struct{})
|
||||||
for k := range m {
|
for k := range mapping {
|
||||||
if k != "*" && referredFields[k] == 0 {
|
if k != "*" && referredFields[k] == 0 {
|
||||||
dropFieldsNext[k] = struct{}{}
|
dropFieldsNext[k] = struct{}{}
|
||||||
}
|
}
|
||||||
|
@ -252,33 +253,27 @@ func (q *Query) getNeededColumns() []string {
|
||||||
if len(neededFields) == 0 {
|
if len(neededFields) == 0 {
|
||||||
input = nil
|
input = nil
|
||||||
}
|
}
|
||||||
if len(input) == 0 {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
// transform upper input fields to the current input fields according to the given mapping.
|
// transform upper input fields to the current input fields according to the given mapping.
|
||||||
if input[0] != "*" {
|
if len(input) == 0 || input[0] != "*" {
|
||||||
var dst []string
|
var dst []string
|
||||||
for _, f := range input {
|
for _, f := range input {
|
||||||
if a, ok := m[f]; ok {
|
if a, ok := mapping[f]; ok {
|
||||||
dst = append(dst, a...)
|
dst = append(dst, a...)
|
||||||
} else {
|
} else {
|
||||||
dst = append(dst, f)
|
dst = append(dst, f)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if a, ok := m["*"]; ok {
|
if a, ok := mapping["*"]; ok {
|
||||||
dst = append(dst, a...)
|
dst = append(dst, a...)
|
||||||
}
|
}
|
||||||
input = normalizeFields(dst)
|
input = normalizeFields(dst)
|
||||||
if len(input) == 0 {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// intersect neededFields with input
|
// intersect neededFields with input
|
||||||
if neededFields[0] != "*" {
|
if len(neededFields) == 0 || neededFields[0] != "*" {
|
||||||
clear(dropFields)
|
clear(dropFields)
|
||||||
if input[0] == "*" {
|
if len(input) > 0 && input[0] == "*" {
|
||||||
input = neededFields
|
input = neededFields
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -336,7 +331,7 @@ func ParseQuery(s string) (*Query, error) {
|
||||||
q.pipes = pipes
|
q.pipes = pipes
|
||||||
|
|
||||||
if !lex.isEnd() {
|
if !lex.isEnd() {
|
||||||
return nil, fmt.Errorf("unexpected unparsed tail; context: [%s]", lex.context())
|
return nil, fmt.Errorf("unexpected unparsed tail; context: [%s]; tail: [%s]", lex.context(), lex.s)
|
||||||
}
|
}
|
||||||
|
|
||||||
return q, nil
|
return q, nil
|
||||||
|
|
|
@ -929,9 +929,12 @@ func TestParseQuerySuccess(t *testing.T) {
|
||||||
f(`* | stats by(_time:1d offset -2.5h5m) count() as foo`, `* | stats by (_time:1d offset -2.5h5m) count(*) as foo`)
|
f(`* | stats by(_time:1d offset -2.5h5m) count() as foo`, `* | stats by (_time:1d offset -2.5h5m) count(*) as foo`)
|
||||||
|
|
||||||
// sort pipe
|
// sort pipe
|
||||||
|
f(`* | sort`, `* | sort`)
|
||||||
|
f(`* | sort desc`, `* | sort desc`)
|
||||||
|
f(`* | sort by()`, `* | sort`)
|
||||||
f(`* | sort bY (foo)`, `* | sort by (foo)`)
|
f(`* | sort bY (foo)`, `* | sort by (foo)`)
|
||||||
f(`* | sORt bY (_time, _stream DEsc, host)`, `* | sort by (_time, _stream desc, host)`)
|
f(`* | sORt bY (_time, _stream DEsc, host)`, `* | sort by (_time, _stream desc, host)`)
|
||||||
f(`* | sort bY (foo, bar,)`, `* | sort by (foo, bar)`)
|
f(`* | sort bY (foo desc, bar,) desc`, `* | sort by (foo desc, bar) desc`)
|
||||||
|
|
||||||
// multiple different pipes
|
// multiple different pipes
|
||||||
f(`* | fields foo, bar | limit 100 | stats by(foo,bar) count(baz) as qwert`, `* | fields foo, bar | limit 100 | stats by (foo, bar) count(baz) as qwert`)
|
f(`* | fields foo, bar | limit 100 | stats by(foo,bar) count(baz) as qwert`, `* | fields foo, bar | limit 100 | stats by (foo, bar) count(baz) as qwert`)
|
||||||
|
@ -1245,13 +1248,12 @@ func TestParseQueryFailure(t *testing.T) {
|
||||||
f(`foo | stats by(bar)`)
|
f(`foo | stats by(bar)`)
|
||||||
|
|
||||||
// invalid sort pipe
|
// invalid sort pipe
|
||||||
f(`foo | sort`)
|
|
||||||
f(`foo | sort bar`)
|
f(`foo | sort bar`)
|
||||||
f(`foo | sort by`)
|
f(`foo | sort by`)
|
||||||
f(`foo | sort by(`)
|
f(`foo | sort by(`)
|
||||||
f(`foo | sort by()`)
|
|
||||||
f(`foo | sort by(baz`)
|
f(`foo | sort by(baz`)
|
||||||
f(`foo | sort by(baz,`)
|
f(`foo | sort by(baz,`)
|
||||||
|
f(`foo | sort by(bar) foo`)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNormalizeFields(t *testing.T) {
|
func TestNormalizeFields(t *testing.T) {
|
||||||
|
|
|
@ -5,12 +5,13 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
"sort"
|
"sort"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -20,25 +21,37 @@ import (
|
||||||
type pipeSort struct {
|
type pipeSort struct {
|
||||||
// byFields contains field names for sorting from 'by(...)' clause.
|
// byFields contains field names for sorting from 'by(...)' clause.
|
||||||
byFields []*bySortField
|
byFields []*bySortField
|
||||||
|
|
||||||
|
// whether to apply descending order
|
||||||
|
isDesc bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ps *pipeSort) String() string {
|
func (ps *pipeSort) String() string {
|
||||||
if len(ps.byFields) == 0 {
|
s := "sort"
|
||||||
logger.Panicf("BUG: pipeSort must contain at least a single byField")
|
if len(ps.byFields) > 0 {
|
||||||
}
|
|
||||||
|
|
||||||
a := make([]string, len(ps.byFields))
|
a := make([]string, len(ps.byFields))
|
||||||
for i := range ps.byFields {
|
for i := range ps.byFields {
|
||||||
a[i] = ps.byFields[i].String()
|
a[i] = ps.byFields[i].String()
|
||||||
}
|
}
|
||||||
s := "sort by (" + strings.Join(a, ", ") + ")"
|
s += " by (" + strings.Join(a, ", ") + ")"
|
||||||
|
}
|
||||||
|
if ps.isDesc {
|
||||||
|
s += " desc"
|
||||||
|
}
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ps *pipeSort) getNeededFields() ([]string, map[string][]string) {
|
func (ps *pipeSort) getNeededFields() ([]string, map[string][]string) {
|
||||||
fields := make([]string, len(ps.byFields))
|
byFields := ps.byFields
|
||||||
for i, bf := range ps.byFields {
|
|
||||||
|
if len(byFields) == 0 {
|
||||||
|
return []string{"*"}, map[string][]string{
|
||||||
|
"*": {"*"},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fields := make([]string, len(byFields))
|
||||||
|
for i, bf := range byFields {
|
||||||
fields[i] = bf.name
|
fields[i] = bf.name
|
||||||
}
|
}
|
||||||
m := map[string][]string{
|
m := map[string][]string{
|
||||||
|
@ -96,12 +109,6 @@ type pipeSortProcessorShardNopad struct {
|
||||||
// ps point to the parent pipeSort.
|
// ps point to the parent pipeSort.
|
||||||
ps *pipeSort
|
ps *pipeSort
|
||||||
|
|
||||||
// u64ValuesBuf holds uint64 values parsed from values for speeding up the sorting.
|
|
||||||
u64ValuesBuf []uint64
|
|
||||||
|
|
||||||
// f64ValuesBuf holds float64 values parsed from values for speeding up the sorting.
|
|
||||||
f64ValuesBuf []float64
|
|
||||||
|
|
||||||
// blocks holds all the blocks with logs written to the shard.
|
// blocks holds all the blocks with logs written to the shard.
|
||||||
blocks []sortBlock
|
blocks []sortBlock
|
||||||
|
|
||||||
|
@ -135,8 +142,8 @@ type sortBlockByColumn struct {
|
||||||
// c contains column data
|
// c contains column data
|
||||||
c blockResultColumn
|
c blockResultColumn
|
||||||
|
|
||||||
// u64Values contains uint64 numbers parsed from values
|
// i64Values contains int64 numbers parsed from values
|
||||||
u64Values []uint64
|
i64Values []int64
|
||||||
|
|
||||||
// f64Values contains float64 numbers parsed from values
|
// f64Values contains float64 numbers parsed from values
|
||||||
f64Values []float64
|
f64Values []float64
|
||||||
|
@ -151,11 +158,11 @@ type sortRowRef struct {
|
||||||
rowIdx int
|
rowIdx int
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *sortBlockByColumn) getU64ValueAtRow(rowIdx int) uint64 {
|
func (c *sortBlockByColumn) getI64ValueAtRow(rowIdx int) int64 {
|
||||||
if c.c.isConst {
|
if c.c.isConst {
|
||||||
return c.u64Values[0]
|
return c.i64Values[0]
|
||||||
}
|
}
|
||||||
return c.u64Values[rowIdx]
|
return c.i64Values[rowIdx]
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *sortBlockByColumn) getF64ValueAtRow(rowIdx int) float64 {
|
func (c *sortBlockByColumn) getF64ValueAtRow(rowIdx int) float64 {
|
||||||
|
@ -169,9 +176,47 @@ func (c *sortBlockByColumn) getF64ValueAtRow(rowIdx int) float64 {
|
||||||
func (shard *pipeSortProcessorShard) writeBlock(br *blockResult) {
|
func (shard *pipeSortProcessorShard) writeBlock(br *blockResult) {
|
||||||
// clone br, so it could be owned by shard
|
// clone br, so it could be owned by shard
|
||||||
br = br.clone()
|
br = br.clone()
|
||||||
|
cs := br.getColumns()
|
||||||
|
|
||||||
byFields := shard.ps.byFields
|
byFields := shard.ps.byFields
|
||||||
|
if len(byFields) == 0 {
|
||||||
|
// Sort by all the columns
|
||||||
|
|
||||||
|
// Generate byColumns
|
||||||
|
var rc resultColumn
|
||||||
|
bb := bbPool.Get()
|
||||||
|
for i := range br.timestamps {
|
||||||
|
// JSON-encode all the columns per each row into a single string
|
||||||
|
// and sort rows by the resulting string.
|
||||||
|
bb.B = bb.B[:0]
|
||||||
|
for j := range cs {
|
||||||
|
c := &cs[j]
|
||||||
|
v := c.getValueAtRow(br, i)
|
||||||
|
bb.B = marshalJSONKeyValue(bb.B, c.name, v)
|
||||||
|
bb.B = append(bb.B, ',')
|
||||||
|
}
|
||||||
|
rc.addValue(bytesutil.ToUnsafeString(bb.B))
|
||||||
|
}
|
||||||
|
bbPool.Put(bb)
|
||||||
|
byColumns := []sortBlockByColumn{
|
||||||
|
{
|
||||||
|
c: blockResultColumn{
|
||||||
|
valueType: valueTypeString,
|
||||||
|
encodedValues: rc.values,
|
||||||
|
},
|
||||||
|
i64Values: make([]int64, len(br.timestamps)),
|
||||||
|
f64Values: make([]float64, len(br.timestamps)),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
shard.stateSizeBudget -= int(unsafe.Sizeof(byColumns[0]))
|
||||||
|
|
||||||
|
// Append br to shard.blocks.
|
||||||
|
shard.blocks = append(shard.blocks, sortBlock{
|
||||||
|
br: br,
|
||||||
|
byColumns: byColumns,
|
||||||
|
otherColumns: cs,
|
||||||
|
})
|
||||||
|
} else {
|
||||||
// Collect values for columns from byFields.
|
// Collect values for columns from byFields.
|
||||||
byColumns := make([]sortBlockByColumn, len(byFields))
|
byColumns := make([]sortBlockByColumn, len(byFields))
|
||||||
for i, bf := range byFields {
|
for i, bf := range byFields {
|
||||||
|
@ -180,26 +225,24 @@ func (shard *pipeSortProcessorShard) writeBlock(br *blockResult) {
|
||||||
bc.c = c
|
bc.c = c
|
||||||
|
|
||||||
if c.isTime {
|
if c.isTime {
|
||||||
// Do not initialize bc.values, bc.u64Values and bc.f64Values, since they aren't used.
|
// Do not initialize bc.i64Values and bc.f64Values, since they aren't used.
|
||||||
// This saves some memory.
|
// This saves some memory.
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if c.isConst {
|
if c.isConst {
|
||||||
// Do not initialize bc.values in order to save some memory.
|
bc.i64Values = shard.createInt64Values(c.encodedValues)
|
||||||
bc.u64Values = shard.createUint64Values(c.encodedValues)
|
|
||||||
bc.f64Values = shard.createFloat64Values(c.encodedValues)
|
bc.f64Values = shard.createFloat64Values(c.encodedValues)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// pre-populate values in order to track better br memory usage
|
// pre-populate values in order to track better br memory usage
|
||||||
values := c.getValues(br)
|
values := c.getValues(br)
|
||||||
bc.u64Values = shard.createUint64Values(values)
|
bc.i64Values = shard.createInt64Values(values)
|
||||||
bc.f64Values = shard.createFloat64Values(values)
|
bc.f64Values = shard.createFloat64Values(values)
|
||||||
}
|
}
|
||||||
shard.stateSizeBudget -= len(byColumns) * int(unsafe.Sizeof(byColumns[0]))
|
shard.stateSizeBudget -= len(byColumns) * int(unsafe.Sizeof(byColumns[0]))
|
||||||
|
|
||||||
// Collect values for other columns.
|
// Collect values for other columns.
|
||||||
cs := br.getColumns()
|
|
||||||
otherColumns := make([]blockResultColumn, 0, len(cs))
|
otherColumns := make([]blockResultColumn, 0, len(cs))
|
||||||
for _, c := range cs {
|
for _, c := range cs {
|
||||||
isByField := false
|
isByField := false
|
||||||
|
@ -215,8 +258,19 @@ func (shard *pipeSortProcessorShard) writeBlock(br *blockResult) {
|
||||||
}
|
}
|
||||||
shard.stateSizeBudget -= len(otherColumns) * int(unsafe.Sizeof(otherColumns[0]))
|
shard.stateSizeBudget -= len(otherColumns) * int(unsafe.Sizeof(otherColumns[0]))
|
||||||
|
|
||||||
|
// Append br to shard.blocks.
|
||||||
|
shard.blocks = append(shard.blocks, sortBlock{
|
||||||
|
br: br,
|
||||||
|
byColumns: byColumns,
|
||||||
|
otherColumns: otherColumns,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
shard.stateSizeBudget -= br.sizeBytes()
|
||||||
|
shard.stateSizeBudget -= int(unsafe.Sizeof(shard.blocks[0]))
|
||||||
|
|
||||||
// Add row references to rowRefs.
|
// Add row references to rowRefs.
|
||||||
blockIdx := len(shard.blocks)
|
blockIdx := len(shard.blocks) - 1
|
||||||
rowRefs := shard.rowRefs
|
rowRefs := shard.rowRefs
|
||||||
rowRefsLen := len(rowRefs)
|
rowRefsLen := len(rowRefs)
|
||||||
for i := range br.timestamps {
|
for i := range br.timestamps {
|
||||||
|
@ -227,53 +281,40 @@ func (shard *pipeSortProcessorShard) writeBlock(br *blockResult) {
|
||||||
}
|
}
|
||||||
shard.rowRefs = rowRefs
|
shard.rowRefs = rowRefs
|
||||||
shard.stateSizeBudget -= (len(rowRefs) - rowRefsLen) * int(unsafe.Sizeof(rowRefs[0]))
|
shard.stateSizeBudget -= (len(rowRefs) - rowRefsLen) * int(unsafe.Sizeof(rowRefs[0]))
|
||||||
|
|
||||||
// Append br to shard.blocks.
|
|
||||||
shard.blocks = append(shard.blocks, sortBlock{
|
|
||||||
br: br,
|
|
||||||
byColumns: byColumns,
|
|
||||||
otherColumns: otherColumns,
|
|
||||||
})
|
|
||||||
shard.stateSizeBudget -= br.sizeBytes()
|
|
||||||
shard.stateSizeBudget -= int(unsafe.Sizeof(shard.blocks[0]))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (shard *pipeSortProcessorShard) createUint64Values(values []string) []uint64 {
|
func (shard *pipeSortProcessorShard) createInt64Values(values []string) []int64 {
|
||||||
u64ValuesBuf := shard.u64ValuesBuf
|
a := make([]int64, len(values))
|
||||||
u64ValuesBufLen := len(u64ValuesBuf)
|
for i, v := range values {
|
||||||
for _, v := range values {
|
i64, ok := tryParseInt64(v)
|
||||||
u64, ok := tryParseUint64(v)
|
|
||||||
if ok {
|
if ok {
|
||||||
u64ValuesBuf = append(u64ValuesBuf, u64)
|
a[i] = i64
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
u32, _ := tryParseIPv4(v)
|
u32, _ := tryParseIPv4(v)
|
||||||
u64ValuesBuf = append(u64ValuesBuf, uint64(u32))
|
a[i] = int64(u32)
|
||||||
// Do not try parsing timestamp and duration, since they may be negative.
|
// Do not try parsing timestamp and duration, since they may be negative.
|
||||||
// This breaks sorting.
|
// This breaks sorting.
|
||||||
}
|
}
|
||||||
shard.u64ValuesBuf = u64ValuesBuf
|
|
||||||
|
|
||||||
shard.stateSizeBudget -= (len(u64ValuesBuf) - u64ValuesBufLen) * int(unsafe.Sizeof(u64ValuesBuf[0]))
|
shard.stateSizeBudget -= len(a) * int(unsafe.Sizeof(a[0]))
|
||||||
|
|
||||||
return u64ValuesBuf[u64ValuesBufLen:]
|
return a
|
||||||
}
|
}
|
||||||
|
|
||||||
func (shard *pipeSortProcessorShard) createFloat64Values(values []string) []float64 {
|
func (shard *pipeSortProcessorShard) createFloat64Values(values []string) []float64 {
|
||||||
f64ValuesBuf := shard.f64ValuesBuf
|
a := make([]float64, len(values))
|
||||||
f64ValuesBufLen := len(f64ValuesBuf)
|
for i, v := range values {
|
||||||
for _, v := range values {
|
|
||||||
f, ok := tryParseFloat64(v)
|
f, ok := tryParseFloat64(v)
|
||||||
if !ok {
|
if !ok {
|
||||||
f = nan
|
f = nan
|
||||||
}
|
}
|
||||||
f64ValuesBuf = append(f64ValuesBuf, f)
|
a[i] = f
|
||||||
}
|
}
|
||||||
shard.f64ValuesBuf = f64ValuesBuf
|
|
||||||
|
|
||||||
shard.stateSizeBudget -= (len(f64ValuesBuf) - f64ValuesBufLen) * int(unsafe.Sizeof(f64ValuesBuf[0]))
|
shard.stateSizeBudget -= len(a) * int(unsafe.Sizeof(a[0]))
|
||||||
|
|
||||||
return f64ValuesBuf[f64ValuesBufLen:]
|
return a
|
||||||
}
|
}
|
||||||
|
|
||||||
func (psp *pipeSortProcessorShard) Len() int {
|
func (psp *pipeSortProcessorShard) Len() int {
|
||||||
|
@ -435,7 +476,7 @@ func (wctx *pipeSortWriteContext) writeRow(shard *pipeSortProcessorShard, rowIdx
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !areEqualColumns {
|
if !areEqualColumns {
|
||||||
// send the current block to bbBase and construct new columns
|
// send the current block to bbBase and construct a block with new set of columns
|
||||||
wctx.flush()
|
wctx.flush()
|
||||||
|
|
||||||
rcs = wctx.rcs[:0]
|
rcs = wctx.rcs[:0]
|
||||||
|
@ -454,7 +495,7 @@ func (wctx *pipeSortWriteContext) writeRow(shard *pipeSortProcessorShard, rowIdx
|
||||||
|
|
||||||
br := b.br
|
br := b.br
|
||||||
byColumns := b.byColumns
|
byColumns := b.byColumns
|
||||||
for i := range byColumns {
|
for i := range byFields {
|
||||||
v := byColumns[i].c.getValueAtRow(br, rr.rowIdx)
|
v := byColumns[i].c.getValueAtRow(br, rr.rowIdx)
|
||||||
rcs[i].addValue(v)
|
rcs[i].addValue(v)
|
||||||
wctx.valuesLen += len(v)
|
wctx.valuesLen += len(v)
|
||||||
|
@ -532,7 +573,10 @@ func sortBlockLess(shardA *pipeSortProcessorShard, rowIdxA int, shardB *pipeSort
|
||||||
for idx := range bA.byColumns {
|
for idx := range bA.byColumns {
|
||||||
cA := &bA.byColumns[idx]
|
cA := &bA.byColumns[idx]
|
||||||
cB := &bB.byColumns[idx]
|
cB := &bB.byColumns[idx]
|
||||||
bf := byFields[idx]
|
isDesc := len(byFields) > 0 && byFields[idx].isDesc
|
||||||
|
if shardA.ps.isDesc {
|
||||||
|
isDesc = !isDesc
|
||||||
|
}
|
||||||
|
|
||||||
if cA.c.isConst && cB.c.isConst {
|
if cA.c.isConst && cB.c.isConst {
|
||||||
// Fast path - compare const values
|
// Fast path - compare const values
|
||||||
|
@ -546,7 +590,7 @@ func sortBlockLess(shardA *pipeSortProcessorShard, rowIdxA int, shardB *pipeSort
|
||||||
if tA == tB {
|
if tA == tB {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if bf.isDesc {
|
if isDesc {
|
||||||
return tB < tA
|
return tB < tA
|
||||||
}
|
}
|
||||||
return tA < tB
|
return tA < tB
|
||||||
|
@ -560,14 +604,14 @@ func sortBlockLess(shardA *pipeSortProcessorShard, rowIdxA int, shardB *pipeSort
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try sorting by uint64 values at first
|
// Try sorting by int64 values at first
|
||||||
uA := cA.getU64ValueAtRow(rrA.rowIdx)
|
uA := cA.getI64ValueAtRow(rrA.rowIdx)
|
||||||
uB := cB.getU64ValueAtRow(rrB.rowIdx)
|
uB := cB.getI64ValueAtRow(rrB.rowIdx)
|
||||||
if uA != 0 && uB != 0 {
|
if uA != 0 && uB != 0 {
|
||||||
if uA == uB {
|
if uA == uB {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if bf.isDesc {
|
if isDesc {
|
||||||
return uB < uA
|
return uB < uA
|
||||||
}
|
}
|
||||||
return uA < uB
|
return uA < uB
|
||||||
|
@ -580,7 +624,7 @@ func sortBlockLess(shardA *pipeSortProcessorShard, rowIdxA int, shardB *pipeSort
|
||||||
if fA == fB {
|
if fA == fB {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if bf.isDesc {
|
if isDesc {
|
||||||
return fB < fA
|
return fB < fA
|
||||||
}
|
}
|
||||||
return fA < fB
|
return fA < fB
|
||||||
|
@ -592,7 +636,7 @@ func sortBlockLess(shardA *pipeSortProcessorShard, rowIdxA int, shardB *pipeSort
|
||||||
if sA == sB {
|
if sA == sB {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if bf.isDesc {
|
if isDesc {
|
||||||
return sB < sA
|
return sB < sA
|
||||||
}
|
}
|
||||||
return sA < sB
|
return sA < sB
|
||||||
|
@ -605,19 +649,23 @@ func parsePipeSort(lex *lexer) (*pipeSort, error) {
|
||||||
return nil, fmt.Errorf("expecting 'sort'; got %q", lex.token)
|
return nil, fmt.Errorf("expecting 'sort'; got %q", lex.token)
|
||||||
}
|
}
|
||||||
lex.nextToken()
|
lex.nextToken()
|
||||||
if !lex.isKeyword("by") {
|
|
||||||
return nil, fmt.Errorf("expecting 'by'; got %q", lex.token)
|
var ps pipeSort
|
||||||
}
|
if lex.isKeyword("by") {
|
||||||
lex.nextToken()
|
lex.nextToken()
|
||||||
bfs, err := parseBySortFields(lex)
|
bfs, err := parseBySortFields(lex)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("cannot parse 'by' clause: %w", err)
|
return nil, fmt.Errorf("cannot parse 'by' clause: %w", err)
|
||||||
}
|
}
|
||||||
|
ps.byFields = bfs
|
||||||
ps := &pipeSort{
|
|
||||||
byFields: bfs,
|
|
||||||
}
|
}
|
||||||
return ps, nil
|
|
||||||
|
if lex.isKeyword("desc") {
|
||||||
|
lex.nextToken()
|
||||||
|
ps.isDesc = true
|
||||||
|
}
|
||||||
|
|
||||||
|
return &ps, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// bySortField represents 'by (...)' part of the pipeSort.
|
// bySortField represents 'by (...)' part of the pipeSort.
|
||||||
|
@ -646,9 +694,6 @@ func parseBySortFields(lex *lexer) ([]*bySortField, error) {
|
||||||
lex.nextToken()
|
lex.nextToken()
|
||||||
if lex.isKeyword(")") {
|
if lex.isKeyword(")") {
|
||||||
lex.nextToken()
|
lex.nextToken()
|
||||||
if len(bfs) == 0 {
|
|
||||||
return nil, fmt.Errorf("sort fields list cannot be empty")
|
|
||||||
}
|
|
||||||
return bfs, nil
|
return bfs, nil
|
||||||
}
|
}
|
||||||
fieldName, err := parseFieldName(lex)
|
fieldName, err := parseFieldName(lex)
|
||||||
|
@ -673,3 +718,35 @@ func parseBySortFields(lex *lexer) ([]*bySortField, error) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// marshalJSONKeyValue appends `"k":"v"` to dst and returns the extended dst.
//
// Both k and v are quoted with strconv.AppendQuote, i.e. with Go string literal
// escaping rules, which are close to but not identical to strict JSON escaping.
func marshalJSONKeyValue(dst []byte, k, v string) []byte {
	quotedKey := strconv.AppendQuote(dst, k)
	withSep := append(quotedKey, ':')
	return strconv.AppendQuote(withSep, v)
}
|
||||||
|
|
||||||
|
func tryParseInt64(s string) (int64, bool) {
|
||||||
|
if len(s) == 0 {
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
|
||||||
|
isMinus := s[0] == '-'
|
||||||
|
if isMinus {
|
||||||
|
s = s[1:]
|
||||||
|
}
|
||||||
|
u64, ok := tryParseUint64(s)
|
||||||
|
if !ok {
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
if !isMinus {
|
||||||
|
if u64 > math.MaxInt64 {
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
return int64(u64), true
|
||||||
|
}
|
||||||
|
if u64 > -math.MinInt64 {
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
return -int64(u64), true
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue