Aliaksandr Valialkin 2024-04-30 23:03:34 +02:00
parent f7dad8bd61
commit 56c115c455
13 changed files with 787 additions and 315 deletions

lib/encoding/float.go (new file)

@@ -0,0 +1,37 @@
+package encoding
+
+import (
+    "sync"
+)
+
+// GetFloat64s returns a *Float64s whose A slice has the given size.
+//
+// When the returned value is no longer needed, it is advised to call PutFloat64s() on it,
+// so it can be re-used.
+func GetFloat64s(size int) *Float64s {
+    v := float64sPool.Get()
+    if v == nil {
+        v = &Float64s{}
+    }
+    a := v.(*Float64s)
+    if n := size - cap(a.A); n > 0 {
+        a.A = append(a.A[:cap(a.A)], make([]float64, n)...)
+    }
+    a.A = a.A[:size]
+    return a
+}
+
+// PutFloat64s returns a to the pool, so it can be re-used via GetFloat64s.
+//
+// a cannot be used after returning it to the pool.
+func PutFloat64s(a *Float64s) {
+    a.A = a.A[:0]
+    float64sPool.Put(a)
+}
+
+var float64sPool sync.Pool
+
+// Float64s holds a slice of float64 values.
+type Float64s struct {
+    A []float64
+}
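For reference, a minimal usage sketch of this pool (not part of the commit; it assumes the package is imported at the usual VictoriaMetrics path):

package main

import (
    "fmt"

    "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
)

func main() {
    // Obtain a *Float64s whose A slice has length 4, possibly re-used from the pool.
    a := encoding.GetFloat64s(4)
    for i := range a.A {
        a.A[i] = float64(i) * 1.5
    }
    fmt.Println(a.A) // [0 1.5 3 4.5]

    // Return a to the pool once the slice is no longer needed;
    // a must not be used after this call.
    encoding.PutFloat64s(a)
}

Note that GetFloat64s may hand back stale contents left over from a previous user, so callers are expected to overwrite all size elements before reading them.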


@ -1,6 +1,7 @@
package logstorage package logstorage
import ( import (
"math"
"strconv" "strconv"
"sync" "sync"
"time" "time"
@ -274,25 +275,26 @@ func (ih *indexBlockHeader) mustReadBlockHeaders(dst []blockHeader, p *part) []b
} }
type blockResult struct { type blockResult struct {
buf []byte // buf holds all the bytes behind the requested column values in the block.
buf []byte
// values holds all the requested column values in the block.
valuesBuf []string valuesBuf []string
// streamID is streamID for the given blockResult // streamID is streamID for the given blockResult.
streamID streamID streamID streamID
// cs contain values for the requested columns. // timestamps contain timestamps for the selected log entries in the block.
//
// The corresponding requested column names are stored at columnsNames.
cs []blockResultColumn
// timestamps contain timestamps for the selected log entries
timestamps []int64 timestamps []int64
// columnNamesBuf is used only if all the columns must be fetched. // csOffset contains cs offset for the requested columns.
columnNamesBuf []string //
// columns with indexes below csOffset are ignored.
// This is needed for simplifying data transformations at pipe stages.
csOffset int
// columnNames references the list of names for cs columns. // cs contains requested columns.
columnNames []string cs []blockResultColumn
} }
func (br *blockResult) reset() { func (br *blockResult) reset() {
@ -303,65 +305,60 @@ func (br *blockResult) reset() {
br.streamID.reset() br.streamID.reset()
br.timestamps = br.timestamps[:0]
br.csOffset = 0
cs := br.cs cs := br.cs
for i := range cs { for i := range cs {
cs[i].reset() cs[i].reset()
} }
br.cs = cs[:0] br.cs = cs[:0]
br.timestamps = br.timestamps[:0]
clear(br.columnNamesBuf)
br.columnNamesBuf = br.columnNamesBuf[:0]
br.columnNames = nil
} }
func (br *blockResult) fetchAllColumns(bs *blockSearch, bm *bitmap) { func (br *blockResult) fetchAllColumns(bs *blockSearch, bm *bitmap) {
// Add _stream column
br.columnNamesBuf = append(br.columnNamesBuf, "_stream")
if !br.addStreamColumn(bs) { if !br.addStreamColumn(bs) {
// Skip the current block, since the associated stream tags are missing. // Skip the current block, since the associated stream tags are missing.
br.reset() br.reset()
return return
} }
// Add _time column
br.columnNamesBuf = append(br.columnNamesBuf, "_time")
br.addTimeColumn() br.addTimeColumn()
// Add _msg column // Add _msg column
v := bs.csh.getConstColumnValue("_msg") v := bs.csh.getConstColumnValue("_msg")
if v != "" { if v != "" {
br.columnNamesBuf = append(br.columnNamesBuf, "_msg") br.addConstColumn("_msg", v)
br.addConstColumn(v)
} else if ch := bs.csh.getColumnHeader("_msg"); ch != nil { } else if ch := bs.csh.getColumnHeader("_msg"); ch != nil {
br.columnNamesBuf = append(br.columnNamesBuf, "_msg")
br.addColumn(bs, ch, bm) br.addColumn(bs, ch, bm)
} else {
br.addConstColumn("_msg", "")
} }
// Add other const columns
for _, cc := range bs.csh.constColumns { for _, cc := range bs.csh.constColumns {
if isMsgFieldName(cc.Name) { if isMsgFieldName(cc.Name) {
continue continue
} }
br.columnNamesBuf = append(br.columnNamesBuf, cc.Name) br.addConstColumn(cc.Name, cc.Value)
br.addConstColumn(cc.Value)
} }
// Add other non-const columns
chs := bs.csh.columnHeaders chs := bs.csh.columnHeaders
for i := range chs { for i := range chs {
ch := &chs[i] ch := &chs[i]
if isMsgFieldName(ch.name) { if isMsgFieldName(ch.name) {
continue continue
} }
br.columnNamesBuf = append(br.columnNamesBuf, ch.name)
br.addColumn(bs, ch, bm) br.addColumn(bs, ch, bm)
} }
br.columnNames = br.columnNamesBuf
} }
func (br *blockResult) fetchRequestedColumns(bs *blockSearch, bm *bitmap) { func (br *blockResult) fetchRequestedColumns(bs *blockSearch, bm *bitmap) {
for _, columnName := range bs.bsw.so.resultColumnNames { for _, columnName := range bs.bsw.so.resultColumnNames {
if columnName == "" {
columnName = "_msg"
}
switch columnName { switch columnName {
case "_stream": case "_stream":
if !br.addStreamColumn(bs) { if !br.addStreamColumn(bs) {
@ -374,22 +371,14 @@ func (br *blockResult) fetchRequestedColumns(bs *blockSearch, bm *bitmap) {
default: default:
v := bs.csh.getConstColumnValue(columnName) v := bs.csh.getConstColumnValue(columnName)
if v != "" { if v != "" {
br.addConstColumn(v) br.addConstColumn(columnName, v)
continue } else if ch := bs.csh.getColumnHeader(columnName); ch != nil {
}
ch := bs.csh.getColumnHeader(columnName)
if ch == nil {
br.addConstColumn("")
} else {
br.addColumn(bs, ch, bm) br.addColumn(bs, ch, bm)
} else {
br.addConstColumn(columnName, "")
} }
} }
} }
br.columnNames = bs.bsw.so.resultColumnNames
}
func (br *blockResult) RowsCount() int {
return len(br.timestamps)
} }
func (br *blockResult) mustInit(bs *blockSearch, bm *bitmap) { func (br *blockResult) mustInit(bs *blockSearch, bm *bitmap) {
@ -401,13 +390,17 @@ func (br *blockResult) mustInit(bs *blockSearch, bm *bitmap) {
// Nothing to initialize for zero matching log entries in the block. // Nothing to initialize for zero matching log entries in the block.
return return
} }
// Initialize timestamps, since they are used for determining the number of rows in br.RowsCount()
// Initialize timestamps, since they are required for all the further work with br.
srcTimestamps := bs.getTimestamps() srcTimestamps := bs.getTimestamps()
if bm.areAllBitsSet() { if bm.areAllBitsSet() {
// Fast path - all the rows in the block are selected, so copy all the timestamps without any filtering.
br.timestamps = append(br.timestamps[:0], srcTimestamps...) br.timestamps = append(br.timestamps[:0], srcTimestamps...)
return return
} }
// Slow path - copy only the needed timestamps to br according to filter results.
dstTimestamps := br.timestamps[:0] dstTimestamps := br.timestamps[:0]
bm.forEachSetBit(func(idx int) bool { bm.forEachSetBit(func(idx int) bool {
ts := srcTimestamps[idx] ts := srcTimestamps[idx]
@ -517,7 +510,12 @@ func (br *blockResult) addColumn(bs *blockSearch, ch *columnHeader, bm *bitmap)
} }
dictValues = valuesBuf[valuesBufLen:] dictValues = valuesBuf[valuesBufLen:]
name := ch.name
if name == "" {
name = "_msg"
}
br.cs = append(br.cs, blockResultColumn{ br.cs = append(br.cs, blockResultColumn{
name: name,
valueType: ch.valueType, valueType: ch.valueType,
dictValues: dictValues, dictValues: dictValues,
encodedValues: encodedValues, encodedValues: encodedValues,
@ -528,6 +526,7 @@ func (br *blockResult) addColumn(bs *blockSearch, ch *columnHeader, bm *bitmap)
func (br *blockResult) addTimeColumn() { func (br *blockResult) addTimeColumn() {
br.cs = append(br.cs, blockResultColumn{ br.cs = append(br.cs, blockResultColumn{
name: "_time",
isTime: true, isTime: true,
}) })
} }
@ -551,11 +550,11 @@ func (br *blockResult) addStreamColumn(bs *blockSearch) bool {
PutStreamTags(st) PutStreamTags(st)
s := bytesutil.ToUnsafeString(bb.B) s := bytesutil.ToUnsafeString(bb.B)
br.addConstColumn(s) br.addConstColumn("_stream", s)
return true return true
} }
func (br *blockResult) addConstColumn(value string) { func (br *blockResult) addConstColumn(name, value string) {
buf := br.buf buf := br.buf
bufLen := len(buf) bufLen := len(buf)
buf = append(buf, value...) buf = append(buf, value...)
@ -568,17 +567,198 @@ func (br *blockResult) addConstColumn(value string) {
br.valuesBuf = valuesBuf br.valuesBuf = valuesBuf
br.cs = append(br.cs, blockResultColumn{ br.cs = append(br.cs, blockResultColumn{
name: name,
isConst: true, isConst: true,
valueType: valueTypeUnknown,
encodedValues: valuesBuf[valuesBufLen:], encodedValues: valuesBuf[valuesBufLen:],
}) })
} }
// getColumnValues returns values for the column with the given idx. func (br *blockResult) updateColumns(columnNames []string) {
if br.areSameColumns(columnNames) {
// Fast path - nothing to change.
return
}
// Slow path - construct the requested columns
cs := br.cs
csOffset := len(cs)
for _, columnName := range columnNames {
c := br.getColumnByName(columnName)
cs = append(cs, c)
}
br.csOffset = csOffset
br.cs = cs
}
func (br *blockResult) areSameColumns(columnNames []string) bool {
cs := br.getColumns()
if len(cs) != len(columnNames) {
return false
}
for i := range cs {
if cs[i].name != columnNames[i] {
return false
}
}
return true
}
func (br *blockResult) getColumnByName(columnName string) blockResultColumn {
if columnName == "" {
columnName = "_msg"
}
cs := br.getColumns()
for i := range cs {
if cs[i].name == columnName {
return cs[i]
}
}
return blockResultColumn{
name: columnName,
isConst: true,
encodedValues: getEmptyStrings(1),
}
}
func (br *blockResult) getColumns() []blockResultColumn {
return br.cs[br.csOffset:]
}
func (br *blockResult) skipRows(skipRows int) {
br.timestamps = append(br.timestamps[:0], br.timestamps[skipRows:]...)
cs := br.getColumns()
for i := range cs {
c := &cs[i]
if c.values != nil {
c.values = append(c.values[:0], c.values[skipRows:]...)
}
if c.isConst {
continue
}
if c.encodedValues != nil {
c.encodedValues = append(c.encodedValues[:0], c.encodedValues[skipRows:]...)
}
}
}
func (br *blockResult) truncateRows(keepRows int) {
br.timestamps = br.timestamps[:keepRows]
cs := br.getColumns()
for i := range cs {
c := &cs[i]
if c.values != nil {
c.values = c.values[:keepRows]
}
if c.isConst {
continue
}
if c.encodedValues != nil {
c.encodedValues = c.encodedValues[:keepRows]
}
}
}
func (br *blockResult) appendColumnValues(dst [][]string, columnNames []string) [][]string {
for _, columnName := range columnNames {
c := br.getColumnByName(columnName)
values := c.getValues(br)
dst = append(dst, values)
}
return dst
}
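The csOffset mechanics introduced above are worth spelling out: updateColumns() appends the projected columns to the same cs slice and then moves csOffset past the old ones, so getColumns() sees only the projection while the backing arrays keep being re-used. A simplified standalone model of the idea (toy types, not the actual logstorage code; unlike the real getColumnByName, missing columns are silently dropped here):

package main

import "fmt"

type column struct {
    name   string
    values []string
}

type result struct {
    csOffset int
    cs       []column
}

// getColumns returns only the columns at or after csOffset.
func (r *result) getColumns() []column { return r.cs[r.csOffset:] }

// updateColumns keeps only the requested columns, in the requested order,
// by appending them after the existing ones and advancing csOffset.
func (r *result) updateColumns(names []string) {
    cs := r.cs
    offset := len(cs)
    for _, name := range names {
        for _, c := range r.getColumns() {
            if c.name == name {
                cs = append(cs, c)
                break
            }
        }
    }
    r.csOffset = offset
    r.cs = cs
}

func main() {
    r := &result{cs: []column{{name: "_time"}, {name: "level"}, {name: "_msg"}}}
    r.updateColumns([]string{"level"})
    fmt.Println(len(r.getColumns()), r.getColumns()[0].name) // 1 level
}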
+
+type blockResultColumn struct {
+    // name is the column name.
+    name string
+
+    // isConst is set to true if the column is const.
+    //
+    // The column value is stored in encodedValues[0].
+    isConst bool
+
+    // isTime is set to true if the column contains _time values.
+    //
+    // The column values are stored in blockResult.timestamps.
+    isTime bool
+
+    // valueType is the type of non-const values.
+    valueType valueType
+
+    // dictValues contain dictionary values for a valueTypeDict column.
+    dictValues []string
+
+    // encodedValues contain encoded values for a non-const column.
+    encodedValues []string
+
+    // values contain decoded values after a getValues() call for the given column.
+    values []string
+}
+
+func (c *blockResultColumn) reset() {
+    c.name = ""
+    c.isConst = false
+    c.isTime = false
+    c.valueType = valueTypeUnknown
+    c.dictValues = nil
+    c.encodedValues = nil
+    c.values = nil
+}
+
+// getEncodedValues returns encoded values for the given column.
+//
+// The returned encoded values are valid until br.reset() is called.
+func (c *blockResultColumn) getEncodedValues(br *blockResult) []string {
+    if c.encodedValues != nil {
+        return c.encodedValues
+    }
+    if !c.isTime {
+        logger.Panicf("BUG: encodedValues may be missing only for the _time column; got %q column", c.name)
+    }
+
+    buf := br.buf
+    valuesBuf := br.valuesBuf
+    valuesBufLen := len(valuesBuf)
+    for _, timestamp := range br.timestamps {
+        bufLen := len(buf)
+        buf = encoding.MarshalInt64(buf, timestamp)
+        s := bytesutil.ToUnsafeString(buf[bufLen:])
+        valuesBuf = append(valuesBuf, s)
+    }
+    c.encodedValues = valuesBuf[valuesBufLen:]
+    br.valuesBuf = valuesBuf
+    br.buf = buf
+
+    return c.encodedValues
+}
+
+// getValueAtRow returns the value at the given rowIdx.
+//
+// The returned value is valid until br.reset() is called.
+func (c *blockResultColumn) getValueAtRow(br *blockResult, rowIdx int) string {
+    if c.isConst {
+        // Fast path for const column.
+        return c.encodedValues[0]
+    }
+    if c.values != nil {
+        // Fast path, which avoids the call overhead of getValues().
+        return c.values[rowIdx]
+    }
+
+    // Slow path - decode all the values and return the value at rowIdx.
+    values := c.getValues(br)
+    return values[rowIdx]
+}
+
-// getColumnValues returns values for the column with the given idx.
+// getValues returns values for the given column.
 //
 // The returned values are valid until br.reset() is called.
-func (br *blockResult) getColumnValues(idx int) []string {
-    c := &br.cs[idx]
+func (c *blockResultColumn) getValues(br *blockResult) []string {
     if c.values != nil {
         return c.values
     }

@@ -589,6 +769,13 @@ func (c *blockResultColumn) getValues(br *blockResult) []string {
     if c.isConst {
         v := c.encodedValues[0]
+        if v == "" {
+            // Fast path - return a slice of empty strings without constructing it.
+            c.values = getEmptyStrings(len(br.timestamps))
+            return c.values
+        }
+
+        // Slower path - construct a slice of len(br.timestamps) identical values.
         for range br.timestamps {
             valuesBuf = append(valuesBuf, v)
         }

@@ -694,35 +881,137 @@ func (c *blockResultColumn) getValues(br *blockResult) []string {
     return c.values
 }

-type blockResultColumn struct {
-    // isConst is set to true if the column is const.
-    //
-    // The column value is stored in encodedValues[0]
-    isConst bool
-
-    // isTime is set to true if the column contains _time values.
-    //
-    // The column values are stored in blockResult.timestamps
-    isTime bool
-
-    // valueType is the type of non-const values
-    valueType valueType
-
-    // dictValues contain dictionary values for a valueTypeDict column
-    dictValues []string
-
-    // encodedValues contain encoded values for a non-const column
-    encodedValues []string
-
-    // values contain decoded values after a getColumnValues() call for the given column
-    values []string
-}
-
-func (c *blockResultColumn) reset() {
-    c.isConst = false
-    c.isTime = false
-    c.valueType = valueTypeUnknown
-    c.dictValues = nil
-    c.encodedValues = nil
-    c.values = nil
-}
+func (c *blockResultColumn) getFloatValueAtRow(rowIdx int) float64 {
+    if c.isConst {
+        v := c.encodedValues[0]
+        f, _ := tryParseFloat64(v)
+        return f
+    }
+    if c.isTime {
+        return 0
+    }
+
+    switch c.valueType {
+    case valueTypeString:
+        f, _ := tryParseFloat64(c.encodedValues[rowIdx])
+        return f
+    case valueTypeDict:
+        dictIdx := c.encodedValues[rowIdx][0]
+        f, _ := tryParseFloat64(c.dictValues[dictIdx])
+        return f
+    case valueTypeUint8:
+        return float64(c.encodedValues[rowIdx][0])
+    case valueTypeUint16:
+        b := bytesutil.ToUnsafeBytes(c.encodedValues[rowIdx])
+        return float64(encoding.UnmarshalUint16(b))
+    case valueTypeUint32:
+        b := bytesutil.ToUnsafeBytes(c.encodedValues[rowIdx])
+        return float64(encoding.UnmarshalUint32(b))
+    case valueTypeUint64:
+        b := bytesutil.ToUnsafeBytes(c.encodedValues[rowIdx])
+        return float64(encoding.UnmarshalUint64(b))
+    case valueTypeFloat64:
+        b := bytesutil.ToUnsafeBytes(c.encodedValues[rowIdx])
+        n := encoding.UnmarshalUint64(b)
+        return math.Float64frombits(n)
+    case valueTypeIPv4:
+        return 0
+    case valueTypeTimestampISO8601:
+        return 0
+    default:
+        logger.Panicf("BUG: unknown valueType=%d", c.valueType)
+        return 0
+    }
+}
+
+func (c *blockResultColumn) sumValues(br *blockResult) float64 {
+    if c.isConst {
+        v := c.encodedValues[0]
+        f, _ := tryParseFloat64(v)
+        if f == 0 || math.IsNaN(f) {
+            return 0
+        }
+        return f * float64(len(br.timestamps))
+    }
+    if c.isTime {
+        return 0
+    }
+
+    switch c.valueType {
+    case valueTypeString:
+        sum := float64(0)
+        f := float64(0)
+        values := c.encodedValues
+        for i := range values {
+            if i == 0 || values[i-1] != values[i] {
+                f, _ = tryParseFloat64(values[i])
+            }
+            if !math.IsNaN(f) {
+                sum += f
+            }
+        }
+        return sum
+    case valueTypeDict:
+        a := encoding.GetFloat64s(len(c.dictValues))
+        dictValuesFloat := a.A
+        for i, v := range c.dictValues {
+            f, _ := tryParseFloat64(v)
+            if math.IsNaN(f) {
+                f = 0
+            }
+            dictValuesFloat[i] = f
+        }
+        sum := float64(0)
+        for _, v := range c.encodedValues {
+            dictIdx := v[0]
+            sum += dictValuesFloat[dictIdx]
+        }
+        encoding.PutFloat64s(a)
+        return sum
+    case valueTypeUint8:
+        sum := uint64(0)
+        for _, v := range c.encodedValues {
+            sum += uint64(v[0])
+        }
+        return float64(sum)
+    case valueTypeUint16:
+        sum := uint64(0)
+        for _, v := range c.encodedValues {
+            b := bytesutil.ToUnsafeBytes(v)
+            sum += uint64(encoding.UnmarshalUint16(b))
+        }
+        return float64(sum)
+    case valueTypeUint32:
+        sum := uint64(0)
+        for _, v := range c.encodedValues {
+            b := bytesutil.ToUnsafeBytes(v)
+            sum += uint64(encoding.UnmarshalUint32(b))
+        }
+        return float64(sum)
+    case valueTypeUint64:
+        sum := float64(0)
+        for _, v := range c.encodedValues {
+            b := bytesutil.ToUnsafeBytes(v)
+            sum += float64(encoding.UnmarshalUint64(b))
+        }
+        return sum
+    case valueTypeFloat64:
+        sum := float64(0)
+        for _, v := range c.encodedValues {
+            b := bytesutil.ToUnsafeBytes(v)
+            n := encoding.UnmarshalUint64(b)
+            f := math.Float64frombits(n)
+            if !math.IsNaN(f) {
+                sum += f
+            }
+        }
+        return sum
+    case valueTypeIPv4:
+        return 0
+    case valueTypeTimestampISO8601:
+        return 0
+    default:
+        logger.Panicf("BUG: unknown valueType=%d", c.valueType)
+        return 0
+    }
+}
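The valueTypeDict branch of sumValues() is a good illustration of why dictionary encoding pays off: each distinct value is parsed once and the per-row work is a single indexed load. A self-contained sketch of that idea (using strconv.ParseFloat instead of the internal tryParseFloat64, and no Float64s pooling):

package main

import (
    "fmt"
    "math"
    "strconv"
)

// sumDictColumn sums a dictionary-encoded column: every encoded value is a
// one-byte index into dictValues, so each distinct value is parsed only once.
func sumDictColumn(dictValues, encodedValues []string) float64 {
    parsed := make([]float64, len(dictValues))
    for i, v := range dictValues {
        f, err := strconv.ParseFloat(v, 64)
        if err != nil || math.IsNaN(f) {
            f = 0 // treat unparseable and NaN entries as 0, like sumValues does
        }
        parsed[i] = f
    }
    sum := float64(0)
    for _, v := range encodedValues {
        sum += parsed[v[0]]
    }
    return sum
}

func main() {
    dict := []string{"1.5", "oops", "2"}
    encoded := []string{"\x00", "\x02", "\x02", "\x01"}
    fmt.Println(sumDictColumn(dict, encoded)) // 5.5
}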


@@ -203,10 +203,11 @@ func testFilterMatchForStorage(t *testing.T, s *Storage, tenantID TenantID, f fi
     }

     // Verify columns
-    if len(br.cs) != 1 {
-        t.Fatalf("unexpected number of columns in blockResult; got %d; want 1", len(br.cs))
+    cs := br.getColumns()
+    if len(cs) != 1 {
+        t.Fatalf("unexpected number of columns in blockResult; got %d; want 1", len(cs))
     }
-    results := br.getColumnValues(0)
+    results := cs[0].getValues(br)
     if !reflect.DeepEqual(results, expectedResults) {
         t.Fatalf("unexpected results matched;\ngot\n%q\nwant\n%q", results, expectedResults)
     }


@ -27,13 +27,13 @@ type pipeProcessor interface {
// The workerID is the id of the worker goroutine, which calls the writeBlock. // The workerID is the id of the worker goroutine, which calls the writeBlock.
// It is in the range 0 ... workersCount-1 . // It is in the range 0 ... workersCount-1 .
// //
// It is forbidden to hold references to columns after returning from writeBlock, since the caller re-uses columns. // It is forbidden to hold references br after returning from writeBlock, since the caller re-uses it.
// //
// If any error occurs at writeBlock, then cancel() must be called by pipeProcessor in order to notify worker goroutines // If any error occurs at writeBlock, then cancel() must be called by pipeProcessor in order to notify worker goroutines
// to stop sending new data. The occurred error must be returned from flush(). // to stop sending new data. The occurred error must be returned from flush().
// //
// cancel() may be called also when the pipeProcessor decides to stop accepting new data, even if there is no any error. // cancel() may be called also when the pipeProcessor decides to stop accepting new data, even if there is no any error.
writeBlock(workerID uint, timestamps []int64, columns []BlockColumn) writeBlock(workerID uint, br *blockResult)
// flush must flush all the data accumulated in the pipeProcessor to the base pipeProcessor. // flush must flush all the data accumulated in the pipeProcessor to the base pipeProcessor.
// //
@ -43,14 +43,14 @@ type pipeProcessor interface {
flush() error flush() error
} }
type defaultPipeProcessor func(workerID uint, timestamps []int64, columns []BlockColumn) type defaultPipeProcessor func(workerID uint, br *blockResult)
func newDefaultPipeProcessor(writeBlock func(workerID uint, timestamps []int64, columns []BlockColumn)) pipeProcessor { func newDefaultPipeProcessor(writeBlock func(workerID uint, br *blockResult)) pipeProcessor {
return defaultPipeProcessor(writeBlock) return defaultPipeProcessor(writeBlock)
} }
func (dpp defaultPipeProcessor) writeBlock(workerID uint, timestamps []int64, columns []BlockColumn) { func (dpp defaultPipeProcessor) writeBlock(workerID uint, br *blockResult) {
dpp(workerID, timestamps, columns) dpp(workerID, br)
} }
func (dpp defaultPipeProcessor) flush() error { func (dpp defaultPipeProcessor) flush() error {
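defaultPipeProcessor is the usual "function as interface" adapter, in the same spirit as http.HandlerFunc. A standalone illustration of the pattern with hypothetical types (blockResult is unexported, so this sketch substitutes a plain row count):

package main

import "fmt"

type pipeProcessor interface {
    writeBlock(workerID uint, rows int)
    flush() error
}

// defaultPipeProcessor adapts a plain function to the pipeProcessor interface.
type defaultPipeProcessor func(workerID uint, rows int)

func (dpp defaultPipeProcessor) writeBlock(workerID uint, rows int) { dpp(workerID, rows) }
func (dpp defaultPipeProcessor) flush() error                       { return nil }

func main() {
    var pp pipeProcessor = defaultPipeProcessor(func(workerID uint, rows int) {
        fmt.Printf("worker %d wrote %d rows\n", workerID, rows)
    })
    pp.writeBlock(0, 42)
}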


@@ -34,26 +34,11 @@ type pipeFieldsProcessor struct {
     ppBase pipeProcessor
 }

-func (fpp *pipeFieldsProcessor) writeBlock(workerID uint, timestamps []int64, columns []BlockColumn) {
-    if fpp.pf.containsStar || areSameBlockColumns(columns, fpp.pf.fields) {
-        // Fast path - there is no need for additional transformations before writing the block to ppBase.
-        fpp.ppBase.writeBlock(workerID, timestamps, columns)
-        return
+func (fpp *pipeFieldsProcessor) writeBlock(workerID uint, br *blockResult) {
+    if !fpp.pf.containsStar {
+        br.updateColumns(fpp.pf.fields)
     }
-
-    // Slow path - construct columns for fpp.pf.fields before writing them to ppBase.
-    brs := getBlockRows()
-    cs := brs.cs
-    for _, f := range fpp.pf.fields {
-        values := getBlockColumnValues(columns, f, len(timestamps))
-        cs = append(cs, BlockColumn{
-            Name:   f,
-            Values: values,
-        })
-    }
-    fpp.ppBase.writeBlock(workerID, timestamps, cs)
-    brs.cs = cs
-    putBlockRows(brs)
+    fpp.ppBase.writeBlock(workerID, br)
 }

 func (fpp *pipeFieldsProcessor) flush() error {


@@ -33,31 +33,25 @@ type pipeHeadProcessor struct {
     rowsProcessed atomic.Uint64
 }

-func (hpp *pipeHeadProcessor) writeBlock(workerID uint, timestamps []int64, columns []BlockColumn) {
-    rowsProcessed := hpp.rowsProcessed.Add(uint64(len(timestamps)))
+func (hpp *pipeHeadProcessor) writeBlock(workerID uint, br *blockResult) {
+    rowsProcessed := hpp.rowsProcessed.Add(uint64(len(br.timestamps)))
     if rowsProcessed <= hpp.ph.n {
         // Fast path - write all the rows to ppBase.
-        hpp.ppBase.writeBlock(workerID, timestamps, columns)
+        hpp.ppBase.writeBlock(workerID, br)
         return
     }

     // Slow path - overflow. Write the remaining rows if needed.
-    rowsProcessed -= uint64(len(timestamps))
+    rowsProcessed -= uint64(len(br.timestamps))
     if rowsProcessed >= hpp.ph.n {
        // Nothing to write. There is no need for a cancel() call, since it has been called by another goroutine.
         return
     }

     // Write the remaining rows.
-    rowsRemaining := hpp.ph.n - rowsProcessed
-    cs := make([]BlockColumn, len(columns))
-    for i, c := range columns {
-        cDst := &cs[i]
-        cDst.Name = c.Name
-        cDst.Values = c.Values[:rowsRemaining]
-    }
-    timestamps = timestamps[:rowsRemaining]
-    hpp.ppBase.writeBlock(workerID, timestamps, cs)
+    keepRows := hpp.ph.n - rowsProcessed
+    br.truncateRows(int(keepRows))
+    hpp.ppBase.writeBlock(workerID, br)

     // Notify the caller that it should stop passing more data to writeBlock().
     hpp.cancel()
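The overflow arithmetic above is compact enough to deserve a worked example. A hypothetical pure-function mirror of the logic (not the actual code):

package main

import "fmt"

// keepRows mirrors the pipeHeadProcessor bookkeeping: n is the head limit,
// after is the shared counter value after atomically adding the block size,
// and blockRows is the number of rows in the current block.
func keepRows(n, after, blockRows uint64) uint64 {
    if after <= n {
        return blockRows // fast path: the whole block fits under the limit
    }
    before := after - blockRows
    if before >= n {
        return 0 // the limit was already reached; nothing to write
    }
    return n - before // keep only the head of this block
}

func main() {
    // head 10: 7 rows were already processed and a 5-row block arrives,
    // so only 3 of its rows are kept before cancel() is called.
    fmt.Println(keepRows(10, 7+5, 5)) // 3
}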


@@ -27,27 +27,21 @@ type pipeSkipProcessor struct {
     rowsProcessed atomic.Uint64
 }

-func (spp *pipeSkipProcessor) writeBlock(workerID uint, timestamps []int64, columns []BlockColumn) {
-    rowsProcessed := spp.rowsProcessed.Add(uint64(len(timestamps)))
+func (spp *pipeSkipProcessor) writeBlock(workerID uint, br *blockResult) {
+    rowsProcessed := spp.rowsProcessed.Add(uint64(len(br.timestamps)))
     if rowsProcessed <= spp.ps.n {
         return
     }

-    rowsProcessed -= uint64(len(timestamps))
+    rowsProcessed -= uint64(len(br.timestamps))
     if rowsProcessed >= spp.ps.n {
-        spp.ppBase.writeBlock(workerID, timestamps, columns)
+        spp.ppBase.writeBlock(workerID, br)
         return
     }

-    rowsRemaining := spp.ps.n - rowsProcessed
-    cs := make([]BlockColumn, len(columns))
-    for i, c := range columns {
-        cDst := &cs[i]
-        cDst.Name = c.Name
-        cDst.Values = c.Values[rowsRemaining:]
-    }
-    timestamps = timestamps[rowsRemaining:]
-    spp.ppBase.writeBlock(workerID, timestamps, cs)
+    rowsSkip := spp.ps.n - rowsProcessed
+    br.skipRows(int(rowsSkip))
+    spp.ppBase.writeBlock(workerID, br)
 }

 func (spp *pipeSkipProcessor) flush() error {


@@ -35,15 +35,15 @@ type statsFunc interface {
 // All the statsProcessor methods are called from a single goroutine at a time,
 // so there is no need for internal synchronization.
 type statsProcessor interface {
-    // updateStatsForAllRows must update statsProcessor stats from all the rows.
+    // updateStatsForAllRows must update statsProcessor stats for all the rows in br.
     //
-    // It must return the increase of the internal state size in bytes for the statsProcessor.
+    // It must return the change of the internal state size in bytes for the statsProcessor.
-    updateStatsForAllRows(timestamps []int64, columns []BlockColumn) int
+    updateStatsForAllRows(br *blockResult) int

-    // updateStatsForRow must update statsProcessor stats from the row at rowIndex.
+    // updateStatsForRow must update statsProcessor stats for the row at rowIndex in br.
     //
-    // It must return the increase of the internal state size in bytes for the statsProcessor.
+    // It must return the change of the internal state size in bytes for the statsProcessor.
-    updateStatsForRow(timestamps []int64, columns []BlockColumn, rowIndex int) int
+    updateStatsForRow(br *blockResult, rowIndex int) int

     // mergeState must merge sfp state into statsProcessor state.
     mergeState(sfp statsProcessor)

@@ -149,7 +149,7 @@ type pipeStatsGroup struct {
     sfps []statsProcessor
 }

-func (spp *pipeStatsProcessor) writeBlock(workerID uint, timestamps []int64, columns []BlockColumn) {
+func (spp *pipeStatsProcessor) writeBlock(workerID uint, br *blockResult) {
     shard := &spp.shards[workerID]

     for shard.stateSizeBudget < 0 {

@@ -170,60 +170,69 @@ func (spp *pipeStatsProcessor) writeBlock(workerID uint, br *blockResult) {
     if len(byFields) == 0 {
         // Fast path - pass all the rows to a single group with an empty key.
         for _, sfp := range shard.getStatsProcessors(nil) {
-            shard.stateSizeBudget -= sfp.updateStatsForAllRows(timestamps, columns)
+            shard.stateSizeBudget -= sfp.updateStatsForAllRows(br)
         }
         return
     }
     if len(byFields) == 1 {
         // Special case for grouping by a single column.
-        values := getBlockColumnValues(columns, byFields[0], len(timestamps))
-        if isConstValue(values) {
+        c := br.getColumnByName(byFields[0])
+        if c.isConst {
             // Fast path for a column with a constant value.
-            shard.keyBuf = encoding.MarshalBytes(shard.keyBuf[:0], bytesutil.ToUnsafeBytes(values[0]))
+            shard.keyBuf = encoding.MarshalBytes(shard.keyBuf[:0], bytesutil.ToUnsafeBytes(c.encodedValues[0]))
             for _, sfp := range shard.getStatsProcessors(shard.keyBuf) {
-                shard.stateSizeBudget -= sfp.updateStatsForAllRows(timestamps, columns)
+                shard.stateSizeBudget -= sfp.updateStatsForAllRows(br)
             }
             return
         }

         // Slower path for a column with different values.
+        values := c.getValues(br)
         var sfps []statsProcessor
-        keyBuf := shard.keyBuf
-        for i := range timestamps {
+        keyBuf := shard.keyBuf[:0]
+        for i := range br.timestamps {
             if i <= 0 || values[i-1] != values[i] {
                 keyBuf = encoding.MarshalBytes(keyBuf[:0], bytesutil.ToUnsafeBytes(values[i]))
                 sfps = shard.getStatsProcessors(keyBuf)
             }
             for _, sfp := range sfps {
-                shard.stateSizeBudget -= sfp.updateStatsForRow(timestamps, columns, i)
+                shard.stateSizeBudget -= sfp.updateStatsForRow(br, i)
             }
         }
         shard.keyBuf = keyBuf
         return
     }

-    // Pre-calculate column values for byFields in order to speed up building the group key in the loop below.
-    shard.columnValues = appendBlockColumnValues(shard.columnValues[:0], columns, byFields, len(timestamps))
-    columnValues := shard.columnValues
+    // Verify whether all the 'by (...)' columns are constant.
+    areAllConstColumns := true
+    keyBuf := shard.keyBuf[:0]
+    for _, f := range byFields {
+        c := br.getColumnByName(f)
+        if !c.isConst {
+            areAllConstColumns = false
+            break
+        }
+        keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(c.encodedValues[0]))
+    }
+    shard.keyBuf = keyBuf

-    if areConstValues(columnValues) {
-        // Fast path for columns with constant values.
-        keyBuf := shard.keyBuf[:0]
-        for _, values := range columnValues {
-            keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(values[0]))
-        }
+    if areAllConstColumns {
+        // Fast path for constant 'by (...)' columns.
         for _, sfp := range shard.getStatsProcessors(keyBuf) {
-            shard.stateSizeBudget -= sfp.updateStatsForAllRows(timestamps, columns)
+            shard.stateSizeBudget -= sfp.updateStatsForAllRows(br)
         }
-        shard.keyBuf = keyBuf
         return
     }

-    // The slowest path - group by multiple columns.
+    // The slowest path - group by multiple columns with different values across rows.
+
+    // Pre-calculate column values for byFields in order to speed up building the group key in the loop below.
+    shard.columnValues = br.appendColumnValues(shard.columnValues[:0], byFields)
+    columnValues := shard.columnValues
+
     var sfps []statsProcessor
-    keyBuf := shard.keyBuf
-    for i := range timestamps {
-        // verify whether the key for 'by (...)' fields equals the previous key
+    for i := range br.timestamps {
+        // Verify whether the key for 'by (...)' fields equals the previous key
         sameValue := sfps != nil
         for _, values := range columnValues {
             if i <= 0 || values[i-1] != values[i] {

@@ -240,35 +249,12 @@ func (spp *pipeStatsProcessor) writeBlock(workerID uint, br *blockResult) {
             sfps = shard.getStatsProcessors(keyBuf)
         }
         for _, sfp := range sfps {
-            shard.stateSizeBudget -= sfp.updateStatsForRow(timestamps, columns, i)
+            shard.stateSizeBudget -= sfp.updateStatsForRow(br, i)
         }
     }
     shard.keyBuf = keyBuf
 }

-func areConstValues(valuess [][]string) bool {
-    for _, values := range valuess {
-        if !isConstValue(values) {
-            return false
-        }
-    }
-    return true
-}
-
-func isConstValue(values []string) bool {
-    if len(values) == 0 {
-        // Return false, since it is impossible to get the values[0] value from empty values.
-        return false
-    }
-    vFirst := values[0]
-    for _, v := range values[1:] {
-        if v != vFirst {
-            return false
-        }
-    }
-    return true
-}
-
 func (spp *pipeStatsProcessor) flush() error {
     if n := spp.stateSizeBudget.Load(); n <= 0 {
         return fmt.Errorf("cannot calculate [%s], since it requires more than %dMB of memory", spp.ps.String(), spp.maxStateSize/(1<<20))

@@ -282,7 +268,7 @@ func (spp *pipeStatsProcessor) flush() error {
         shard := &shards[i]
         for key, spg := range shard.m {
             // shard.m may be quite big, so this loop can take a lot of time and CPU.
-            // Stop processing data as soon as stopCh is closed without wasting CPU time.
+            // Stop processing data as soon as stopCh is closed without wasting additional CPU time.
             select {
             case <-spp.stopCh:
                 return nil

@@ -309,10 +295,10 @@ func (spp *pipeStatsProcessor) flush() error {
     }

     var values []string
-    var columns []BlockColumn
+    var br blockResult
     for key, spg := range m {
         // m may be quite big, so this loop can take a lot of time and CPU.
-        // Stop processing data as soon as stopCh is closed without wasting CPU time.
+        // Stop processing data as soon as stopCh is closed without wasting additional CPU time.
         select {
         case <-spp.stopCh:
             return nil

@@ -334,24 +320,20 @@ func (spp *pipeStatsProcessor) flush() error {
             logger.Panicf("BUG: unexpected number of values decoded from keyBuf; got %d; want %d", len(values), len(byFields))
         }

+        br.reset()
+
         // construct columns for byFields
-        columns = columns[:0]
         for i, f := range byFields {
-            columns = append(columns, BlockColumn{
-                Name:   f,
-                Values: values[i : i+1],
-            })
+            br.addConstColumn(f, values[i])
         }

         // construct columns for stats functions
         for _, sfp := range spg.sfps {
             name, value := sfp.finalizeStats()
-            columns = append(columns, BlockColumn{
-                Name:   name,
-                Values: []string{value},
-            })
+            br.addConstColumn(name, value)
         }
-        spp.ppBase.writeBlock(0, []int64{0}, columns)
+
+        spp.ppBase.writeBlock(0, &br)
     }

     return nil
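The group keys above are built by concatenating length-prefixed field values, so distinct tuples cannot collide after concatenation. A sketch of the same key shape with a hand-rolled varint prefix (the real code uses encoding.MarshalBytes from the VictoriaMetrics lib/encoding package):

package main

import (
    "encoding/binary"
    "fmt"
)

// marshalBytes appends b to dst with a varint length prefix.
func marshalBytes(dst, b []byte) []byte {
    dst = binary.AppendUvarint(dst, uint64(len(b)))
    return append(dst, b...)
}

func main() {
    // Build a composite group key for by (host, level).
    var key []byte
    for _, v := range []string{"web-1", "error"} {
        key = marshalBytes(key, []byte(v))
    }
    // The prefixes keep ("web-1", "error") distinct from ("web-1e", "rror").
    fmt.Printf("%q\n", key) // "\x05web-1\x05error"
}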


@@ -5,6 +5,8 @@ import (
     "slices"
     "strconv"
     "unsafe"
+
+    "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 )

 type statsCount struct {

@@ -35,49 +37,149 @@ type statsCountProcessor struct {
     rowsCount uint64
 }

-func (scp *statsCountProcessor) updateStatsForAllRows(timestamps []int64, columns []BlockColumn) int {
+func (scp *statsCountProcessor) updateStatsForAllRows(br *blockResult) int {
     fields := scp.sc.fields
     if len(fields) == 0 || scp.sc.containsStar {
-        // Fast path - count all the rows.
-        scp.rowsCount += uint64(len(timestamps))
+        // Fast path - unconditionally count all the rows.
+        scp.rowsCount += uint64(len(br.timestamps))
         return 0
     }
+    if len(fields) == 1 {
+        // Fast path for count(single_column)
+        c := br.getColumnByName(fields[0])
+        if c.isConst {
+            if c.encodedValues[0] != "" {
+                scp.rowsCount += uint64(len(br.timestamps))
+            }
+            return 0
+        }
+        if c.isTime {
+            scp.rowsCount += uint64(len(br.timestamps))
+            return 0
+        }
+        switch c.valueType {
+        case valueTypeString:
+            for _, v := range c.encodedValues {
+                if v != "" {
+                    scp.rowsCount++
+                }
+            }
+            return 0
+        case valueTypeDict:
+            zeroDictIdx := slices.Index(c.dictValues, "")
+            if zeroDictIdx < 0 {
+                scp.rowsCount += uint64(len(br.timestamps))
+                return 0
+            }
+            for _, v := range c.encodedValues {
+                if int(v[0]) != zeroDictIdx {
+                    scp.rowsCount++
+                }
+            }
+            return 0
+        case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
+            scp.rowsCount += uint64(len(br.timestamps))
+            return 0
+        default:
+            logger.Panicf("BUG: unknown valueType=%d", c.valueType)
+            return 0
+        }
+    }

     // Slow path - count rows containing at least a single non-empty value for the fields enumerated inside count().
-    bm := getBitmap(len(timestamps))
+    bm := getBitmap(len(br.timestamps))
     defer putBitmap(bm)

     bm.setBits()
     for _, f := range fields {
-        if idx := getBlockColumnIndex(columns, f); idx >= 0 {
-            values := columns[idx].Values
+        c := br.getColumnByName(f)
+        if c.isConst {
+            if c.encodedValues[0] != "" {
+                scp.rowsCount += uint64(len(br.timestamps))
+                return 0
+            }
+            continue
+        }
+        if c.isTime {
+            scp.rowsCount += uint64(len(br.timestamps))
+            return 0
+        }
+
+        switch c.valueType {
+        case valueTypeString:
             bm.forEachSetBit(func(i int) bool {
-                return values[i] == ""
+                return c.encodedValues[i] == ""
             })
+        case valueTypeDict:
+            if !slices.Contains(c.dictValues, "") {
+                scp.rowsCount += uint64(len(br.timestamps))
+                return 0
+            }
+            bm.forEachSetBit(func(i int) bool {
+                dictIdx := c.encodedValues[i][0]
+                return c.dictValues[dictIdx] == ""
+            })
+        case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
+            scp.rowsCount += uint64(len(br.timestamps))
+            return 0
+        default:
+            logger.Panicf("BUG: unknown valueType=%d", c.valueType)
+            return 0
         }
     }

-    emptyValues := 0
+    scp.rowsCount += uint64(len(br.timestamps))
     bm.forEachSetBit(func(i int) bool {
-        emptyValues++
+        scp.rowsCount--
         return true
     })
-
-    scp.rowsCount += uint64(len(timestamps) - emptyValues)
     return 0
 }

-func (scp *statsCountProcessor) updateStatsForRow(_ []int64, columns []BlockColumn, rowIdx int) int {
+func (scp *statsCountProcessor) updateStatsForRow(br *blockResult, rowIdx int) int {
     fields := scp.sc.fields
     if len(fields) == 0 || scp.sc.containsStar {
-        // Fast path - count the given row.
+        // Fast path - unconditionally count the given row.
         scp.rowsCount++
         return 0
     }
+    if len(fields) == 1 {
+        // Fast path for count(single_column)
+        c := br.getColumnByName(fields[0])
+        if c.isConst {
+            if c.encodedValues[0] != "" {
+                scp.rowsCount++
+            }
+            return 0
+        }
+        if c.isTime {
+            scp.rowsCount++
+            return 0
+        }
+        switch c.valueType {
+        case valueTypeString:
+            if v := c.encodedValues[rowIdx]; v != "" {
+                scp.rowsCount++
+            }
+            return 0
+        case valueTypeDict:
+            dictIdx := c.encodedValues[rowIdx][0]
+            if v := c.dictValues[dictIdx]; v != "" {
+                scp.rowsCount++
+            }
+            return 0
+        case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
+            scp.rowsCount++
+            return 0
+        default:
+            logger.Panicf("BUG: unknown valueType=%d", c.valueType)
+            return 0
+        }
+    }

     // Slow path - count the row at rowIdx if at least a single field enumerated inside count() is non-empty
     for _, f := range fields {
-        if idx := getBlockColumnIndex(columns, f); idx >= 0 && columns[idx].Values[rowIdx] != "" {
+        c := br.getColumnByName(f)
+        if v := c.getValueAtRow(br, rowIdx); v != "" {
             scp.rowsCount++
             return 0
         }


@@ -35,46 +35,28 @@ type statsSumProcessor struct {
     sum float64
 }

-func (ssp *statsSumProcessor) updateStatsForAllRows(timestamps []int64, columns []BlockColumn) int {
+func (ssp *statsSumProcessor) updateStatsForAllRows(br *blockResult) int {
     if ssp.ss.containsStar {
         // Sum all the columns
-        for _, c := range columns {
-            ssp.sum += sumValues(c.Values)
+        for _, c := range br.getColumns() {
+            ssp.sum += c.sumValues(br)
         }
         return 0
     }

     // Sum the requested columns
     for _, field := range ssp.ss.fields {
-        if idx := getBlockColumnIndex(columns, field); idx >= 0 {
-            ssp.sum += sumValues(columns[idx].Values)
-        }
+        c := br.getColumnByName(field)
+        ssp.sum += c.sumValues(br)
     }
     return 0
 }

-func sumValues(values []string) float64 {
-    sum := float64(0)
-    f := float64(0)
-    for i, v := range values {
-        if i == 0 || values[i-1] != v {
-            f, _ = tryParseFloat64(v)
-            if math.IsNaN(f) {
-                // Ignore NaN values, since this is the behaviour expected by most users.
-                f = 0
-            }
-        }
-        sum += f
-    }
-    return sum
-}
-
-func (ssp *statsSumProcessor) updateStatsForRow(_ []int64, columns []BlockColumn, rowIdx int) int {
+func (ssp *statsSumProcessor) updateStatsForRow(br *blockResult, rowIdx int) int {
     if ssp.ss.containsStar {
         // Sum all the fields for the given row
-        for _, c := range columns {
-            v := c.Values[rowIdx]
-            f, _ := tryParseFloat64(v)
+        for _, c := range br.getColumns() {
+            f := c.getFloatValueAtRow(rowIdx)
             if !math.IsNaN(f) {
                 ssp.sum += f
             }

@@ -84,12 +66,10 @@ func (ssp *statsSumProcessor) updateStatsForRow(_ []int64, columns []BlockColumn
     // Sum only the given fields for the given row
     for _, field := range ssp.ss.fields {
-        if idx := getBlockColumnIndex(columns, field); idx >= 0 {
-            v := columns[idx].Values[rowIdx]
-            f, _ := tryParseFloat64(v)
-            if !math.IsNaN(f) {
-                ssp.sum += f
-            }
+        c := br.getColumnByName(field)
+        f := c.getFloatValueAtRow(rowIdx)
+        if !math.IsNaN(f) {
+            ssp.sum += f
         }
     }
     return 0


@@ -4,7 +4,6 @@ import (
     "fmt"
     "slices"
     "strconv"
-    "strings"
     "unsafe"

     "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"

@@ -43,36 +42,38 @@ type statsUniqProcessor struct {
     keyBuf []byte
 }

-func (sup *statsUniqProcessor) updateStatsForAllRows(timestamps []int64, columns []BlockColumn) int {
+func (sup *statsUniqProcessor) updateStatsForAllRows(br *blockResult) int {
     fields := sup.su.fields
     m := sup.m

     stateSizeIncrease := 0
     if len(fields) == 0 || sup.su.containsStar {
         // Count unique rows
-        keyBuf := sup.keyBuf
-        for i := range timestamps {
+        columns := br.getColumns()
+        keyBuf := sup.keyBuf[:0]
+        for i := range br.timestamps {
             seenKey := true
             for _, c := range columns {
-                values := c.Values
+                values := c.getValues(br)
                 if i == 0 || values[i-1] != values[i] {
                     seenKey = false
                     break
                 }
             }
             if seenKey {
+                // This key has been already counted.
                 continue
             }

             allEmptyValues := true
             keyBuf = keyBuf[:0]
             for _, c := range columns {
-                v := c.Values[i]
+                v := c.getValueAtRow(br, i)
                 if v != "" {
                     allEmptyValues = false
                 }
                 // Put the column name into the key, since every block can contain a different set of columns for the '*' selector.
-                keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(c.Name))
+                keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(c.name))
                 keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(v))
             }
             if allEmptyValues {

@@ -88,39 +89,103 @@ func (sup *statsUniqProcessor) updateStatsForAllRows(br *blockResult) int {
         return stateSizeIncrease
     }
     if len(fields) == 1 {
-        // Fast path for a single column
-        if idx := getBlockColumnIndex(columns, fields[0]); idx >= 0 {
-            values := columns[idx].Values
-            for i, v := range values {
+        // Fast path for a single column.
+        // The unique key is formed as "<is_time> <value_type>? <encodedValue>",
+        // where <value_type> is skipped if <is_time> == 1.
+        // This guarantees that keys do not clash for different column types across blocks.
+        c := br.getColumnByName(fields[0])
+        if c.isTime {
+            // Count unique br.timestamps
+            timestamps := br.timestamps
+            keyBuf := sup.keyBuf[:0]
+            for i, timestamp := range timestamps {
+                if i > 0 && timestamps[i-1] == timestamps[i] {
+                    // This timestamp has been already counted.
+                    continue
+                }
+                keyBuf = append(keyBuf[:0], 1)
+                keyBuf = encoding.MarshalInt64(keyBuf, timestamp)
+                if _, ok := m[string(keyBuf)]; !ok {
+                    m[string(keyBuf)] = struct{}{}
+                    stateSizeIncrease += len(keyBuf) + int(unsafe.Sizeof(""))
+                }
+            }
+            sup.keyBuf = keyBuf
+            return stateSizeIncrease
+        }
+        if c.isConst {
+            // Count unique const values
+            v := c.encodedValues[0]
+            if v == "" {
+                // Do not count empty values
+                return stateSizeIncrease
+            }
+            keyBuf := sup.keyBuf[:0]
+            keyBuf = append(keyBuf[:0], 0, byte(valueTypeString))
+            keyBuf = append(keyBuf, v...)
+            if _, ok := m[string(keyBuf)]; !ok {
+                m[string(keyBuf)] = struct{}{}
+                stateSizeIncrease += len(keyBuf) + int(unsafe.Sizeof(""))
+            }
+            sup.keyBuf = keyBuf
+            return stateSizeIncrease
+        }
+        if c.valueType == valueTypeDict {
+            // Count unique non-empty c.dictValues
+            keyBuf := sup.keyBuf[:0]
+            for i, v := range c.dictValues {
                 if v == "" {
                     // Do not count empty values
                     continue
                 }
-                if i > 0 && values[i-1] == v {
-                    continue
-                }
-                if _, ok := m[v]; !ok {
-                    vCopy := strings.Clone(v)
-                    m[vCopy] = struct{}{}
-                    stateSizeIncrease += len(vCopy) + int(unsafe.Sizeof(vCopy))
+                keyBuf = append(keyBuf[:0], 0, byte(valueTypeDict))
+                keyBuf = append(keyBuf, byte(i))
+                if _, ok := m[string(keyBuf)]; !ok {
+                    m[string(keyBuf)] = struct{}{}
+                    stateSizeIncrease += len(keyBuf) + int(unsafe.Sizeof(""))
                 }
             }
+            sup.keyBuf = keyBuf
+            return stateSizeIncrease
         }
+
+        // Count unique values across encodedValues
+        encodedValues := c.getEncodedValues(br)
+        isStringValueType := c.valueType == valueTypeString
+        keyBuf := sup.keyBuf[:0]
+        for i, v := range encodedValues {
+            if isStringValueType && v == "" {
+                // Do not count empty values
+                continue
+            }
+            if i > 0 && encodedValues[i-1] == v {
+                // This value has been already counted.
+                continue
+            }
+            keyBuf = append(keyBuf[:0], 0, byte(c.valueType))
+            keyBuf = append(keyBuf, v...)
+            if _, ok := m[string(keyBuf)]; !ok {
+                m[string(keyBuf)] = struct{}{}
+                stateSizeIncrease += len(keyBuf) + int(unsafe.Sizeof(""))
+            }
+        }
+        sup.keyBuf = keyBuf
         return stateSizeIncrease
     }

     // Slow path for multiple columns.
     // Pre-calculate column values for the fields in order to speed up building the group key in the loop below.
-    sup.columnValues = appendBlockColumnValues(sup.columnValues[:0], columns, fields, len(timestamps))
+    sup.columnValues = br.appendColumnValues(sup.columnValues[:0], fields)
     columnValues := sup.columnValues

-    keyBuf := sup.keyBuf
-    for i := range timestamps {
+    keyBuf := sup.keyBuf[:0]
+    for i := range br.timestamps {
         seenKey := true
         for _, values := range columnValues {
             if i == 0 || values[i-1] != values[i] {
                 seenKey = false
+                break
             }
         }
         if seenKey {

@@ -149,7 +214,7 @@ func (sup *statsUniqProcessor) updateStatsForAllRows(br *blockResult) int {
     return stateSizeIncrease
 }

-func (sup *statsUniqProcessor) updateStatsForRow(timestamps []int64, columns []BlockColumn, rowIdx int) int {
+func (sup *statsUniqProcessor) updateStatsForRow(br *blockResult, rowIdx int) int {
     fields := sup.su.fields
     m := sup.m

@@ -158,13 +223,13 @@ func (sup *statsUniqProcessor) updateStatsForRow(br *blockResult, rowIdx int) in
         // Count unique rows
         allEmptyValues := true
         keyBuf := sup.keyBuf[:0]
-        for _, c := range columns {
-            v := c.Values[rowIdx]
+        for _, c := range br.getColumns() {
+            v := c.getValueAtRow(br, rowIdx)
             if v != "" {
                 allEmptyValues = false
             }
             // Put the column name into the key, since every block can contain a different set of columns for the '*' selector.
-            keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(c.Name))
+            keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(c.name))
             keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(v))
         }
         sup.keyBuf = keyBuf

@@ -180,19 +245,73 @@ func (sup *statsUniqProcessor) updateStatsForRow(br *blockResult, rowIdx int) in
         return stateSizeIncrease
     }
     if len(fields) == 1 {
-        // Fast path for a single column
-        if idx := getBlockColumnIndex(columns, fields[0]); idx >= 0 {
-            v := columns[idx].Values[rowIdx]
+        // Fast path for a single column.
+        // The unique key is formed as "<is_time> <value_type>? <encodedValue>",
+        // where <value_type> is skipped if <is_time> == 1.
+        // This guarantees that keys do not clash for different column types across blocks.
+        c := br.getColumnByName(fields[0])
+        if c.isTime {
+            // Count unique br.timestamps
+            keyBuf := sup.keyBuf[:0]
+            keyBuf = append(keyBuf[:0], 1)
+            keyBuf = encoding.MarshalInt64(keyBuf, br.timestamps[rowIdx])
+            if _, ok := m[string(keyBuf)]; !ok {
+                m[string(keyBuf)] = struct{}{}
+                stateSizeIncrease += len(keyBuf) + int(unsafe.Sizeof(""))
+            }
+            sup.keyBuf = keyBuf
+            return stateSizeIncrease
+        }
+        if c.isConst {
+            // Count unique const values
+            v := c.encodedValues[0]
             if v == "" {
                 // Do not count empty values
                 return stateSizeIncrease
             }
-            if _, ok := m[v]; !ok {
-                vCopy := strings.Clone(v)
-                m[vCopy] = struct{}{}
-                stateSizeIncrease += len(vCopy) + int(unsafe.Sizeof(vCopy))
+            keyBuf := sup.keyBuf[:0]
+            keyBuf = append(keyBuf[:0], 0, byte(valueTypeString))
+            keyBuf = append(keyBuf, v...)
+            if _, ok := m[string(keyBuf)]; !ok {
+                m[string(keyBuf)] = struct{}{}
+                stateSizeIncrease += len(keyBuf) + int(unsafe.Sizeof(""))
             }
+            sup.keyBuf = keyBuf
+            return stateSizeIncrease
         }
+        if c.valueType == valueTypeDict {
+            // Count unique non-empty c.dictValues
+            dictIdx := c.encodedValues[rowIdx][0]
+            if c.dictValues[dictIdx] == "" {
+                // Do not count empty values
+                return stateSizeIncrease
+            }
+            keyBuf := sup.keyBuf[:0]
+            keyBuf = append(keyBuf[:0], 0, byte(valueTypeDict))
+            keyBuf = append(keyBuf, dictIdx)
+            if _, ok := m[string(keyBuf)]; !ok {
+                m[string(keyBuf)] = struct{}{}
+                stateSizeIncrease += len(keyBuf) + int(unsafe.Sizeof(""))
+            }
+            sup.keyBuf = keyBuf
+            return stateSizeIncrease
+        }
+
+        // Count unique values across encodedValues
+        encodedValues := c.getEncodedValues(br)
+        v := encodedValues[rowIdx]
+        if c.valueType == valueTypeString && v == "" {
+            // Do not count empty values
+            return stateSizeIncrease
+        }
+        keyBuf := sup.keyBuf[:0]
+        keyBuf = append(keyBuf[:0], 0, byte(c.valueType))
+        keyBuf = append(keyBuf, v...)
+        if _, ok := m[string(keyBuf)]; !ok {
+            m[string(keyBuf)] = struct{}{}
+            stateSizeIncrease += len(keyBuf) + int(unsafe.Sizeof(""))
+        }
+        sup.keyBuf = keyBuf
         return stateSizeIncrease
     }

@@ -200,10 +319,8 @@ func (sup *statsUniqProcessor) updateStatsForRow(br *blockResult, rowIdx int) in
     allEmptyValues := true
     keyBuf := sup.keyBuf[:0]
     for _, f := range fields {
-        v := ""
-        if idx := getBlockColumnIndex(columns, f); idx >= 0 {
-            v = columns[idx].Values[rowIdx]
-        }
+        c := br.getColumnByName(f)
+        v := c.getValueAtRow(br, rowIdx)
         if v != "" {
             allEmptyValues = false
         }
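The "<is_time> <value_type>? <encodedValue>" key layout described in the comments above can be shown standalone (simplified, hypothetical constants; the real valueType values are unexported, and timestamps are marshaled with encoding.MarshalInt64 rather than big-endian):

package main

import (
    "encoding/binary"
    "fmt"
)

const (
    valueTypeString byte = 1 // hypothetical stand-ins for the unexported constants
    valueTypeDict   byte = 2
)

// timeKey encodes a _time key: the leading 1 marks the timestamp namespace.
func timeKey(dst []byte, ts int64) []byte {
    dst = append(dst, 1)
    return binary.BigEndian.AppendUint64(dst, uint64(ts))
}

// valueKey encodes a regular column key: a leading 0, then the value type,
// then the encoded value, so keys cannot clash across column types or blocks.
func valueKey(dst []byte, vt byte, encodedValue string) []byte {
    dst = append(dst, 0, vt)
    return append(dst, encodedValue...)
}

func main() {
    m := map[string]struct{}{}
    m[string(timeKey(nil, 1714500000))] = struct{}{}
    m[string(valueKey(nil, valueTypeString, "error"))] = struct{}{}
    m[string(valueKey(nil, valueTypeDict, "\x01"))] = struct{}{}
    fmt.Println(len(m)) // 3 distinct keys
}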


@@ -63,7 +63,24 @@ func (s *Storage) RunQuery(ctx context.Context, tenantIDs []TenantID, q *Query,
     workersCount := cgroup.AvailableCPUs()

-    pp := newDefaultPipeProcessor(writeBlock)
+    pp := newDefaultPipeProcessor(func(workerID uint, br *blockResult) {
+        brs := getBlockRows()
+        csDst := brs.cs
+        csSrc := br.getColumns()
+        for _, c := range csSrc {
+            values := c.getValues(br)
+            csDst = append(csDst, BlockColumn{
+                Name:   c.name,
+                Values: values,
+            })
+        }
+        writeBlock(workerID, br.timestamps, csDst)
+        brs.cs = csDst
+        putBlockRows(brs)
+    })
+
     ppMain := pp
     stopCh := ctx.Done()
     cancels := make([]func(), len(q.pipes))

@@ -79,21 +96,7 @@ func (s *Storage) RunQuery(ctx context.Context, tenantIDs []TenantID, q *Query,
         pps[i] = pp
     }

-    s.search(workersCount, so, stopCh, func(workerID uint, br *blockResult) {
-        brs := getBlockRows()
-        cs := brs.cs
-        for i, columnName := range br.columnNames {
-            cs = append(cs, BlockColumn{
-                Name:   columnName,
-                Values: br.getColumnValues(i),
-            })
-        }
-        pp.writeBlock(workerID, br.timestamps, cs)
-        brs.cs = cs
-        putBlockRows(brs)
-    })
+    s.search(workersCount, so, stopCh, pp.writeBlock)

     var errFlush error
     for i, pp := range pps {

@@ -150,18 +153,6 @@ func (c *BlockColumn) reset() {
     c.Values = nil
 }

-func areSameBlockColumns(columns []BlockColumn, columnNames []string) bool {
-    if len(columnNames) != len(columns) {
-        return false
-    }
-    for i, name := range columnNames {
-        if columns[i].Name != name {
-            return false
-        }
-    }
-    return true
-}
-
 func getBlockColumnIndex(columns []BlockColumn, columnName string) int {
     for i, c := range columns {
         if c.Name == columnName {

@@ -237,7 +228,7 @@ func (s *Storage) search(workersCount int, so *genericSearchOptions, stopCh <-ch
             }
             bs.search(bsw)
-            if bs.br.RowsCount() > 0 {
+            if len(bs.br.timestamps) > 0 {
                 processBlockResult(workerID, &bs.br)
             }
         }


@@ -466,7 +466,7 @@ func TestStorageSearch(t *testing.T) {
         if !br.streamID.tenantID.equal(&tenantID) {
             panic(fmt.Errorf("unexpected tenantID; got %s; want %s", &br.streamID.tenantID, &tenantID))
         }
-        rowsCountTotal.Add(uint32(br.RowsCount()))
+        rowsCountTotal.Add(uint32(len(br.timestamps)))
     }
     s.search(workersCount, so, nil, processBlock)

@@ -487,7 +487,7 @@ func TestStorageSearch(t *testing.T) {
     }
     var rowsCountTotal atomic.Uint32
     processBlock := func(_ uint, br *blockResult) {
-        rowsCountTotal.Add(uint32(br.RowsCount()))
+        rowsCountTotal.Add(uint32(len(br.timestamps)))
     }
     s.search(workersCount, so, nil, processBlock)

@@ -531,7 +531,7 @@ func TestStorageSearch(t *testing.T) {
         if !br.streamID.tenantID.equal(&tenantID) {
             panic(fmt.Errorf("unexpected tenantID; got %s; want %s", &br.streamID.tenantID, &tenantID))
         }
-        rowsCountTotal.Add(uint32(br.RowsCount()))
+        rowsCountTotal.Add(uint32(len(br.timestamps)))
     }
     s.search(workersCount, so, nil, processBlock)

@@ -560,7 +560,7 @@ func TestStorageSearch(t *testing.T) {
         if !br.streamID.tenantID.equal(&tenantID) {
             panic(fmt.Errorf("unexpected tenantID; got %s; want %s", &br.streamID.tenantID, &tenantID))
         }
-        rowsCountTotal.Add(uint32(br.RowsCount()))
+        rowsCountTotal.Add(uint32(len(br.timestamps)))
     }
     s.search(workersCount, so, nil, processBlock)

@@ -597,7 +597,7 @@ func TestStorageSearch(t *testing.T) {
         if !br.streamID.tenantID.equal(&tenantID) {
             panic(fmt.Errorf("unexpected tenantID; got %s; want %s", &br.streamID.tenantID, &tenantID))
        }
-        rowsCountTotal.Add(uint32(br.RowsCount()))
+        rowsCountTotal.Add(uint32(len(br.timestamps)))
     }
     s.search(workersCount, so, nil, processBlock)

@@ -622,7 +622,7 @@ func TestStorageSearch(t *testing.T) {
     }
     var rowsCountTotal atomic.Uint32
     processBlock := func(_ uint, br *blockResult) {
-        rowsCountTotal.Add(uint32(br.RowsCount()))
+        rowsCountTotal.Add(uint32(len(br.timestamps)))
     }
     s.search(workersCount, so, nil, processBlock)