mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-10 15:14:09 +00:00
beeb80e4f8
Refer the original byte slice with the marshaled columnsHeader for columns names and dictionary-encoded column values.
This improves query performance a bit when big number of blocks with big number of columns are scanned during the query.
(cherry picked from commit 279e25e7c8
)
390 lines
12 KiB
Go
390 lines
12 KiB
Go
package logstorage
|
|
|
|
import (
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
|
|
)
|
|
|
|
// blockData contains packed data for a single block.
|
|
//
|
|
// The main purpose of this struct is to reduce the work needed during background merge of parts.
|
|
// If the block is full, then the blockData can be written to the destination part
|
|
// without the need to unpack it.
|
|
type blockData struct {
|
|
// streamID is id of the stream for the data
|
|
streamID streamID
|
|
|
|
// uncompressedSizeBytes is the original (uncompressed) size of log entries stored in the block
|
|
uncompressedSizeBytes uint64
|
|
|
|
// rowsCount is the number of log entries in the block
|
|
rowsCount uint64
|
|
|
|
// timestampsData contains the encoded timestamps data for the block
|
|
timestampsData timestampsData
|
|
|
|
// columnsData contains packed per-column data
|
|
columnsData []columnData
|
|
|
|
// constColumns contains data for const columns across the block
|
|
constColumns []Field
|
|
}
|
|
|
|
// reset resets bd for subsequent re-use
|
|
func (bd *blockData) reset() {
|
|
bd.streamID.reset()
|
|
bd.uncompressedSizeBytes = 0
|
|
bd.rowsCount = 0
|
|
bd.timestampsData.reset()
|
|
|
|
cds := bd.columnsData
|
|
for i := range cds {
|
|
cds[i].reset()
|
|
}
|
|
bd.columnsData = cds[:0]
|
|
|
|
ccs := bd.constColumns
|
|
for i := range ccs {
|
|
ccs[i].Reset()
|
|
}
|
|
bd.constColumns = ccs[:0]
|
|
}
|
|
|
|
func (bd *blockData) resizeColumnsData(columnsDataLen int) []columnData {
|
|
bd.columnsData = slicesutil.SetLength(bd.columnsData, columnsDataLen)
|
|
return bd.columnsData
|
|
}
|
|
|
|
// copyFrom copies src to bd.
|
|
//
|
|
// bd is valid until a.reset() is called.
|
|
func (bd *blockData) copyFrom(a *arena, src *blockData) {
|
|
bd.reset()
|
|
|
|
bd.streamID = src.streamID
|
|
bd.uncompressedSizeBytes = src.uncompressedSizeBytes
|
|
bd.rowsCount = src.rowsCount
|
|
bd.timestampsData.copyFrom(a, &src.timestampsData)
|
|
|
|
cdsSrc := src.columnsData
|
|
cds := bd.resizeColumnsData(len(cdsSrc))
|
|
for i := range cds {
|
|
cds[i].copyFrom(a, &cdsSrc[i])
|
|
}
|
|
bd.columnsData = cds
|
|
|
|
bd.constColumns = appendFields(a, bd.constColumns[:0], src.constColumns)
|
|
}
|
|
|
|
// unmarshalRows appends unmarshaled from bd log entries to dst.
|
|
//
|
|
// The unmarshaled log entries are valid until sbu and vd are reset.
|
|
func (bd *blockData) unmarshalRows(dst *rows, sbu *stringsBlockUnmarshaler, vd *valuesDecoder) error {
|
|
b := getBlock()
|
|
defer putBlock(b)
|
|
|
|
if err := b.InitFromBlockData(bd, sbu, vd); err != nil {
|
|
return err
|
|
}
|
|
b.appendRowsTo(dst)
|
|
return nil
|
|
}
|
|
|
|
// mustWriteTo writes bd to sw and updates bh accordingly
|
|
func (bd *blockData) mustWriteTo(bh *blockHeader, sw *streamWriters) {
|
|
// Do not store the version used for encoding directly in the block data, since:
|
|
// - all the blocks in the same part use the same encoding
|
|
// - the block encoding version can be put in metadata file for the part (aka metadataFilename)
|
|
|
|
bh.reset()
|
|
|
|
bh.streamID = bd.streamID
|
|
bh.uncompressedSizeBytes = bd.uncompressedSizeBytes
|
|
bh.rowsCount = bd.rowsCount
|
|
|
|
// Marshal timestamps
|
|
bd.timestampsData.mustWriteTo(&bh.timestampsHeader, sw)
|
|
|
|
// Marshal columns
|
|
cds := bd.columnsData
|
|
|
|
csh := getColumnsHeader()
|
|
|
|
chs := csh.resizeColumnHeaders(len(cds))
|
|
for i := range cds {
|
|
cds[i].mustWriteToNoArena(&chs[i], sw)
|
|
}
|
|
csh.constColumns = append(csh.constColumns[:0], bd.constColumns...)
|
|
|
|
bb := longTermBufPool.Get()
|
|
bb.B = csh.marshal(bb.B)
|
|
|
|
putColumnsHeader(csh)
|
|
|
|
bh.columnsHeaderOffset = sw.columnsHeaderWriter.bytesWritten
|
|
bh.columnsHeaderSize = uint64(len(bb.B))
|
|
if bh.columnsHeaderSize > maxColumnsHeaderSize {
|
|
logger.Panicf("BUG: too big columnsHeaderSize: %d bytes; mustn't exceed %d bytes", bh.columnsHeaderSize, maxColumnsHeaderSize)
|
|
}
|
|
sw.columnsHeaderWriter.MustWrite(bb.B)
|
|
longTermBufPool.Put(bb)
|
|
}
|
|
|
|
// mustReadFrom reads block data associated with bh from sr to bd.
|
|
//
|
|
// The bd is valid until a.reset() is called.
|
|
func (bd *blockData) mustReadFrom(a *arena, bh *blockHeader, sr *streamReaders) {
|
|
bd.reset()
|
|
|
|
bd.streamID = bh.streamID
|
|
bd.uncompressedSizeBytes = bh.uncompressedSizeBytes
|
|
bd.rowsCount = bh.rowsCount
|
|
|
|
// Read timestamps
|
|
bd.timestampsData.mustReadFrom(a, &bh.timestampsHeader, sr)
|
|
|
|
// Read columns
|
|
if bh.columnsHeaderOffset != sr.columnsHeaderReader.bytesRead {
|
|
logger.Panicf("FATAL: %s: unexpected columnsHeaderOffset=%d; must equal to the number of bytes read: %d",
|
|
sr.columnsHeaderReader.Path(), bh.columnsHeaderOffset, sr.columnsHeaderReader.bytesRead)
|
|
}
|
|
columnsHeaderSize := bh.columnsHeaderSize
|
|
if columnsHeaderSize > maxColumnsHeaderSize {
|
|
logger.Panicf("BUG: %s: too big columnsHeaderSize: %d bytes; mustn't exceed %d bytes", sr.columnsHeaderReader.Path(), columnsHeaderSize, maxColumnsHeaderSize)
|
|
}
|
|
bb := longTermBufPool.Get()
|
|
bb.B = bytesutil.ResizeNoCopyMayOverallocate(bb.B, int(columnsHeaderSize))
|
|
sr.columnsHeaderReader.MustReadFull(bb.B)
|
|
|
|
csh := getColumnsHeader()
|
|
if err := csh.unmarshalNoArena(bb.B); err != nil {
|
|
logger.Panicf("FATAL: %s: cannot unmarshal columnsHeader: %s", sr.columnsHeaderReader.Path(), err)
|
|
}
|
|
chs := csh.columnHeaders
|
|
cds := bd.resizeColumnsData(len(chs))
|
|
for i := range chs {
|
|
cds[i].mustReadFrom(a, &chs[i], sr)
|
|
}
|
|
bd.constColumns = appendFields(a, bd.constColumns[:0], csh.constColumns)
|
|
putColumnsHeader(csh)
|
|
longTermBufPool.Put(bb)
|
|
}
|
|
|
|
// timestampsData contains the encoded timestamps data.
|
|
type timestampsData struct {
|
|
// data contains packed timestamps data.
|
|
data []byte
|
|
|
|
// marshalType is the marshal type for timestamps
|
|
marshalType encoding.MarshalType
|
|
|
|
// minTimestamp is the minimum timestamp in the timestamps data
|
|
minTimestamp int64
|
|
|
|
// maxTimestamp is the maximum timestamp in the timestamps data
|
|
maxTimestamp int64
|
|
}
|
|
|
|
// reset resets td for subsequent re-use
|
|
func (td *timestampsData) reset() {
|
|
td.data = nil
|
|
td.marshalType = 0
|
|
td.minTimestamp = 0
|
|
td.maxTimestamp = 0
|
|
}
|
|
|
|
// copyFrom copies src to td.
|
|
//
|
|
// td is valid until a.reset() is called.
|
|
func (td *timestampsData) copyFrom(a *arena, src *timestampsData) {
|
|
td.reset()
|
|
|
|
td.data = a.copyBytes(src.data)
|
|
td.marshalType = src.marshalType
|
|
td.minTimestamp = src.minTimestamp
|
|
td.maxTimestamp = src.maxTimestamp
|
|
}
|
|
|
|
// mustWriteTo writes td to sw and updates th accordingly
|
|
func (td *timestampsData) mustWriteTo(th *timestampsHeader, sw *streamWriters) {
|
|
th.reset()
|
|
|
|
th.marshalType = td.marshalType
|
|
th.minTimestamp = td.minTimestamp
|
|
th.maxTimestamp = td.maxTimestamp
|
|
th.blockOffset = sw.timestampsWriter.bytesWritten
|
|
th.blockSize = uint64(len(td.data))
|
|
if th.blockSize > maxTimestampsBlockSize {
|
|
logger.Panicf("BUG: too big timestampsHeader.blockSize: %d bytes; mustn't exceed %d bytes", th.blockSize, maxTimestampsBlockSize)
|
|
}
|
|
sw.timestampsWriter.MustWrite(td.data)
|
|
}
|
|
|
|
// mustReadFrom reads timestamps data associated with th from sr to td.
|
|
//
|
|
// td is valid until a.reset() is called.
|
|
func (td *timestampsData) mustReadFrom(a *arena, th *timestampsHeader, sr *streamReaders) {
|
|
td.reset()
|
|
|
|
td.marshalType = th.marshalType
|
|
td.minTimestamp = th.minTimestamp
|
|
td.maxTimestamp = th.maxTimestamp
|
|
|
|
timestampsReader := &sr.timestampsReader
|
|
if th.blockOffset != timestampsReader.bytesRead {
|
|
logger.Panicf("FATAL: %s: unexpected timestampsHeader.blockOffset=%d; must equal to the number of bytes read: %d",
|
|
timestampsReader.Path(), th.blockOffset, timestampsReader.bytesRead)
|
|
}
|
|
timestampsBlockSize := th.blockSize
|
|
if timestampsBlockSize > maxTimestampsBlockSize {
|
|
logger.Panicf("FATAL: %s: too big timestamps block with %d bytes; the maximum supported block size is %d bytes",
|
|
timestampsReader.Path(), timestampsBlockSize, maxTimestampsBlockSize)
|
|
}
|
|
td.data = a.newBytes(int(timestampsBlockSize))
|
|
timestampsReader.MustReadFull(td.data)
|
|
}
|
|
|
|
// columnData contains packed data for a single column.
|
|
type columnData struct {
|
|
// name is the column name
|
|
name string
|
|
|
|
// valueType is the type of values stored in valuesData
|
|
valueType valueType
|
|
|
|
// minValue is the minimum encoded uint* or float64 value in the columnHeader
|
|
//
|
|
// It is used for fast detection of whether the given columnHeader contains values in the given range
|
|
minValue uint64
|
|
|
|
// maxValue is the maximum encoded uint* or float64 value in the columnHeader
|
|
//
|
|
// It is used for fast detection of whether the given columnHeader contains values in the given range
|
|
maxValue uint64
|
|
|
|
// valuesDict contains unique values for valueType = valueTypeDict
|
|
valuesDict valuesDict
|
|
|
|
// valuesData contains packed values data for the given column
|
|
valuesData []byte
|
|
|
|
// bloomFilterData contains packed bloomFilter data for the given column
|
|
bloomFilterData []byte
|
|
}
|
|
|
|
// reset rests cd for subsequent re-use
|
|
func (cd *columnData) reset() {
|
|
cd.name = ""
|
|
cd.valueType = 0
|
|
|
|
cd.minValue = 0
|
|
cd.maxValue = 0
|
|
cd.valuesDict.reset()
|
|
|
|
cd.valuesData = nil
|
|
cd.bloomFilterData = nil
|
|
}
|
|
|
|
// copyFrom copies src to cd.
|
|
//
|
|
// cd is valid until a.reset() is called.
|
|
func (cd *columnData) copyFrom(a *arena, src *columnData) {
|
|
cd.reset()
|
|
|
|
cd.name = a.copyString(src.name)
|
|
cd.valueType = src.valueType
|
|
|
|
cd.minValue = src.minValue
|
|
cd.maxValue = src.maxValue
|
|
cd.valuesDict.copyFrom(a, &src.valuesDict)
|
|
|
|
cd.valuesData = a.copyBytes(src.valuesData)
|
|
cd.bloomFilterData = a.copyBytes(src.bloomFilterData)
|
|
}
|
|
|
|
// mustWriteTo writes cd to sw and updates ch accordingly.
|
|
//
|
|
// ch is valid until cd is changed.
|
|
func (cd *columnData) mustWriteToNoArena(ch *columnHeader, sw *streamWriters) {
|
|
ch.reset()
|
|
|
|
valuesWriter := &sw.fieldValuesWriter
|
|
bloomFilterWriter := &sw.fieldBloomFilterWriter
|
|
if cd.name == "" {
|
|
valuesWriter = &sw.messageValuesWriter
|
|
bloomFilterWriter = &sw.messageBloomFilterWriter
|
|
}
|
|
|
|
ch.name = cd.name
|
|
ch.valueType = cd.valueType
|
|
|
|
ch.minValue = cd.minValue
|
|
ch.maxValue = cd.maxValue
|
|
ch.valuesDict.copyFromNoArena(&cd.valuesDict)
|
|
|
|
// marshal values
|
|
ch.valuesSize = uint64(len(cd.valuesData))
|
|
if ch.valuesSize > maxValuesBlockSize {
|
|
logger.Panicf("BUG: too big valuesSize: %d bytes; mustn't exceed %d bytes", ch.valuesSize, maxValuesBlockSize)
|
|
}
|
|
ch.valuesOffset = valuesWriter.bytesWritten
|
|
valuesWriter.MustWrite(cd.valuesData)
|
|
|
|
// marshal bloom filter
|
|
ch.bloomFilterSize = uint64(len(cd.bloomFilterData))
|
|
if ch.bloomFilterSize > maxBloomFilterBlockSize {
|
|
logger.Panicf("BUG: too big bloomFilterSize: %d bytes; mustn't exceed %d bytes", ch.bloomFilterSize, maxBloomFilterBlockSize)
|
|
}
|
|
ch.bloomFilterOffset = bloomFilterWriter.bytesWritten
|
|
bloomFilterWriter.MustWrite(cd.bloomFilterData)
|
|
}
|
|
|
|
// mustReadFrom reads columns data associated with ch from sr to cd.
|
|
//
|
|
// cd is valid until a.reset() is called.
|
|
func (cd *columnData) mustReadFrom(a *arena, ch *columnHeader, sr *streamReaders) {
|
|
cd.reset()
|
|
|
|
valuesReader := &sr.fieldValuesReader
|
|
bloomFilterReader := &sr.fieldBloomFilterReader
|
|
if ch.name == "" {
|
|
valuesReader = &sr.messageValuesReader
|
|
bloomFilterReader = &sr.messageBloomFilterReader
|
|
}
|
|
|
|
cd.name = a.copyString(ch.name)
|
|
cd.valueType = ch.valueType
|
|
|
|
cd.minValue = ch.minValue
|
|
cd.maxValue = ch.maxValue
|
|
cd.valuesDict.copyFrom(a, &ch.valuesDict)
|
|
|
|
// read values
|
|
if ch.valuesOffset != valuesReader.bytesRead {
|
|
logger.Panicf("FATAL: %s: unexpected columnHeader.valuesOffset=%d; must equal to the number of bytes read: %d",
|
|
valuesReader.Path(), ch.valuesOffset, valuesReader.bytesRead)
|
|
}
|
|
valuesSize := ch.valuesSize
|
|
if valuesSize > maxValuesBlockSize {
|
|
logger.Panicf("FATAL: %s: values block size cannot exceed %d bytes; got %d bytes", valuesReader.Path(), maxValuesBlockSize, valuesSize)
|
|
}
|
|
cd.valuesData = a.newBytes(int(valuesSize))
|
|
valuesReader.MustReadFull(cd.valuesData)
|
|
|
|
// read bloom filter
|
|
// bloom filter is missing in valueTypeDict.
|
|
if ch.valueType != valueTypeDict {
|
|
if ch.bloomFilterOffset != bloomFilterReader.bytesRead {
|
|
logger.Panicf("FATAL: %s: unexpected columnHeader.bloomFilterOffset=%d; must equal to the number of bytes read: %d",
|
|
bloomFilterReader.Path(), ch.bloomFilterOffset, bloomFilterReader.bytesRead)
|
|
}
|
|
bloomFilterSize := ch.bloomFilterSize
|
|
if bloomFilterSize > maxBloomFilterBlockSize {
|
|
logger.Panicf("FATAL: %s: bloom filter block size cannot exceed %d bytes; got %d bytes", bloomFilterReader.Path(), maxBloomFilterBlockSize, bloomFilterSize)
|
|
}
|
|
cd.bloomFilterData = a.newBytes(int(bloomFilterSize))
|
|
bloomFilterReader.MustReadFull(cd.bloomFilterData)
|
|
}
|
|
}
|