VictoriaMetrics/lib/protoparser/clusternative/streamparser.go

package clusternative

import (
	"fmt"
	"io"
	"sync"
	"time"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/consts"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/handshake"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
	"github.com/VictoriaMetrics/metrics"
)

// ParseStream parses data sent from vminsert to bc and calls callback for parsed rows.
//
// Optional function isReadOnly must return true if the storage cannot accept new data.
// In this case the data read from bc isn't accepted and the readonly status is sent back to bc.
//
// The callback can be called concurrently multiple times for streamed data from bc.
//
// callback shouldn't hold rows after returning.
//
// ParseStream returns nil when the remote end gracefully closes the connection
// and every callback invocation has succeeded. If at least one callback invocation
// has failed, the first recorded callback error is returned after the connection
// is gracefully closed. An error occurred while reading from bc or writing to bc
// is returned as is and takes priority over callback errors.
func ParseStream(bc *handshake.BufferedConn, callback func(rows []storage.MetricRow) error, isReadOnly func() bool) error {
	var wg sync.WaitGroup
	var (
		callbackErrLock sync.Mutex
		callbackErr     error
	)
	for {
		// Do not use unmarshalWork pool, since every unmarshalWork structure usually occupies
		// big amounts of memory (more than consts.MaxInsertPacketSizeForVMStorage bytes).
		// The pool would result in increased memory usage.
		uw := &unmarshalWork{}
		uw.callback = func(rows []storage.MetricRow) {
			if err := callback(rows); err != nil {
				processErrors.Inc()
				// Remember only the first callback error; the remaining errors
				// are visible via the processErrors counter.
				callbackErrLock.Lock()
				if callbackErr == nil {
					callbackErr = fmt.Errorf("error when processing native block: %w", err)
				}
				callbackErrLock.Unlock()
			}
		}
		uw.wg = &wg
		var err error
		uw.reqBuf, err = readBlock(uw.reqBuf[:0], bc, isReadOnly)
		if err != nil {
			// Wait until all the scheduled blocks are processed,
			// so the callback isn't called after ParseStream returns.
			wg.Wait()
			if err == io.EOF {
				// Remote end gracefully closed the connection.
				// Report the first callback error (if any) instead of silently dropping it.
				callbackErrLock.Lock()
				err = callbackErr
				callbackErrLock.Unlock()
				return err
			}
			return err
		}
		blocksRead.Inc()
		wg.Add(1)
		common.ScheduleUnmarshalWork(uw)
	}
}

// readBlock reads the next data block from vminsert-initiated bc, appends it to dst and returns the result.
//
// The block is prefixed with an 8-byte packet size. After the whole packet is read,
// a single status byte is sent back to bc: 1 when the packet is accepted,
// 2 when isReadOnly is set and returns true. In the latter case the packet
// is dropped, so the returned slice has the same length as the passed dst.
//
// io.EOF is returned unwrapped if bc is closed before the packet size is read.
func readBlock(dst []byte, bc *handshake.BufferedConn, isReadOnly func() bool) ([]byte, error) {
	const (
		statusAck      = 1
		statusReadOnly = 2
	)

	hdr := auxBufPool.Get()
	defer auxBufPool.Put(hdr)
	hdr.B = bytesutil.ResizeNoCopyMayOverallocate(hdr.B, 8)

	// Read the packet size.
	_, err := io.ReadFull(bc, hdr.B)
	if err == io.EOF {
		// The connection has been closed at the packet boundary. This isn't a read error.
		return dst, err
	}
	if err != nil {
		readErrors.Inc()
		return dst, fmt.Errorf("cannot read packet size: %w", err)
	}
	packetSize := encoding.UnmarshalUint64(hdr.B)
	if packetSize > consts.MaxInsertPacketSizeForVMStorage {
		parseErrors.Inc()
		return dst, fmt.Errorf("too big packet size: %d; shouldn't exceed %d", packetSize, consts.MaxInsertPacketSizeForVMStorage)
	}

	// Read the packet body into the tail of dst.
	offset := len(dst)
	dst = bytesutil.ResizeWithCopyMayOverallocate(dst, offset+int(packetSize))
	n, err := io.ReadFull(bc, dst[offset:])
	if err != nil {
		readErrors.Inc()
		return dst, fmt.Errorf("cannot read packet with size %d bytes: %w; read only %d bytes", packetSize, err, n)
	}

	if isReadOnly == nil || !isReadOnly() {
		// Notify vminsert that the packet has been received.
		if err := sendAck(bc, statusAck); err != nil {
			writeErrors.Inc()
			return dst, fmt.Errorf("cannot send `ack` to vminsert: %w", err)
		}
		return dst, nil
	}

	// The storage cannot accept new data, so discard the just read packet
	// and notify vminsert about the readonly status.
	dst = dst[:offset]
	if err := sendAck(bc, statusReadOnly); err != nil {
		writeErrors.Inc()
		return dst, fmt.Errorf("cannot send readonly status to vminsert: %w", err)
	}
	return dst, nil
}

// sendAck writes the single status byte to bc and flushes it.
//
// The write deadline on bc is set to 5 seconds from now before writing.
func sendAck(bc *handshake.BufferedConn, status byte) error {
	if err := bc.SetWriteDeadline(time.Now().Add(5 * time.Second)); err != nil {
		return fmt.Errorf("cannot set write deadline: %w", err)
	}

	buf := auxBufPool.Get()
	defer auxBufPool.Put(buf)
	buf.B = append(buf.B[:0], status)

	_, err := bc.Write(buf.B)
	if err != nil {
		return err
	}
	// The status must reach the remote end immediately, so flush the buffered data.
	return bc.Flush()
}

// auxBufPool is a pool of small scratch buffers.
//
// It is used for reading the 8-byte packet size in readBlock
// and for writing the one-byte status in sendAck.
var auxBufPool bytesutil.ByteBufferPool

var (
	// readErrors counts failures to read data from the connection in readBlock.
	// writeErrors counts failures to send the status byte back in readBlock.
	// rowsRead counts rows unmarshaled in unmarshalWork.Unmarshal.
	// blocksRead counts blocks successfully read in ParseStream.
	readErrors  = metrics.NewCounter(`vm_protoparser_read_errors_total{type="clusternative"}`)
	writeErrors = metrics.NewCounter(`vm_protoparser_write_errors_total{type="clusternative"}`)
	rowsRead    = metrics.NewCounter(`vm_protoparser_rows_read_total{type="clusternative"}`)
	blocksRead  = metrics.NewCounter(`vm_protoparser_blocks_read_total{type="clusternative"}`)

	// parseErrors counts too big packets in readBlock and rows unmarshal failures in unmarshalWork.Unmarshal.
	// processErrors counts errors returned by the callback passed to ParseStream.
	parseErrors   = metrics.NewCounter(`vm_protoparser_parse_errors_total{type="clusternative"}`)
	processErrors = metrics.NewCounter(`vm_protoparser_process_errors_total{type="clusternative"}`)
)

// unmarshalWork holds a single block read by ParseStream
// together with the state needed for unmarshaling and processing it.
type unmarshalWork struct {
	// wg is marked as done at the end of Unmarshal.
	wg       *sync.WaitGroup
	// callback is called from Unmarshal for every successfully unmarshaled batch of rows.
	callback func(rows []storage.MetricRow)
	// reqBuf contains the block data to unmarshal.
	reqBuf   []byte
	// mrs is a buffer for unmarshaled rows; it is reused across batches inside Unmarshal.
	mrs      []storage.MetricRow
}

// Unmarshal implements common.UnmarshalWork
//
// It unmarshals rows from uw.reqBuf in batches and passes every batch to uw.callback.
// The processing of the block stops at the first unmarshal error, which is logged.
// uw.wg is marked as done before returning.
func (uw *unmarshalWork) Unmarshal() {
	for pending := uw.reqBuf; len(pending) > 0; {
		// Unmarshal at most maxRowsPerCallback rows per iteration, so the callback
		// receives bounded batches. This reduces memory usage when processing big packets of rows.
		rows, rest, err := storage.UnmarshalMetricRows(uw.mrs[:0], pending, maxRowsPerCallback)
		// Keep the (possibly grown) rows buffer for reuse on the next iteration.
		uw.mrs = rows
		if err != nil {
			parseErrors.Inc()
			logger.Errorf("cannot unmarshal MetricRow from clusternative block with size %d (remaining %d bytes): %s", len(pending), len(rest), err)
			break
		}
		rowsRead.Add(len(rows))
		uw.callback(rows)
		pending = rest
	}
	uw.wg.Done()
}

// maxRowsPerCallback is the limit on the number of rows passed to storage.UnmarshalMetricRows
// in unmarshalWork.Unmarshal, which bounds the batch of rows handed to the callback.
const maxRowsPerCallback = 10000
app/vminsert: add support for data ingestion via other vminsert nodes 2021-05-08 14:55:44 +00:00			`package clusternative`

			`import (`
			`"fmt"`
			`"io"`
			`"sync"`
			`"time"`

			`"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"`
			`"github.com/VictoriaMetrics/VictoriaMetrics/lib/consts"`
			`"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"`
			`"github.com/VictoriaMetrics/VictoriaMetrics/lib/handshake"`
			`"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"`
			`"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"`
			`"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"`
			`"github.com/VictoriaMetrics/metrics"`
			`)`

			`// ParseStream parses data sent from vminsert to bc and calls callback for parsed rows.`
app/{vminsert,vmstorage}: follow-up after a171916ef5664690c9b596fe73b095aa75fa5d5c Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/269 2021-10-08 10:52:56 +00:00			`// Optional function isReadOnly must return true if the storage cannot accept new data.`
			`// In this case the data read from bc isn't accepted and the readonly status is sent back to bc.`
app/vminsert: add support for data ingestion via other vminsert nodes 2021-05-08 14:55:44 +00:00			`//`
			`// The callback can be called concurrently multiple times for streamed data from req.`
			`//`
			`// callback shouldn't hold block after returning.`
Adds read-only mode for vmstorage node (#1680) * adds read-only mode for vmstorage https://github.com/VictoriaMetrics/VictoriaMetrics/issues/269 * changes order a bit * moves isFreeDiskLimitReached var to storage struct renames functions to be consistent change protoparser api - with optional storage limit check for given openned storage * renames freeSpaceLimit to ReadOnly 2021-10-08 09:52:56 +00:00			`func ParseStream(bc *handshake.BufferedConn, callback func(rows []storage.MetricRow) error, isReadOnly func() bool) error {`
app/vminsert: add support for data ingestion via other vminsert nodes 2021-05-08 14:55:44 +00:00			`var wg sync.WaitGroup`
			`var (`
			`callbackErrLock sync.Mutex`
			`callbackErr error`
			`)`
			`for {`
lib/protoparser/clusternative: do not pool unmarshalWork structs, since they can occupy big amounts of memory (more than 100MB per each struct) This should reduce memory usage for vmstorage under high ingestion rate when the vmstorage runs on a system with big number of CPU cores 2021-06-23 12:45:05 +00:00			`// Do not use unmarshalWork pool, since every unmarshalWork structure usually occupies`
app/vminsert: reduce the max packet size, which vminsert can send to vmstorage This reduces the max memory usage for vminsert and vmstorage under heavy ingestion rate by up to 50% on production workload 2022-04-05 12:35:08 +00:00			`// big amounts of memory (more than consts.MaxInsertPacketSizeForVMStorage bytes).`
lib/protoparser/clusternative: do not pool unmarshalWork structs, since they can occupy big amounts of memory (more than 100MB per each struct) This should reduce memory usage for vmstorage under high ingestion rate when the vmstorage runs on a system with big number of CPU cores 2021-06-23 12:45:05 +00:00			`// The pool would result in increased memory usage.`
			`uw := &unmarshalWork{}`
app/vminsert: add support for data ingestion via other vminsert nodes 2021-05-08 14:55:44 +00:00			`uw.callback = func(rows []storage.MetricRow) {`
			`if err := callback(rows); err != nil {`
			`processErrors.Inc()`
			`callbackErrLock.Lock()`
			`if callbackErr == nil {`
			`callbackErr = fmt.Errorf("error when processing native block: %w", err)`
			`}`
			`callbackErrLock.Unlock()`
			`}`
			`}`
			`uw.wg = &wg`
			`var err error`
Adds read-only mode for vmstorage node (#1680) * adds read-only mode for vmstorage https://github.com/VictoriaMetrics/VictoriaMetrics/issues/269 * changes order a bit * moves isFreeDiskLimitReached var to storage struct renames functions to be consistent change protoparser api - with optional storage limit check for given openned storage * renames freeSpaceLimit to ReadOnly 2021-10-08 09:52:56 +00:00			`uw.reqBuf, err = readBlock(uw.reqBuf[:0], bc, isReadOnly)`
app/vminsert: add support for data ingestion via other vminsert nodes 2021-05-08 14:55:44 +00:00			`if err != nil {`
			`wg.Wait()`
			`if err == io.EOF {`
			`// Remote end gracefully closed the connection.`
			`return nil`
			`}`
lib/protoparser/clusternative: remove duplicate `cannot read packet size` phrase from the log message Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1336 2021-05-27 09:08:47 +00:00			`return err`
app/vminsert: add support for data ingestion via other vminsert nodes 2021-05-08 14:55:44 +00:00			`}`
			`blocksRead.Inc()`
			`wg.Add(1)`
			`common.ScheduleUnmarshalWork(uw)`
			`}`
			`}`

			`// readBlock reads the next data block from vminsert-initiated bc, appends it to dst and returns the result.`
Adds read-only mode for vmstorage node (#1680) * adds read-only mode for vmstorage https://github.com/VictoriaMetrics/VictoriaMetrics/issues/269 * changes order a bit * moves isFreeDiskLimitReached var to storage struct renames functions to be consistent change protoparser api - with optional storage limit check for given openned storage * renames freeSpaceLimit to ReadOnly 2021-10-08 09:52:56 +00:00			`func readBlock(dst []byte, bc *handshake.BufferedConn, isReadOnly func() bool) ([]byte, error) {`
app/{vminsert,vmstorage}: follow-up after a171916ef5664690c9b596fe73b095aa75fa5d5c Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/269 2021-10-08 10:52:56 +00:00			`sizeBuf := auxBufPool.Get()`
			`defer auxBufPool.Put(sizeBuf)`
all: follow-up after 4bdd10ab9039d8ab0b4fe2d7a7dee064536da002 Properly use new bytesutil.Resize* functions 2022-02-01 15:48:25 +00:00			`sizeBuf.B = bytesutil.ResizeNoCopyMayOverallocate(sizeBuf.B, 8)`
app/vminsert: add support for data ingestion via other vminsert nodes 2021-05-08 14:55:44 +00:00			`if _, err := io.ReadFull(bc, sizeBuf.B); err != nil {`
			`if err != io.EOF {`
			`readErrors.Inc()`
			`err = fmt.Errorf("cannot read packet size: %w", err)`
			`}`
			`return dst, err`
			`}`
			`packetSize := encoding.UnmarshalUint64(sizeBuf.B)`
app/vminsert: reduce the max packet size, which vminsert can send to vmstorage This reduces the max memory usage for vminsert and vmstorage under heavy ingestion rate by up to 50% on production workload 2022-04-05 12:35:08 +00:00			`if packetSize > consts.MaxInsertPacketSizeForVMStorage {`
app/vminsert: add support for data ingestion via other vminsert nodes 2021-05-08 14:55:44 +00:00			`parseErrors.Inc()`
app/vminsert: reduce the max packet size, which vminsert can send to vmstorage This reduces the max memory usage for vminsert and vmstorage under heavy ingestion rate by up to 50% on production workload 2022-04-05 12:35:08 +00:00			`return dst, fmt.Errorf("too big packet size: %d; shouldn't exceed %d", packetSize, consts.MaxInsertPacketSizeForVMStorage)`
app/vminsert: add support for data ingestion via other vminsert nodes 2021-05-08 14:55:44 +00:00			`}`
			`dstLen := len(dst)`
all: follow-up after 4bdd10ab9039d8ab0b4fe2d7a7dee064536da002 Properly use new bytesutil.Resize* functions 2022-02-01 15:48:25 +00:00			`dst = bytesutil.ResizeWithCopyMayOverallocate(dst, dstLen+int(packetSize))`
app/vminsert: add support for data ingestion via other vminsert nodes 2021-05-08 14:55:44 +00:00			`if n, err := io.ReadFull(bc, dst[dstLen:]); err != nil {`
			`readErrors.Inc()`
			`return dst, fmt.Errorf("cannot read packet with size %d bytes: %w; read only %d bytes", packetSize, err, n)`
			`}`
app/{vminsert,vmstorage}: follow-up after a171916ef5664690c9b596fe73b095aa75fa5d5c Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/269 2021-10-08 10:52:56 +00:00			`if isReadOnly != nil && isReadOnly() {`
			`// The vmstorage is in readonly mode, so drop the read block of data`
			// and send `read only` status to vminsert.
			`dst = dst[:dstLen]`
			`if err := sendAck(bc, 2); err != nil {`
			`writeErrors.Inc()`
			`return dst, fmt.Errorf("cannot send readonly status to vminsert: %w", err)`
			`}`
			`return dst, nil`
			`}`
app/vminsert: add support for data ingestion via other vminsert nodes 2021-05-08 14:55:44 +00:00			// Send `ack` to vminsert that the packet has been received.
app/{vminsert,vmstorage}: follow-up after a171916ef5664690c9b596fe73b095aa75fa5d5c Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/269 2021-10-08 10:52:56 +00:00			`if err := sendAck(bc, 1); err != nil {`
			`writeErrors.Inc()`
			return dst, fmt.Errorf("cannot send `ack` to vminsert: %w", err)
			`}`
			`return dst, nil`
			`}`

			`func sendAck(bc *handshake.BufferedConn, status byte) error {`
app/vminsert: add support for data ingestion via other vminsert nodes 2021-05-08 14:55:44 +00:00			`deadline := time.Now().Add(5 * time.Second)`
			`if err := bc.SetWriteDeadline(deadline); err != nil {`
app/{vminsert,vmstorage}: follow-up after a171916ef5664690c9b596fe73b095aa75fa5d5c Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/269 2021-10-08 10:52:56 +00:00			`return fmt.Errorf("cannot set write deadline: %w", err)`
app/vminsert: add support for data ingestion via other vminsert nodes 2021-05-08 14:55:44 +00:00			`}`
app/{vminsert,vmstorage}: follow-up after a171916ef5664690c9b596fe73b095aa75fa5d5c Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/269 2021-10-08 10:52:56 +00:00			`b := auxBufPool.Get()`
			`defer auxBufPool.Put(b)`
lib/protoparser/clusternative: typo fix after 4fddcf4c834c82bcd14624aecfb0d5bc751dc807 2021-10-08 12:37:57 +00:00			`b.B = append(b.B[:0], status)`
			`if _, err := bc.Write(b.B); err != nil {`
app/{vminsert,vmstorage}: follow-up after a171916ef5664690c9b596fe73b095aa75fa5d5c Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/269 2021-10-08 10:52:56 +00:00			`return err`
app/vminsert: add support for data ingestion via other vminsert nodes 2021-05-08 14:55:44 +00:00			`}`
			`if err := bc.Flush(); err != nil {`
app/{vminsert,vmstorage}: follow-up after a171916ef5664690c9b596fe73b095aa75fa5d5c Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/269 2021-10-08 10:52:56 +00:00			`return err`
app/vminsert: add support for data ingestion via other vminsert nodes 2021-05-08 14:55:44 +00:00			`}`
app/{vminsert,vmstorage}: follow-up after a171916ef5664690c9b596fe73b095aa75fa5d5c Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/269 2021-10-08 10:52:56 +00:00			`return nil`
app/vminsert: add support for data ingestion via other vminsert nodes 2021-05-08 14:55:44 +00:00			`}`

app/{vminsert,vmstorage}: follow-up after a171916ef5664690c9b596fe73b095aa75fa5d5c Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/269 2021-10-08 10:52:56 +00:00			`var auxBufPool bytesutil.ByteBufferPool`
app/vminsert: add support for data ingestion via other vminsert nodes 2021-05-08 14:55:44 +00:00
			`var (`
			readErrors = metrics.NewCounter(`vm_protoparser_read_errors_total{type="clusternative"}`)
			writeErrors = metrics.NewCounter(`vm_protoparser_write_errors_total{type="clusternative"}`)
			rowsRead = metrics.NewCounter(`vm_protoparser_rows_read_total{type="clusternative"}`)
			blocksRead = metrics.NewCounter(`vm_protoparser_blocks_read_total{type="clusternative"}`)

			parseErrors = metrics.NewCounter(`vm_protoparser_parse_errors_total{type="clusternative"}`)
			processErrors = metrics.NewCounter(`vm_protoparser_process_errors_total{type="clusternative"}`)
			`)`

			`type unmarshalWork struct {`
lib/protoparser/clusternative: remove unused field - unmarshalWork.lastResetTime This is a follow-up for b84aea1e6ef1fe1786443501752d156ee9bc7bfc 2021-07-02 10:32:56 +00:00			`wg *sync.WaitGroup`
			`callback func(rows []storage.MetricRow)`
			`reqBuf []byte`
			`mrs []storage.MetricRow`
app/vminsert: add support for data ingestion via other vminsert nodes 2021-05-08 14:55:44 +00:00			`}`

			`// Unmarshal implements common.UnmarshalWork`
			`func (uw *unmarshalWork) Unmarshal() {`
lib/protoparser/clusternative: do not pool unmarshalWork structs, since they can occupy big amounts of memory (more than 100MB per each struct) This should reduce memory usage for vmstorage under high ingestion rate when the vmstorage runs on a system with big number of CPU cores 2021-06-23 12:45:05 +00:00			`reqBuf := uw.reqBuf`
			`for len(reqBuf) > 0 {`
app/vminsert: add support for data ingestion via other vminsert nodes 2021-05-08 14:55:44 +00:00			`// Limit the number of rows passed to callback in order to reduce memory usage`
			`// when processing big packets of rows.`
lib/protoparser/clusternative: do not pool unmarshalWork structs, since they can occupy big amounts of memory (more than 100MB per each struct) This should reduce memory usage for vmstorage under high ingestion rate when the vmstorage runs on a system with big number of CPU cores 2021-06-23 12:45:05 +00:00			`mrs, tail, err := storage.UnmarshalMetricRows(uw.mrs[:0], reqBuf, maxRowsPerCallback)`
			`uw.mrs = mrs`
			`if err != nil {`
			`parseErrors.Inc()`
			`logger.Errorf("cannot unmarshal MetricRow from clusternative block with size %d (remaining %d bytes): %s", len(reqBuf), len(tail), err)`
			`break`
			`}`
			`rowsRead.Add(len(mrs))`
			`uw.callback(mrs)`
			`reqBuf = tail`
app/vminsert: add support for data ingestion via other vminsert nodes 2021-05-08 14:55:44 +00:00			`}`
lib/protoparser/clusternative: do not pool unmarshalWork structs, since they can occupy big amounts of memory (more than 100MB per each struct) This should reduce memory usage for vmstorage under high ingestion rate when the vmstorage runs on a system with big number of CPU cores 2021-06-23 12:45:05 +00:00			`wg := uw.wg`
			`wg.Done()`
app/vminsert: add support for data ingestion via other vminsert nodes 2021-05-08 14:55:44 +00:00			`}`

			`const maxRowsPerCallback = 10000`