2023-02-13 18:46:50 +00:00
package stream
2019-05-22 21:16:55 +00:00
import (
2020-09-27 23:06:27 +00:00
"bufio"
2019-05-22 21:16:55 +00:00
"fmt"
2020-01-28 20:53:50 +00:00
"io"
2019-05-22 21:16:55 +00:00
"sync"
2020-01-28 20:53:50 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
2020-12-08 18:49:32 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
2023-02-21 02:38:49 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
2020-11-13 15:48:23 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
2020-08-16 14:05:52 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
2019-05-22 21:16:55 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
2023-01-07 02:59:39 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
2019-05-22 21:16:55 +00:00
"github.com/VictoriaMetrics/metrics"
2020-01-28 20:53:50 +00:00
"github.com/golang/snappy"
2019-05-22 21:16:55 +00:00
)
2020-08-16 14:05:52 +00:00
// maxInsertRequestSize is the size limit, in bytes, applied to a single
// Prometheus remote_write request. The default is 32*1024*1024 bytes.
//
// The same limit is checked twice in this file: against the compressed body
// while it is being read, and against the body after decompression.
var maxInsertRequestSize = flagutil . NewBytes ( "maxInsertRequestSize" , 32 * 1024 * 1024 , "The maximum size in bytes of a single Prometheus remote_write API request" )
2020-01-28 20:53:50 +00:00
2023-02-13 18:46:50 +00:00
// Parse parses Prometheus remote_write message from reader and calls callback for the parsed timeseries.
2020-02-23 11:35:47 +00:00
//
2020-09-28 01:11:55 +00:00
// callback shouldn't hold tss after returning.
2023-02-21 02:38:49 +00:00
func Parse ( r io . Reader , isVMRemoteWrite bool , callback func ( tss [ ] prompb . TimeSeries ) error ) error {
2023-01-07 02:59:39 +00:00
wcr := writeconcurrencylimiter . GetReader ( r )
defer writeconcurrencylimiter . PutReader ( wcr )
r = wcr
2021-09-20 11:49:28 +00:00
ctx := getPushCtx ( r )
2019-05-22 21:16:55 +00:00
defer putPushCtx ( ctx )
2020-09-27 23:06:27 +00:00
if err := ctx . Read ( ) ; err != nil {
2019-05-22 21:16:55 +00:00
return err
}
2020-11-13 10:16:08 +00:00
2023-02-13 18:51:55 +00:00
// Synchronously process the request in order to properly return errors to Parse caller,
2020-11-13 10:16:08 +00:00
// so it could properly return HTTP 503 status code in response.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/896
bb := bodyBufferPool . Get ( )
defer bodyBufferPool . Put ( bb )
var err error
2023-02-21 02:38:49 +00:00
if isVMRemoteWrite {
bb . B , err = zstd . Decompress ( bb . B [ : 0 ] , ctx . reqBuf . B )
if err != nil {
2023-11-13 20:19:06 +00:00
// Fall back to Snappy decompression, since vmagent may send snappy-encoded messages
// with 'Content-Encoding: zstd' header if they were put into persistent queue before vmagent restart.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5301
zstdErr := err
bb . B , err = snappy . Decode ( bb . B [ : cap ( bb . B ) ] , ctx . reqBuf . B )
if err != nil {
return fmt . Errorf ( "cannot decompress zstd-encoded request with length %d: %w" , len ( ctx . reqBuf . B ) , zstdErr )
}
2023-02-21 02:38:49 +00:00
}
} else {
bb . B , err = snappy . Decode ( bb . B [ : cap ( bb . B ) ] , ctx . reqBuf . B )
if err != nil {
2023-11-17 14:51:09 +00:00
// Fall back to zstd decompression, since vmagent may send zstd-encoded messages
// without 'Content-Encoding: zstd' header if they were put into persistent queue before vmagent restart.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5301#issuecomment-1815871992
snappyErr := err
bb . B , err = zstd . Decompress ( bb . B [ : 0 ] , ctx . reqBuf . B )
if err != nil {
return fmt . Errorf ( "cannot decompress snappy-encoded request with length %d: %w" , len ( ctx . reqBuf . B ) , snappyErr )
}
2023-02-21 02:38:49 +00:00
}
2020-11-13 10:16:08 +00:00
}
2022-12-15 03:26:24 +00:00
if int64 ( len ( bb . B ) ) > maxInsertRequestSize . N {
2020-11-13 10:16:08 +00:00
return fmt . Errorf ( "too big unpacked request; mustn't exceed `-maxInsertRequestSize=%d` bytes; got %d bytes" , maxInsertRequestSize . N , len ( bb . B ) )
}
wr := getWriteRequest ( )
defer putWriteRequest ( wr )
2024-01-14 20:46:06 +00:00
if err := wr . UnmarshalProtobuf ( bb . B ) ; err != nil {
2020-11-13 10:16:08 +00:00
unmarshalErrors . Inc ( )
return fmt . Errorf ( "cannot unmarshal prompb.WriteRequest with size %d bytes: %w" , len ( bb . B ) , err )
}
rows := 0
tss := wr . Timeseries
for i := range tss {
rows += len ( tss [ i ] . Samples )
}
rowsRead . Add ( rows )
if err := callback ( tss ) ; err != nil {
return fmt . Errorf ( "error when processing imported data: %w" , err )
2020-11-13 08:58:33 +00:00
}
2020-11-13 10:16:08 +00:00
return nil
2019-05-22 21:16:55 +00:00
}
2020-11-13 10:16:08 +00:00
// bodyBufferPool holds reusable buffers, which Parse uses for the decompressed request body.
var bodyBufferPool bytesutil . ByteBufferPool
2019-05-22 21:16:55 +00:00
type pushCtx struct {
2020-09-27 23:06:27 +00:00
br * bufio . Reader
2020-09-28 01:11:55 +00:00
reqBuf bytesutil . ByteBuffer
2019-05-22 21:16:55 +00:00
}
func ( ctx * pushCtx ) reset ( ) {
2020-09-27 23:06:27 +00:00
ctx . br . Reset ( nil )
2020-09-28 01:11:55 +00:00
ctx . reqBuf . Reset ( )
2019-05-22 21:16:55 +00:00
}
2020-09-27 23:06:27 +00:00
func ( ctx * pushCtx ) Read ( ) error {
2020-02-23 11:35:47 +00:00
readCalls . Inc ( )
2020-09-28 01:11:55 +00:00
lr := io . LimitReader ( ctx . br , int64 ( maxInsertRequestSize . N ) + 1 )
2020-11-13 15:48:23 +00:00
startTime := fasttime . UnixTimestamp ( )
2020-09-28 01:11:55 +00:00
reqLen , err := ctx . reqBuf . ReadFrom ( lr )
2019-05-22 21:16:55 +00:00
if err != nil {
2020-02-23 11:35:47 +00:00
readErrors . Inc ( )
2020-11-13 15:48:23 +00:00
return fmt . Errorf ( "cannot read compressed request in %d seconds: %w" , fasttime . UnixTimestamp ( ) - startTime , err )
2019-05-22 21:16:55 +00:00
}
2020-09-28 01:11:55 +00:00
if reqLen > int64 ( maxInsertRequestSize . N ) {
readErrors . Inc ( )
2024-02-28 14:33:08 +00:00
return fmt . Errorf ( "too big packed request; mustn't exceed -maxInsertRequestSize=%d bytes; got %d bytes" , maxInsertRequestSize . N , reqLen )
2020-02-23 11:35:47 +00:00
}
2019-05-22 21:16:55 +00:00
return nil
}
var (
2020-02-28 18:19:35 +00:00
readCalls = metrics . NewCounter ( ` vm_protoparser_read_calls_total { type="promremotewrite"} ` )
readErrors = metrics . NewCounter ( ` vm_protoparser_read_errors_total { type="promremotewrite"} ` )
rowsRead = metrics . NewCounter ( ` vm_protoparser_rows_read_total { type="promremotewrite"} ` )
2020-07-08 11:18:41 +00:00
unmarshalErrors = metrics . NewCounter ( ` vm_protoparser_unmarshal_errors_total { type="promremotewrite"} ` )
2019-05-22 21:16:55 +00:00
)
2020-09-27 23:06:27 +00:00
func getPushCtx ( r io . Reader ) * pushCtx {
2019-05-22 21:16:55 +00:00
select {
case ctx := <- pushCtxPoolCh :
2020-09-27 23:06:27 +00:00
ctx . br . Reset ( r )
2019-05-22 21:16:55 +00:00
return ctx
default :
if v := pushCtxPool . Get ( ) ; v != nil {
2020-09-27 23:06:27 +00:00
ctx := v . ( * pushCtx )
ctx . br . Reset ( r )
return ctx
}
return & pushCtx {
br : bufio . NewReaderSize ( r , 64 * 1024 ) ,
2019-05-22 21:16:55 +00:00
}
}
}
// putPushCtx resets ctx and makes it available for reuse by getPushCtx.
//
// ctx goes to pushCtxPoolCh when the channel has free capacity and to pushCtxPool otherwise.
// ctx mustn't be used after the call.
func putPushCtx(ctx *pushCtx) {
	ctx.reset()
	select {
	case pushCtxPoolCh <- ctx:
		return
	default:
	}
	pushCtxPool.Put(ctx)
}
2023-11-17 14:51:09 +00:00
// pushCtxPool keeps pushCtx objects, which didn't fit into pushCtxPoolCh.
var pushCtxPool sync.Pool

// pushCtxPoolCh keeps up to cgroup.AvailableCPUs() pushCtx objects for reuse.
// It is consulted by getPushCtx before pushCtxPool.
var pushCtxPoolCh = make(chan *pushCtx, cgroup.AvailableCPUs())
2020-01-28 20:53:50 +00:00
2020-11-13 10:16:08 +00:00
func getWriteRequest ( ) * prompb . WriteRequest {
v := writeRequestPool . Get ( )
2020-09-28 01:11:55 +00:00
if v == nil {
2020-11-13 10:16:08 +00:00
return & prompb . WriteRequest { }
2020-09-28 01:11:55 +00:00
}
2020-11-13 10:16:08 +00:00
return v . ( * prompb . WriteRequest )
2020-09-28 01:11:55 +00:00
}
2020-11-13 10:16:08 +00:00
func putWriteRequest ( wr * prompb . WriteRequest ) {
wr . Reset ( )
writeRequestPool . Put ( wr )
2020-09-28 01:11:55 +00:00
}
2020-11-13 10:16:08 +00:00
// writeRequestPool holds reusable *prompb.WriteRequest objects
// for getWriteRequest and putWriteRequest.
var writeRequestPool sync . Pool