app/vmselect/netstorage: reduce memory usage when fetching a large number of data blocks from vmstorage

Dump data blocks directly to a temporary file instead of buffering them in RAM.
parent ba460f62e6
commit 56dff57f77

1 changed file with 60 additions and 57 deletions
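The gist of the change, in isolation: instead of each vmstorage connection returning a []*storage.MetricBlock that ProcessSearchQuery then buffers in RAM, every block is written to a shared temporary file the moment it is read off the wire, and only small per-metric (offset, size) addresses stay in memory. Below is a minimal, self-contained sketch of that spill-to-disk pattern; blockSpiller and blockAddr are made-up names standing in for the real tmpBlocksFile machinery, not the actual VictoriaMetrics types.

package main

import (
    "fmt"
    "os"
    "sync"
)

// blockAddr locates one block inside the temporary file.
type blockAddr struct {
    offset int64
    size   int
}

// blockSpiller streams incoming blocks to a temporary file and keeps only a
// small per-metric index of addresses in memory. It is safe for concurrent
// use by multiple per-node goroutines.
type blockSpiller struct {
    mu     sync.Mutex
    f      *os.File
    offset int64
    m      map[string][]blockAddr
}

func newBlockSpiller() (*blockSpiller, error) {
    f, err := os.CreateTemp("", "blocks-*.tmp")
    if err != nil {
        return nil, err
    }
    return &blockSpiller{f: f, m: make(map[string][]blockAddr)}, nil
}

// WriteBlock appends data to the temporary file and records its address under
// metricName; the block itself no longer occupies RAM after this call.
func (s *blockSpiller) WriteBlock(metricName string, data []byte) error {
    s.mu.Lock()
    defer s.mu.Unlock()
    if _, err := s.f.Write(data); err != nil {
        return err
    }
    s.m[metricName] = append(s.m[metricName], blockAddr{offset: s.offset, size: len(data)})
    s.offset += int64(len(data))
    return nil
}

func main() {
    s, err := newBlockSpiller()
    if err != nil {
        panic(err)
    }
    defer os.Remove(s.f.Name())
    defer s.f.Close()
    _ = s.WriteBlock("http_requests_total", []byte("block-1"))
    _ = s.WriteBlock("http_requests_total", []byte("block-2"))
    fmt.Println(s.m) // only the tiny addresses stay in memory
}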
@@ -694,64 +694,67 @@ func GetSeriesCount(at *auth.Token, deadline Deadline) (uint64, bool, error) {
 	return n, isPartialResult, nil
 }
 
+type tmpBlocksFileWrapper struct {
+	mu  sync.Mutex
+	tbf *tmpBlocksFile
+	m   map[string][]tmpBlockAddr
+}
+
+func (tbfw *tmpBlocksFileWrapper) WriteBlock(mb *storage.MetricBlock) error {
+	tbfw.mu.Lock()
+	defer tbfw.mu.Unlock()
+
+	addr, err := tbfw.tbf.WriteBlock(mb.Block)
+	if err != nil {
+		return err
+	}
+	metricName := mb.MetricName
+	tbfw.m[string(metricName)] = append(tbfw.m[string(metricName)], addr)
+	return nil
+}
+
 // ProcessSearchQuery performs sq on storage nodes until the given deadline.
 func ProcessSearchQuery(at *auth.Token, sq *storage.SearchQuery, fetchData bool, deadline Deadline) (*Results, bool, error) {
 	requestData := sq.Marshal(nil)
 
 	// Send the query to all the storage nodes in parallel.
-	type nodeResult struct {
-		results []*storage.MetricBlock
-		err     error
-	}
-	resultsCh := make(chan nodeResult, len(storageNodes))
+	resultsCh := make(chan error, len(storageNodes))
 	tr := storage.TimeRange{
 		MinTimestamp: sq.MinTimestamp,
 		MaxTimestamp: sq.MaxTimestamp,
 	}
+	tbfw := &tmpBlocksFileWrapper{
+		tbf: getTmpBlocksFile(),
+		m:   make(map[string][]tmpBlockAddr),
+	}
 	for _, sn := range storageNodes {
 		go func(sn *storageNode) {
 			sn.searchRequests.Inc()
-			results, err := sn.processSearchQuery(requestData, tr, fetchData, deadline)
+			err := sn.processSearchQuery(tbfw, requestData, tr, fetchData, deadline)
 			if err != nil {
 				sn.searchRequestErrors.Inc()
 				err = fmt.Errorf("cannot perform search on vmstorage %s: %s", sn.connPool.Addr(), err)
 			}
-			resultsCh <- nodeResult{
-				results: results,
-				err:     err,
-			}
+			resultsCh <- err
 		}(sn)
 	}
 
 	// Collect results.
 	var errors []error
-	tbf := getTmpBlocksFile()
-	m := make(map[string][]tmpBlockAddr)
-	blocksRead := 0
 	for i := 0; i < len(storageNodes); i++ {
 		// There is no need in timer here, since all the goroutines executing
 		// sn.processSearchQuery must be finished until the deadline.
-		nr := <-resultsCh
-		if nr.err != nil {
-			errors = append(errors, nr.err)
+		err := <-resultsCh
+		if err != nil {
+			errors = append(errors, err)
 			continue
 		}
-		for _, mb := range nr.results {
-			blocksRead++
-			addr, err := tbf.WriteBlock(mb.Block)
-			if err != nil {
-				errors = append(errors, fmt.Errorf("cannot write data block #%d to temporary blocks file: %s", blocksRead, err))
-				break
-			}
-			metricName := mb.MetricName
-			m[string(metricName)] = append(m[string(metricName)], addr)
-		}
 	}
 	isPartialResult := false
 	if len(errors) > 0 {
-		if len(m) == 0 {
+		if len(tbfw.m) == 0 {
 			// Return only the first error, since it has no sense in returning all errors.
-			putTmpBlocksFile(tbf)
+			putTmpBlocksFile(tbfw.tbf)
 			return nil, true, fmt.Errorf("error occured during search: %s", errors[0])
 		}
 
@@ -763,20 +766,20 @@ func ProcessSearchQuery(at *auth.Token, sq *storage.SearchQuery, fetchData bool,
 		partialSearchResults.Inc()
 		isPartialResult = true
 	}
-	if err := tbf.Finalize(); err != nil {
-		putTmpBlocksFile(tbf)
-		return nil, false, fmt.Errorf("cannot finalize temporary blocks file with %d blocks: %s", blocksRead, err)
+	if err := tbfw.tbf.Finalize(); err != nil {
+		putTmpBlocksFile(tbfw.tbf)
+		return nil, false, fmt.Errorf("cannot finalize temporary blocks file with %d time series: %s", len(tbfw.m), err)
 	}
 
 	var rss Results
-	rss.packedTimeseries = make([]packedTimeseries, len(m))
+	rss.packedTimeseries = make([]packedTimeseries, len(tbfw.m))
 	rss.at = at
 	rss.tr = tr
 	rss.fetchData = fetchData
 	rss.deadline = deadline
-	rss.tbf = tbf
+	rss.tbf = tbfw.tbf
 	i := 0
-	for metricName, addrs := range m {
+	for metricName, addrs := range tbfw.m {
 		pts := &rss.packedTimeseries[i]
 		i++
 		pts.metricName = metricName
@@ -935,24 +938,23 @@ func (sn *storageNode) getSeriesCount(accountID, projectID uint32, deadline Dead
 	return n, nil
 }
 
-func (sn *storageNode) processSearchQuery(requestData []byte, tr storage.TimeRange, fetchData bool, deadline Deadline) ([]*storage.MetricBlock, error) {
-	var results []*storage.MetricBlock
+func (sn *storageNode) processSearchQuery(tbfw *tmpBlocksFileWrapper, requestData []byte, tr storage.TimeRange, fetchData bool, deadline Deadline) error {
+	var blocksRead int
 	f := func(bc *handshake.BufferedConn) error {
-		rs, err := sn.processSearchQueryOnConn(bc, requestData, tr, fetchData)
+		n, err := sn.processSearchQueryOnConn(tbfw, bc, requestData, tr, fetchData)
 		if err != nil {
 			return err
 		}
-		results = rs
+		blocksRead = n
 		return nil
 	}
-	if err := sn.execOnConn("search_v3", f, deadline); err != nil {
-		// Try again before giving up.
-		results = nil
+	if err := sn.execOnConn("search_v3", f, deadline); err != nil && blocksRead == 0 {
+		// Try again before giving up if zero blocks read on the previous attempt.
		if err = sn.execOnConn("search_v3", f, deadline); err != nil {
-			return nil, err
+			return err
 		}
 	}
-	return results, nil
+	return nil
 }
 
 func (sn *storageNode) execOnConn(rpcName string, f func(bc *handshake.BufferedConn) error, deadline Deadline) error {
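One subtlety in the retry path above: because blocks are now streamed into a shared temporary file as they arrive, a failed request can no longer be blindly retried; spilled blocks cannot be taken back, so a second attempt would append duplicates. The new code therefore retries only when the failed attempt read zero blocks. A hypothetical stand-alone sketch of that guard follows; searchWithRetry and attempt are stand-ins mirroring the (int, error) shape of processSearchQueryOnConn, not the actual API.

package main

import (
    "errors"
    "fmt"
)

// searchWithRetry mirrors the retry shape introduced above: attempt reports
// how many blocks it streamed into the shared temporary file before failing.
func searchWithRetry(attempt func() (int, error)) error {
    blocksRead := 0
    run := func() error {
        n, err := attempt()
        blocksRead += n
        return err
    }
    if err := run(); err != nil && blocksRead == 0 {
        // The failed attempt wrote nothing, so a retry cannot leave
        // duplicate blocks in the temporary file.
        if err = run(); err != nil {
            return err
        }
    }
    // A failure after some blocks were already spilled is not retried,
    // matching the condition in the hunk above.
    return nil
}

func main() {
    calls := 0
    flaky := func() (int, error) {
        calls++
        if calls == 1 {
            return 0, errors.New("transient network error") // clean failure
        }
        return 42, nil // second attempt succeeds
    }
    fmt.Println(searchWithRetry(flaky)) // <nil>: retried safely
}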
@@ -1201,16 +1203,16 @@ const maxMetricBlockSize = 1024 * 1024
 // from vmstorage.
 const maxErrorMessageSize = 64 * 1024
 
-func (sn *storageNode) processSearchQueryOnConn(bc *handshake.BufferedConn, requestData []byte, tr storage.TimeRange, fetchData bool) ([]*storage.MetricBlock, error) {
+func (sn *storageNode) processSearchQueryOnConn(tbfw *tmpBlocksFileWrapper, bc *handshake.BufferedConn, requestData []byte, tr storage.TimeRange, fetchData bool) (int, error) {
 	// Send the request to sn.
 	if err := writeBytes(bc, requestData); err != nil {
-		return nil, fmt.Errorf("cannot write requestData: %s", err)
+		return 0, fmt.Errorf("cannot write requestData: %s", err)
 	}
 	if err := writeBool(bc, fetchData); err != nil {
-		return nil, fmt.Errorf("cannot write fetchData=%v: %s", fetchData, err)
+		return 0, fmt.Errorf("cannot write fetchData=%v: %s", fetchData, err)
 	}
 	if err := bc.Flush(); err != nil {
-		return nil, fmt.Errorf("cannot flush requestData to conn: %s", err)
+		return 0, fmt.Errorf("cannot flush requestData to conn: %s", err)
 	}
 
 	var err error
@@ -1219,37 +1221,38 @@ func (sn *storageNode) processSearchQueryOnConn(bc *handshake.BufferedConn, requ
 	// Read response error.
 	buf, err = readBytes(buf[:0], bc, maxErrorMessageSize)
 	if err != nil {
-		return nil, fmt.Errorf("cannot read error message: %s", err)
+		return 0, fmt.Errorf("cannot read error message: %s", err)
 	}
 	if len(buf) > 0 {
-		return nil, &errRemote{msg: string(buf)}
+		return 0, &errRemote{msg: string(buf)}
 	}
 
 	// Read response. It may consist of multiple MetricBlocks.
-	var results []*storage.MetricBlock
-	metricBlocksRead := 0
+	blocksRead := 0
 	for {
 		buf, err = readBytes(buf[:0], bc, maxMetricBlockSize)
 		if err != nil {
-			return nil, fmt.Errorf("cannot read MetricBlock #%d: %s", metricBlocksRead, err)
+			return blocksRead, fmt.Errorf("cannot read MetricBlock #%d: %s", blocksRead, err)
 		}
 		if len(buf) == 0 {
 			// Reached the end of the response
-			return results, nil
+			return blocksRead, nil
 		}
 		var mb storage.MetricBlock
 		mb.Block = &storage.Block{}
 		tail, err := mb.Unmarshal(buf)
 		if err != nil {
-			return nil, fmt.Errorf("cannot unmarshal MetricBlock: %s", err)
+			return blocksRead, fmt.Errorf("cannot unmarshal MetricBlock #%d: %s", blocksRead, err)
 		}
 		if len(tail) != 0 {
-			return nil, fmt.Errorf("non-empty tail after unmarshaling MetricBlock: (len=%d) %q", len(tail), tail)
+			return blocksRead, fmt.Errorf("non-empty tail after unmarshaling MetricBlock #%d: (len=%d) %q", blocksRead, len(tail), tail)
 		}
-		metricBlocksRead++
+		blocksRead++
 		sn.metricBlocksRead.Inc()
 		sn.metricRowsRead.Add(mb.Block.RowsCount())
-		results = append(results, &mb)
+		if err := tbfw.WriteBlock(&mb); err != nil {
+			return blocksRead, fmt.Errorf("cannot write MetricBlock #%d to temporary blocks file: %s", blocksRead, err)
+		}
 	}
 }
 