VictoriaMetrics/lib/logstorage/storage_search.go

package logstorage

import (
	"math"
	"sort"
	"sync"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
)

// genericSearchOptions contain options used for search.
type genericSearchOptions struct {
	// tenantIDs must contain the list of tenantIDs for the search.
	tenantIDs []TenantID

	// filter is the filter to use for the search
	filter filter

	// resultColumnNames is names of columns to return in the result.
	resultColumnNames []string
}

type searchOptions struct {
	// Optional sorted list of tenantIDs for the search.
	// If it is empty, then the search is performed by streamIDs
	tenantIDs []TenantID

	// Optional sorted list of streamIDs for the search.
	// If it is empty, then the search is performed by tenantIDs
	streamIDs []streamID

	// minTimestamp is the minimum timestamp for the search
	minTimestamp int64

	// maxTimestamp is the maximum timestamp for the search
	maxTimestamp int64

	// filter is the filter to use for the search
	filter filter

	// resultColumnNames is names of columns to return in the result
	resultColumnNames []string
}

// RunQuery runs the given q and calls processBlock for results
func (s *Storage) RunQuery(tenantIDs []TenantID, q *Query, stopCh <-chan struct{}, processBlock func(columns []BlockColumn)) {
	resultColumnNames := q.getResultColumnNames()
	so := &genericSearchOptions{
		tenantIDs:         tenantIDs,
		filter:            q.f,
		resultColumnNames: resultColumnNames,
	}
	workersCount := cgroup.AvailableCPUs()
	s.search(workersCount, so, stopCh, func(_ uint, br *blockResult) {
		brs := getBlockRows()
		cs := brs.cs

		for i, columnName := range resultColumnNames {
			cs = append(cs, BlockColumn{
				Name:   columnName,
				Values: br.getColumnValues(i),
			})
		}
		processBlock(cs)

		brs.cs = cs
		putBlockRows(brs)
	})
}

type blockRows struct {
	cs []BlockColumn
}

func (brs *blockRows) reset() {
	cs := brs.cs
	for i := range cs {
		cs[i].reset()
	}
	brs.cs = cs[:0]
}

func getBlockRows() *blockRows {
	v := blockRowsPool.Get()
	if v == nil {
		return &blockRows{}
	}
	return v.(*blockRows)
}

func putBlockRows(brs *blockRows) {
	brs.reset()
	blockRowsPool.Put(brs)
}

var blockRowsPool sync.Pool

// BlockColumn is a single column of a block of data
type BlockColumn struct {
	// Name is the column name
	Name string

	// Values is column values
	Values []string
}

func (c *BlockColumn) reset() {
	c.Name = ""
	c.Values = nil
}

// The number of blocks to search at once by a single worker
//
// This number must be increased on systems with many CPU cores in order to amortize
// the overhead for passing the blockSearchWork to worker goroutines.
const blockSearchWorksPerBatch = 64

// searchResultFunc must process sr.
//
// The callback is called at the worker with the given workerID.
type searchResultFunc func(workerID uint, br *blockResult)

// search searches for the matching rows according to so.
//
// It calls f for each found matching block.
func (s *Storage) search(workersCount int, so *genericSearchOptions, stopCh <-chan struct{}, processBlockResult searchResultFunc) {
	// Spin up workers
	var wg sync.WaitGroup
	workCh := make(chan []*blockSearchWork, workersCount)
	wg.Add(workersCount)
	for i := 0; i < workersCount; i++ {
		go func(workerID uint) {
			bs := getBlockSearch()
			for bsws := range workCh {
				for _, bsw := range bsws {
					bs.search(bsw)
					if bs.br.RowsCount() > 0 {
						processBlockResult(workerID, &bs.br)
					}
				}
			}
			putBlockSearch(bs)
			wg.Done()
		}(uint(i))
	}

	// Obtain common time filter from so.filter
	tf, f := getCommonTimeFilter(so.filter)

	// Select partitions according to the selected time range
	s.partitionsLock.Lock()
	ptws := s.partitions
	minDay := tf.minTimestamp / nsecPerDay
	n := sort.Search(len(ptws), func(i int) bool {
		return ptws[i].day >= minDay
	})
	ptws = ptws[n:]
	maxDay := tf.maxTimestamp / nsecPerDay
	n = sort.Search(len(ptws), func(i int) bool {
		return ptws[i].day > maxDay
	})
	ptws = ptws[:n]
	for _, ptw := range ptws {
		ptw.incRef()
	}
	s.partitionsLock.Unlock()

	// Obtain common streamFilter from f
	var sf *StreamFilter
	sf, f = getCommonStreamFilter(f)

	// Apply search to matching partitions
	var pws []*partWrapper
	for _, ptw := range ptws {
		pws = ptw.pt.search(pws, tf, sf, f, so, workCh, stopCh)
	}

	// Wait until workers finish their work
	close(workCh)
	wg.Wait()

	// Decrement references to parts
	for _, pw := range pws {
		pw.decRef()
	}

	// Decrement references to partitions
	for _, ptw := range ptws {
		ptw.decRef()
	}
}

func (pt *partition) search(pwsDst []*partWrapper, tf *timeFilter, sf *StreamFilter, f filter, so *genericSearchOptions,
	workCh chan<- []*blockSearchWork, stopCh <-chan struct{},
) []*partWrapper {
	tenantIDs := so.tenantIDs
	var streamIDs []streamID
	if sf != nil {
		streamIDs = pt.idb.searchStreamIDs(tenantIDs, sf)
		tenantIDs = nil
	}
	if hasStreamFilters(f) {
		f = initStreamFilters(tenantIDs, pt.idb, f)
	}
	soInternal := &searchOptions{
		tenantIDs:         tenantIDs,
		streamIDs:         streamIDs,
		minTimestamp:      tf.minTimestamp,
		maxTimestamp:      tf.maxTimestamp,
		filter:            f,
		resultColumnNames: so.resultColumnNames,
	}
	return pt.ddb.search(pwsDst, soInternal, workCh, stopCh)
}

func hasStreamFilters(f filter) bool {
	switch t := f.(type) {
	case *andFilter:
		return hasStreamFiltersInList(t.filters)
	case *orFilter:
		return hasStreamFiltersInList(t.filters)
	case *notFilter:
		return hasStreamFilters(t.f)
	case *streamFilter:
		return true
	default:
		return false
	}
}

func hasStreamFiltersInList(filters []filter) bool {
	for _, f := range filters {
		if hasStreamFilters(f) {
			return true
		}
	}
	return false
}

func initStreamFilters(tenantIDs []TenantID, idb *indexdb, f filter) filter {
	switch t := f.(type) {
	case *andFilter:
		return &andFilter{
			filters: initStreamFiltersList(tenantIDs, idb, t.filters),
		}
	case *orFilter:
		return &orFilter{
			filters: initStreamFiltersList(tenantIDs, idb, t.filters),
		}
	case *notFilter:
		return &notFilter{
			f: initStreamFilters(tenantIDs, idb, t.f),
		}
	case *streamFilter:
		return &streamFilter{
			f:         t.f,
			tenantIDs: tenantIDs,
			idb:       idb,
		}
	default:
		return t
	}
}

func initStreamFiltersList(tenantIDs []TenantID, idb *indexdb, filters []filter) []filter {
	result := make([]filter, len(filters))
	for i, f := range filters {
		result[i] = initStreamFilters(tenantIDs, idb, f)
	}
	return result
}

func (ddb *datadb) search(pwsDst []*partWrapper, so *searchOptions, workCh chan<- []*blockSearchWork, stopCh <-chan struct{}) []*partWrapper {
	// Select parts with data for the given time range
	ddb.partsLock.Lock()
	pwsDstLen := len(pwsDst)
	pwsDst = appendPartsInTimeRange(pwsDst, ddb.inmemoryParts, so.minTimestamp, so.maxTimestamp)
	pwsDst = appendPartsInTimeRange(pwsDst, ddb.fileParts, so.minTimestamp, so.maxTimestamp)
	pws := pwsDst[pwsDstLen:]
	for _, pw := range pws {
		pw.incRef()
	}
	ddb.partsLock.Unlock()

	// Apply search to matching parts
	for _, pw := range pws {
		pw.p.search(so, workCh, stopCh)
	}

	return pwsDst
}

func (p *part) search(so *searchOptions, workCh chan<- []*blockSearchWork, stopCh <-chan struct{}) {
	bhss := getBlockHeaders()
	if len(so.tenantIDs) > 0 {
		p.searchByTenantIDs(so, bhss, workCh, stopCh)
	} else {
		p.searchByStreamIDs(so, bhss, workCh, stopCh)
	}
	putBlockHeaders(bhss)
}

func getBlockHeaders() *blockHeaders {
	v := blockHeadersPool.Get()
	if v == nil {
		return &blockHeaders{}
	}
	return v.(*blockHeaders)
}

func putBlockHeaders(bhss *blockHeaders) {
	bhss.reset()
	blockHeadersPool.Put(bhss)
}

var blockHeadersPool sync.Pool

type blockHeaders struct {
	bhs []blockHeader
}

func (bhss *blockHeaders) reset() {
	bhs := bhss.bhs
	for i := range bhs {
		bhs[i].reset()
	}
	bhss.bhs = bhs[:0]
}

func (p *part) searchByTenantIDs(so *searchOptions, bhss *blockHeaders, workCh chan<- []*blockSearchWork, stopCh <-chan struct{}) {
	// it is assumed that tenantIDs are sorted
	tenantIDs := so.tenantIDs

	bsws := make([]*blockSearchWork, 0, blockSearchWorksPerBatch)
	scheduleBlockSearch := func(bh *blockHeader) bool {
		// Do not use pool for blockSearchWork, since it is returned back to the pool
		// at another goroutine, which may run on another CPU core.
		// This means that it will be put into another per-CPU pool, which may result
		// in slowdown related to memory synchronization between CPU cores.
		// This slowdown is increased on systems with bigger number of CPU cores.
		bsw := newBlockSearchWork(p, so, bh)
		bsws = append(bsws, bsw)
		if len(bsws) < cap(bsws) {
			return true
		}
		select {
		case <-stopCh:
			return false
		case workCh <- bsws:
			bsws = make([]*blockSearchWork, 0, blockSearchWorksPerBatch)
			return true
		}
	}

	// it is assumed that ibhs are sorted
	ibhs := p.indexBlockHeaders
	for len(ibhs) > 0 && len(tenantIDs) > 0 {
		select {
		case <-stopCh:
			return
		default:
		}

		// locate tenantID equal or bigger than the tenantID in ibhs[0]
		tenantID := &tenantIDs[0]
		if tenantID.less(&ibhs[0].streamID.tenantID) {
			tenantID = &ibhs[0].streamID.tenantID
			n := sort.Search(len(tenantIDs), func(i int) bool {
				return !tenantIDs[i].less(tenantID)
			})
			if n == len(tenantIDs) {
				tenantIDs = nil
				break
			}
			tenantID = &tenantIDs[n]
			tenantIDs = tenantIDs[n:]
		}

		// locate indexBlockHeader with equal or bigger tenantID than the given tenantID
		n := 0
		if ibhs[0].streamID.tenantID.less(tenantID) {
			n = sort.Search(len(ibhs), func(i int) bool {
				return !ibhs[i].streamID.tenantID.less(tenantID)
			})
			// The end of ibhs[n-1] may contain blocks for the given tenantID, so move it backwards
			n--
		}
		ibh := &ibhs[n]
		ibhs = ibhs[n+1:]

		if so.minTimestamp > ibh.maxTimestamp || so.maxTimestamp < ibh.minTimestamp {
			// Skip the ibh, since it doesn't contain entries on the requested time range
			continue
		}

		bhss.bhs = ibh.mustReadBlockHeaders(bhss.bhs[:0], p)

		bhs := bhss.bhs
		for len(bhs) > 0 {
			// search for blocks with the given tenantID
			n = sort.Search(len(bhs), func(i int) bool {
				return !bhs[i].streamID.tenantID.less(tenantID)
			})
			bhs = bhs[n:]
			for len(bhs) > 0 && bhs[0].streamID.tenantID.equal(tenantID) {
				bh := &bhs[0]
				bhs = bhs[1:]
				th := &bh.timestampsHeader
				if so.minTimestamp > th.maxTimestamp || so.maxTimestamp < th.minTimestamp {
					continue
				}
				if !scheduleBlockSearch(bh) {
					return
				}
			}
			if len(bhs) == 0 {
				break
			}

			// search for the next tenantID, which can potentially match tenantID from bhs[0]
			tenantID = &bhs[0].streamID.tenantID
			n = sort.Search(len(tenantIDs), func(i int) bool {
				return !tenantIDs[i].less(tenantID)
			})
			if n == len(tenantIDs) {
				tenantIDs = nil
				break
			}
			tenantID = &tenantIDs[n]
			tenantIDs = tenantIDs[n:]
		}
	}

	// Flush the remaining work
	if len(bsws) > 0 {
		workCh <- bsws
	}
}

func (p *part) searchByStreamIDs(so *searchOptions, bhss *blockHeaders, workCh chan<- []*blockSearchWork, stopCh <-chan struct{}) {
	// it is assumed that streamIDs are sorted
	streamIDs := so.streamIDs

	bsws := make([]*blockSearchWork, 0, blockSearchWorksPerBatch)
	scheduleBlockSearch := func(bh *blockHeader) bool {
		// Do not use pool for blockSearchWork, since it is returned back to the pool
		// at another goroutine, which may run on another CPU core.
		// This means that it will be put into another per-CPU pool, which may result
		// in slowdown related to memory synchronization between CPU cores.
		// This slowdown is increased on systems with bigger number of CPU cores.
		bsw := newBlockSearchWork(p, so, bh)
		bsws = append(bsws, bsw)
		if len(bsws) < cap(bsws) {
			return true
		}
		select {
		case <-stopCh:
			return false
		case workCh <- bsws:
			bsws = make([]*blockSearchWork, 0, blockSearchWorksPerBatch)
			return true
		}
	}

	// it is assumed that ibhs are sorted
	ibhs := p.indexBlockHeaders

	for len(ibhs) > 0 && len(streamIDs) > 0 {
		select {
		case <-stopCh:
			return
		default:
		}

		// locate streamID equal or bigger than the streamID in ibhs[0]
		streamID := &streamIDs[0]
		if streamID.less(&ibhs[0].streamID) {
			streamID = &ibhs[0].streamID
			n := sort.Search(len(streamIDs), func(i int) bool {
				return !streamIDs[i].less(streamID)
			})
			if n == len(streamIDs) {
				streamIDs = nil
				break
			}
			streamID = &streamIDs[n]
			streamIDs = streamIDs[n:]
		}

		// locate indexBlockHeader with equal or bigger streamID than the given streamID
		n := 0
		if ibhs[0].streamID.less(streamID) {
			n = sort.Search(len(ibhs), func(i int) bool {
				return !ibhs[i].streamID.less(streamID)
			})
			// The end of ibhs[n-1] may contain blocks for the given streamID, so move it backwards.
			n--
		}
		ibh := &ibhs[n]
		ibhs = ibhs[n+1:]

		if so.minTimestamp > ibh.maxTimestamp || so.maxTimestamp < ibh.minTimestamp {
			// Skip the ibh, since it doesn't contain entries on the requested time range
			continue
		}

		bhss.bhs = ibh.mustReadBlockHeaders(bhss.bhs[:0], p)

		bhs := bhss.bhs
		for len(bhs) > 0 {
			// search for blocks with the given streamID
			n = sort.Search(len(bhs), func(i int) bool {
				return !bhs[i].streamID.less(streamID)
			})
			bhs = bhs[n:]
			for len(bhs) > 0 && bhs[0].streamID.equal(streamID) {
				bh := &bhs[0]
				bhs = bhs[1:]
				th := &bh.timestampsHeader
				if so.minTimestamp > th.maxTimestamp || so.maxTimestamp < th.minTimestamp {
					continue
				}
				if !scheduleBlockSearch(bh) {
					return
				}
			}
			if len(bhs) == 0 {
				break
			}

			// search for the next streamID, which can potentially match streamID from bhs[0]
			streamID = &bhs[0].streamID
			n = sort.Search(len(streamIDs), func(i int) bool {
				return !streamIDs[i].less(streamID)
			})
			if n == len(streamIDs) {
				streamIDs = nil
				break
			}
			streamID = &streamIDs[n]
			streamIDs = streamIDs[n:]
		}
	}

	// Flush the remaining work
	if len(bsws) > 0 {
		workCh <- bsws
	}
}

func appendPartsInTimeRange(dst, src []*partWrapper, minTimestamp, maxTimestamp int64) []*partWrapper {
	for _, pw := range src {
		if maxTimestamp < pw.p.ph.MinTimestamp || minTimestamp > pw.p.ph.MaxTimestamp {
			continue
		}
		dst = append(dst, pw)
	}
	return dst
}

func getCommonStreamFilter(f filter) (*StreamFilter, filter) {
	switch t := f.(type) {
	case *andFilter:
		filters := t.filters
		for i, filter := range filters {
			sf, ok := filter.(*streamFilter)
			if ok && !sf.f.isEmpty() {
				// Remove sf from filters, since it doesn't filter out anything then.
				af := &andFilter{
					filters: append(filters[:i:i], filters[i+1:]...),
				}
				return sf.f, af
			}
		}
	case *streamFilter:
		return t.f, &noopFilter{}
	}
	return nil, f
}

func getCommonTimeFilter(f filter) (*timeFilter, filter) {
	switch t := f.(type) {
	case *andFilter:
		for _, filter := range t.filters {
			tf, ok := filter.(*timeFilter)
			if ok {
				// The tf must remain in af in order to properly filter out rows outside the selected time range
				return tf, f
			}
		}
	case *timeFilter:
		return t, f
	}
	return allTimeFilter, f
}

var allTimeFilter = &timeFilter{
	minTimestamp: math.MinInt64,
	maxTimestamp: math.MaxInt64,
}
app/victoria-logs: initial code release 2023-06-20 05:55:12 +00:00			`package logstorage`

			`import (`
			`"math"`
			`"sort"`
			`"sync"`

			`"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"`
			`)`

			`// genericSearchOptions contain options used for search.`
			`type genericSearchOptions struct {`
			`// tenantIDs must contain the list of tenantIDs for the search.`
			`tenantIDs []TenantID`

			`// filter is the filter to use for the search`
			`filter filter`

			`// resultColumnNames is names of columns to return in the result.`
			`resultColumnNames []string`
			`}`

			`type searchOptions struct {`
			`// Optional sorted list of tenantIDs for the search.`
			`// If it is empty, then the search is performed by streamIDs`
			`tenantIDs []TenantID`

			`// Optional sorted list of streamIDs for the search.`
			`// If it is empty, then the search is performed by tenantIDs`
			`streamIDs []streamID`

			`// minTimestamp is the minimum timestamp for the search`
			`minTimestamp int64`

			`// maxTimestamp is the maximum timestamp for the search`
			`maxTimestamp int64`

			`// filter is the filter to use for the search`
			`filter filter`

			`// resultColumnNames is names of columns to return in the result`
			`resultColumnNames []string`
			`}`

			`// RunQuery runs the given q and calls processBlock for results`
app/vlselect: follow-up for 451d2abf50171dbdea844b40f14e0205a173d37a - Consistently return the first `limit` log entries if the total size of found log entries doesn't exceed 1Mb. See app/vlselect/logsql/sort_writer.go . Previously random log entries could be returned with each request. - Document the change at docs/VictoriaLogs/CHANGELOG.md - Document the `limit` query arg at docs/VictoriaLogs/querying/README.md - Make the change less intrusive. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5674 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5778 2024-02-18 21:01:34 +00:00			`func (s Storage) RunQuery(tenantIDs []TenantID, q Query, stopCh <-chan struct{}, processBlock func(columns []BlockColumn)) {`
app/victoria-logs: initial code release 2023-06-20 05:55:12 +00:00			`resultColumnNames := q.getResultColumnNames()`
			`so := &genericSearchOptions{`
			`tenantIDs: tenantIDs,`
			`filter: q.f,`
			`resultColumnNames: resultColumnNames,`
			`}`
			`workersCount := cgroup.AvailableCPUs()`
all: fix golangci-lint(revive) warnings after 0c0ed61ce786d220b9fdd351385e81d9d3e4185e Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6001 2024-04-02 20:16:24 +00:00			`s.search(workersCount, so, stopCh, func(_ uint, br *blockResult) {`
app/victoria-logs: initial code release 2023-06-20 05:55:12 +00:00			`brs := getBlockRows()`
			`cs := brs.cs`

			`for i, columnName := range resultColumnNames {`
			`cs = append(cs, BlockColumn{`
			`Name: columnName,`
			`Values: br.getColumnValues(i),`
			`})`
			`}`
app/vlselect: follow-up for 451d2abf50171dbdea844b40f14e0205a173d37a - Consistently return the first `limit` log entries if the total size of found log entries doesn't exceed 1Mb. See app/vlselect/logsql/sort_writer.go . Previously random log entries could be returned with each request. - Document the change at docs/VictoriaLogs/CHANGELOG.md - Document the `limit` query arg at docs/VictoriaLogs/querying/README.md - Make the change less intrusive. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5674 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5778 2024-02-18 21:01:34 +00:00			`processBlock(cs)`
app/victoria-logs: initial code release 2023-06-20 05:55:12 +00:00
			`brs.cs = cs`
			`putBlockRows(brs)`
			`})`
			`}`

			`type blockRows struct {`
			`cs []BlockColumn`
			`}`

			`func (brs *blockRows) reset() {`
			`cs := brs.cs`
			`for i := range cs {`
			`cs[i].reset()`
			`}`
			`brs.cs = cs[:0]`
			`}`

			`func getBlockRows() *blockRows {`
			`v := blockRowsPool.Get()`
			`if v == nil {`
			`return &blockRows{}`
			`}`
			`return v.(*blockRows)`
			`}`

			`func putBlockRows(brs *blockRows) {`
			`brs.reset()`
			`blockRowsPool.Put(brs)`
			`}`

			`var blockRowsPool sync.Pool`

			`// BlockColumn is a single column of a block of data`
			`type BlockColumn struct {`
			`// Name is the column name`
			`Name string`

			`// Values is column values`
			`Values []string`
			`}`

			`func (c *BlockColumn) reset() {`
			`c.Name = ""`
			`c.Values = nil`
			`}`

			`// The number of blocks to search at once by a single worker`
			`//`
			`// This number must be increased on systems with many CPU cores in order to amortize`
			`// the overhead for passing the blockSearchWork to worker goroutines.`
			`const blockSearchWorksPerBatch = 64`

			`// searchResultFunc must process sr.`
			`//`
			`// The callback is called at the worker with the given workerID.`
app/vlselect: follow-up for 451d2abf50171dbdea844b40f14e0205a173d37a - Consistently return the first `limit` log entries if the total size of found log entries doesn't exceed 1Mb. See app/vlselect/logsql/sort_writer.go . Previously random log entries could be returned with each request. - Document the change at docs/VictoriaLogs/CHANGELOG.md - Document the `limit` query arg at docs/VictoriaLogs/querying/README.md - Make the change less intrusive. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5674 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5778 2024-02-18 21:01:34 +00:00			`type searchResultFunc func(workerID uint, br *blockResult)`
app/victoria-logs: initial code release 2023-06-20 05:55:12 +00:00
			`// search searches for the matching rows according to so.`
			`//`
			`// It calls f for each found matching block.`
			`func (s Storage) search(workersCount int, so genericSearchOptions, stopCh <-chan struct{}, processBlockResult searchResultFunc) {`
			`// Spin up workers`
			`var wg sync.WaitGroup`
			`workCh := make(chan []*blockSearchWork, workersCount)`
			`wg.Add(workersCount)`
			`for i := 0; i < workersCount; i++ {`
			`go func(workerID uint) {`
			`bs := getBlockSearch()`
app/vlselect: follow-up for 451d2abf50171dbdea844b40f14e0205a173d37a - Consistently return the first `limit` log entries if the total size of found log entries doesn't exceed 1Mb. See app/vlselect/logsql/sort_writer.go . Previously random log entries could be returned with each request. - Document the change at docs/VictoriaLogs/CHANGELOG.md - Document the `limit` query arg at docs/VictoriaLogs/querying/README.md - Make the change less intrusive. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5674 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5778 2024-02-18 21:01:34 +00:00			`for bsws := range workCh {`
			`for _, bsw := range bsws {`
			`bs.search(bsw)`
			`if bs.br.RowsCount() > 0 {`
			`processBlockResult(workerID, &bs.br)`
app/victoria-logs: initial code release 2023-06-20 05:55:12 +00:00			`}`
			`}`
			`}`
app/vlselect: follow-up for 451d2abf50171dbdea844b40f14e0205a173d37a - Consistently return the first `limit` log entries if the total size of found log entries doesn't exceed 1Mb. See app/vlselect/logsql/sort_writer.go . Previously random log entries could be returned with each request. - Document the change at docs/VictoriaLogs/CHANGELOG.md - Document the `limit` query arg at docs/VictoriaLogs/querying/README.md - Make the change less intrusive. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5674 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5778 2024-02-18 21:01:34 +00:00			`putBlockSearch(bs)`
			`wg.Done()`
app/victoria-logs: initial code release 2023-06-20 05:55:12 +00:00			`}(uint(i))`
			`}`

			`// Obtain common time filter from so.filter`
			`tf, f := getCommonTimeFilter(so.filter)`

			`// Select partitions according to the selected time range`
			`s.partitionsLock.Lock()`
			`ptws := s.partitions`
			`minDay := tf.minTimestamp / nsecPerDay`
			`n := sort.Search(len(ptws), func(i int) bool {`
			`return ptws[i].day >= minDay`
			`})`
			`ptws = ptws[n:]`
			`maxDay := tf.maxTimestamp / nsecPerDay`
			`n = sort.Search(len(ptws), func(i int) bool {`
			`return ptws[i].day > maxDay`
			`})`
			`ptws = ptws[:n]`
			`for _, ptw := range ptws {`
			`ptw.incRef()`
			`}`
			`s.partitionsLock.Unlock()`

			`// Obtain common streamFilter from f`
			`var sf *StreamFilter`
			`sf, f = getCommonStreamFilter(f)`

			`// Apply search to matching partitions`
			`var pws []*partWrapper`
			`for _, ptw := range ptws {`
			`pws = ptw.pt.search(pws, tf, sf, f, so, workCh, stopCh)`
			`}`

			`// Wait until workers finish their work`
			`close(workCh)`
			`wg.Wait()`

			`// Decrement references to parts`
			`for _, pw := range pws {`
			`pw.decRef()`
			`}`

			`// Decrement references to partitions`
			`for _, ptw := range ptws {`
			`ptw.decRef()`
			`}`
			`}`

			`func (pt partition) search(pwsDst []partWrapper, tf timeFilter, sf StreamFilter, f filter, so *genericSearchOptions,`
			`workCh chan<- []*blockSearchWork, stopCh <-chan struct{},`
			`) []*partWrapper {`
			`tenantIDs := so.tenantIDs`
			`var streamIDs []streamID`
			`if sf != nil {`
			`streamIDs = pt.idb.searchStreamIDs(tenantIDs, sf)`
			`tenantIDs = nil`
			`}`
			`if hasStreamFilters(f) {`
			`f = initStreamFilters(tenantIDs, pt.idb, f)`
			`}`
			`soInternal := &searchOptions{`
			`tenantIDs: tenantIDs,`
			`streamIDs: streamIDs,`
			`minTimestamp: tf.minTimestamp,`
			`maxTimestamp: tf.maxTimestamp,`
			`filter: f,`
			`resultColumnNames: so.resultColumnNames,`
			`}`
			`return pt.ddb.search(pwsDst, soInternal, workCh, stopCh)`
			`}`

			`func hasStreamFilters(f filter) bool {`
			`switch t := f.(type) {`
			`case *andFilter:`
			`return hasStreamFiltersInList(t.filters)`
			`case *orFilter:`
			`return hasStreamFiltersInList(t.filters)`
			`case *notFilter:`
			`return hasStreamFilters(t.f)`
			`case *streamFilter:`
			`return true`
			`default:`
			`return false`
			`}`
			`}`

			`func hasStreamFiltersInList(filters []filter) bool {`
			`for _, f := range filters {`
			`if hasStreamFilters(f) {`
			`return true`
			`}`
			`}`
			`return false`
			`}`

			`func initStreamFilters(tenantIDs []TenantID, idb *indexdb, f filter) filter {`
			`switch t := f.(type) {`
			`case *andFilter:`
			`return &andFilter{`
			`filters: initStreamFiltersList(tenantIDs, idb, t.filters),`
			`}`
			`case *orFilter:`
			`return &orFilter{`
			`filters: initStreamFiltersList(tenantIDs, idb, t.filters),`
			`}`
			`case *notFilter:`
			`return &notFilter{`
			`f: initStreamFilters(tenantIDs, idb, t.f),`
			`}`
			`case *streamFilter:`
			`return &streamFilter{`
			`f: t.f,`
			`tenantIDs: tenantIDs,`
			`idb: idb,`
			`}`
			`default:`
			`return t`
			`}`
			`}`

			`func initStreamFiltersList(tenantIDs []TenantID, idb *indexdb, filters []filter) []filter {`
			`result := make([]filter, len(filters))`
			`for i, f := range filters {`
			`result[i] = initStreamFilters(tenantIDs, idb, f)`
			`}`
			`return result`
			`}`

			`func (ddb datadb) search(pwsDst []partWrapper, so searchOptions, workCh chan<- []blockSearchWork, stopCh <-chan struct{}) []*partWrapper {`
			`// Select parts with data for the given time range`
			`ddb.partsLock.Lock()`
			`pwsDstLen := len(pwsDst)`
			`pwsDst = appendPartsInTimeRange(pwsDst, ddb.inmemoryParts, so.minTimestamp, so.maxTimestamp)`
			`pwsDst = appendPartsInTimeRange(pwsDst, ddb.fileParts, so.minTimestamp, so.maxTimestamp)`
			`pws := pwsDst[pwsDstLen:]`
			`for _, pw := range pws {`
			`pw.incRef()`
			`}`
			`ddb.partsLock.Unlock()`

			`// Apply search to matching parts`
			`for _, pw := range pws {`
			`pw.p.search(so, workCh, stopCh)`
			`}`

			`return pwsDst`
			`}`

			`func (p part) search(so searchOptions, workCh chan<- []*blockSearchWork, stopCh <-chan struct{}) {`
			`bhss := getBlockHeaders()`
			`if len(so.tenantIDs) > 0 {`
			`p.searchByTenantIDs(so, bhss, workCh, stopCh)`
			`} else {`
			`p.searchByStreamIDs(so, bhss, workCh, stopCh)`
			`}`
			`putBlockHeaders(bhss)`
			`}`

			`func getBlockHeaders() *blockHeaders {`
			`v := blockHeadersPool.Get()`
			`if v == nil {`
			`return &blockHeaders{}`
			`}`
			`return v.(*blockHeaders)`
			`}`

			`func putBlockHeaders(bhss *blockHeaders) {`
			`bhss.reset()`
			`blockHeadersPool.Put(bhss)`
			`}`

			`var blockHeadersPool sync.Pool`

			`type blockHeaders struct {`
			`bhs []blockHeader`
			`}`

			`func (bhss *blockHeaders) reset() {`
			`bhs := bhss.bhs`
			`for i := range bhs {`
			`bhs[i].reset()`
			`}`
			`bhss.bhs = bhs[:0]`
			`}`

			`func (p part) searchByTenantIDs(so searchOptions, bhss blockHeaders, workCh chan<- []blockSearchWork, stopCh <-chan struct{}) {`
			`// it is assumed that tenantIDs are sorted`
			`tenantIDs := so.tenantIDs`

			`bsws := make([]*blockSearchWork, 0, blockSearchWorksPerBatch)`
			`scheduleBlockSearch := func(bh *blockHeader) bool {`
			`// Do not use pool for blockSearchWork, since it is returned back to the pool`
			`// at another goroutine, which may run on another CPU core.`
			`// This means that it will be put into another per-CPU pool, which may result`
			`// in slowdown related to memory synchronization between CPU cores.`
			`// This slowdown is increased on systems with bigger number of CPU cores.`
			`bsw := newBlockSearchWork(p, so, bh)`
			`bsws = append(bsws, bsw)`
			`if len(bsws) < cap(bsws) {`
			`return true`
			`}`
			`select {`
			`case <-stopCh:`
			`return false`
			`case workCh <- bsws:`
			`bsws = make([]*blockSearchWork, 0, blockSearchWorksPerBatch)`
			`return true`
			`}`
			`}`

			`// it is assumed that ibhs are sorted`
			`ibhs := p.indexBlockHeaders`
			`for len(ibhs) > 0 && len(tenantIDs) > 0 {`
			`select {`
			`case <-stopCh:`
			`return`
			`default:`
			`}`

			`// locate tenantID equal or bigger than the tenantID in ibhs[0]`
			`tenantID := &tenantIDs[0]`
			`if tenantID.less(&ibhs[0].streamID.tenantID) {`
			`tenantID = &ibhs[0].streamID.tenantID`
			`n := sort.Search(len(tenantIDs), func(i int) bool {`
			`return !tenantIDs[i].less(tenantID)`
			`})`
			`if n == len(tenantIDs) {`
			`tenantIDs = nil`
			`break`
			`}`
			`tenantID = &tenantIDs[n]`
			`tenantIDs = tenantIDs[n:]`
			`}`

			`// locate indexBlockHeader with equal or bigger tenantID than the given tenantID`
			`n := 0`
			`if ibhs[0].streamID.tenantID.less(tenantID) {`
			`n = sort.Search(len(ibhs), func(i int) bool {`
			`return !ibhs[i].streamID.tenantID.less(tenantID)`
			`})`
lib/logstorage: always check the previous indexBlockHeader for blocks with matching tenantID and/or streamID The previous indexBlockHeader may contain blocks for the matching tenantID and/or streamID, so it must be scanned unconditionally during the search. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5295 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4856 This is a follow-up for 89dcbc2fe75f5f563b042e605ffce1dc6bc7be07 2023-11-13 22:10:43 +00:00			`// The end of ibhs[n-1] may contain blocks for the given tenantID, so move it backwards`
			`n--`
app/victoria-logs: initial code release 2023-06-20 05:55:12 +00:00			`}`
			`ibh := &ibhs[n]`
			`ibhs = ibhs[n+1:]`

			`if so.minTimestamp > ibh.maxTimestamp \|\| so.maxTimestamp < ibh.minTimestamp {`
			`// Skip the ibh, since it doesn't contain entries on the requested time range`
			`continue`
			`}`

			`bhss.bhs = ibh.mustReadBlockHeaders(bhss.bhs[:0], p)`

			`bhs := bhss.bhs`
			`for len(bhs) > 0 {`
			`// search for blocks with the given tenantID`
			`n = sort.Search(len(bhs), func(i int) bool {`
			`return !bhs[i].streamID.tenantID.less(tenantID)`
			`})`
			`bhs = bhs[n:]`
			`for len(bhs) > 0 && bhs[0].streamID.tenantID.equal(tenantID) {`
			`bh := &bhs[0]`
			`bhs = bhs[1:]`
			`th := &bh.timestampsHeader`
			`if so.minTimestamp > th.maxTimestamp \|\| so.maxTimestamp < th.minTimestamp {`
			`continue`
			`}`
lib/logstorage: proper exit during block search (#5400) 2024-02-01 12:11:05 +00:00			`if !scheduleBlockSearch(bh) {`
			`return`
			`}`
app/victoria-logs: initial code release 2023-06-20 05:55:12 +00:00			`}`
			`if len(bhs) == 0 {`
			`break`
			`}`

			`// search for the next tenantID, which can potentially match tenantID from bhs[0]`
			`tenantID = &bhs[0].streamID.tenantID`
			`n = sort.Search(len(tenantIDs), func(i int) bool {`
			`return !tenantIDs[i].less(tenantID)`
			`})`
			`if n == len(tenantIDs) {`
			`tenantIDs = nil`
			`break`
			`}`
			`tenantID = &tenantIDs[n]`
			`tenantIDs = tenantIDs[n:]`
			`}`
			`}`

			`// Flush the remaining work`
			`if len(bsws) > 0 {`
			`workCh <- bsws`
			`}`
			`}`

			`func (p part) searchByStreamIDs(so searchOptions, bhss blockHeaders, workCh chan<- []blockSearchWork, stopCh <-chan struct{}) {`
			`// it is assumed that streamIDs are sorted`
			`streamIDs := so.streamIDs`

			`bsws := make([]*blockSearchWork, 0, blockSearchWorksPerBatch)`
			`scheduleBlockSearch := func(bh *blockHeader) bool {`
			`// Do not use pool for blockSearchWork, since it is returned back to the pool`
			`// at another goroutine, which may run on another CPU core.`
			`// This means that it will be put into another per-CPU pool, which may result`
			`// in slowdown related to memory synchronization between CPU cores.`
			`// This slowdown is increased on systems with bigger number of CPU cores.`
			`bsw := newBlockSearchWork(p, so, bh)`
			`bsws = append(bsws, bsw)`
			`if len(bsws) < cap(bsws) {`
			`return true`
			`}`
			`select {`
			`case <-stopCh:`
			`return false`
			`case workCh <- bsws:`
			`bsws = make([]*blockSearchWork, 0, blockSearchWorksPerBatch)`
			`return true`
			`}`
			`}`

			`// it is assumed that ibhs are sorted`
			`ibhs := p.indexBlockHeaders`

			`for len(ibhs) > 0 && len(streamIDs) > 0 {`
			`select {`
			`case <-stopCh:`
			`return`
			`default:`
			`}`

			`// locate streamID equal or bigger than the streamID in ibhs[0]`
			`streamID := &streamIDs[0]`
			`if streamID.less(&ibhs[0].streamID) {`
			`streamID = &ibhs[0].streamID`
			`n := sort.Search(len(streamIDs), func(i int) bool {`
			`return !streamIDs[i].less(streamID)`
			`})`
			`if n == len(streamIDs) {`
			`streamIDs = nil`
			`break`
			`}`
			`streamID = &streamIDs[n]`
			`streamIDs = streamIDs[n:]`
			`}`

			`// locate indexBlockHeader with equal or bigger streamID than the given streamID`
			`n := 0`
			`if ibhs[0].streamID.less(streamID) {`
			`n = sort.Search(len(ibhs), func(i int) bool {`
			`return !ibhs[i].streamID.less(streamID)`
			`})`
lib/logstorage: always check the previous indexBlockHeader for blocks with matching tenantID and/or streamID The previous indexBlockHeader may contain blocks for the matching tenantID and/or streamID, so it must be scanned unconditionally during the search. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5295 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4856 This is a follow-up for 89dcbc2fe75f5f563b042e605ffce1dc6bc7be07 2023-11-13 22:10:43 +00:00			`// The end of ibhs[n-1] may contain blocks for the given streamID, so move it backwards.`
			`n--`
app/victoria-logs: initial code release 2023-06-20 05:55:12 +00:00			`}`
			`ibh := &ibhs[n]`
			`ibhs = ibhs[n+1:]`

			`if so.minTimestamp > ibh.maxTimestamp \|\| so.maxTimestamp < ibh.minTimestamp {`
			`// Skip the ibh, since it doesn't contain entries on the requested time range`
			`continue`
			`}`

			`bhss.bhs = ibh.mustReadBlockHeaders(bhss.bhs[:0], p)`

			`bhs := bhss.bhs`
			`for len(bhs) > 0 {`
			`// search for blocks with the given streamID`
			`n = sort.Search(len(bhs), func(i int) bool {`
			`return !bhs[i].streamID.less(streamID)`
			`})`
			`bhs = bhs[n:]`
			`for len(bhs) > 0 && bhs[0].streamID.equal(streamID) {`
			`bh := &bhs[0]`
			`bhs = bhs[1:]`
			`th := &bh.timestampsHeader`
			`if so.minTimestamp > th.maxTimestamp \|\| so.maxTimestamp < th.minTimestamp {`
			`continue`
			`}`
			`if !scheduleBlockSearch(bh) {`
			`return`
			`}`
			`}`
			`if len(bhs) == 0 {`
			`break`
			`}`

			`// search for the next streamID, which can potentially match streamID from bhs[0]`
			`streamID = &bhs[0].streamID`
			`n = sort.Search(len(streamIDs), func(i int) bool {`
			`return !streamIDs[i].less(streamID)`
			`})`
			`if n == len(streamIDs) {`
			`streamIDs = nil`
			`break`
			`}`
			`streamID = &streamIDs[n]`
			`streamIDs = streamIDs[n:]`
			`}`
			`}`

			`// Flush the remaining work`
			`if len(bsws) > 0 {`
			`workCh <- bsws`
			`}`
			`}`

			`func appendPartsInTimeRange(dst, src []partWrapper, minTimestamp, maxTimestamp int64) []partWrapper {`
			`for _, pw := range src {`
			`if maxTimestamp < pw.p.ph.MinTimestamp \|\| minTimestamp > pw.p.ph.MaxTimestamp {`
			`continue`
			`}`
			`dst = append(dst, pw)`
			`}`
			`return dst`
			`}`

			`func getCommonStreamFilter(f filter) (*StreamFilter, filter) {`
			`switch t := f.(type) {`
			`case *andFilter:`
			`filters := t.filters`
			`for i, filter := range filters {`
			`sf, ok := filter.(*streamFilter)`
			`if ok && !sf.f.isEmpty() {`
			`// Remove sf from filters, since it doesn't filter out anything then.`
			`af := &andFilter{`
			`filters: append(filters[:i:i], filters[i+1:]...),`
			`}`
			`return sf.f, af`
			`}`
			`}`
			`case *streamFilter:`
			`return t.f, &noopFilter{}`
			`}`
			`return nil, f`
			`}`

			`func getCommonTimeFilter(f filter) (*timeFilter, filter) {`
			`switch t := f.(type) {`
			`case *andFilter:`
			`for _, filter := range t.filters {`
			`tf, ok := filter.(*timeFilter)`
			`if ok {`
			`// The tf must remain in af in order to properly filter out rows outside the selected time range`
			`return tf, f`
			`}`
			`}`
			`case *timeFilter:`
			`return t, f`
			`}`
			`return allTimeFilter, f`
			`}`

			`var allTimeFilter = &timeFilter{`
			`minTimestamp: math.MinInt64,`
			`maxTimestamp: math.MaxInt64,`
			`}`