From 6427b3c3c068a16b9b787bf4e850bf792f66798e Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Sat, 25 May 2024 22:59:13 +0200 Subject: [PATCH] lib/logstorage: work-in-progress --- docs/VictoriaLogs/LogsQL.md | 29 ++++++++++++++++++++++++++++- lib/logstorage/pipe_unroll.go | 7 ++++++- lib/regexutil/regex_test.go | 2 ++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/docs/VictoriaLogs/LogsQL.md b/docs/VictoriaLogs/LogsQL.md index 74cec09c1..ec36f58ab 100644 --- a/docs/VictoriaLogs/LogsQL.md +++ b/docs/VictoriaLogs/LogsQL.md @@ -254,6 +254,7 @@ The list of LogsQL filters: - [Word filter](#word-filter) - matches logs with the given [word](#word) - [Phrase filter](#phrase-filter) - matches logs with the given phrase - [Prefix filter](#prefix-filter) - matches logs with the given word prefix or phrase prefix +- [Substring filter](#substring-filter) - matches logs with the given substring - [Empty value filter](#empty-value-filter) - matches logs without the given [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) - [Any value filter](#any-value-filter) - matches logs with the given non-empty [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) - [Exact filter](#exact-filter) - matches logs with the exact value @@ -490,7 +491,7 @@ This query matches the following [log messages](https://docs.victoriametrics.com This query doesn't match the following log messages: - `Error: foobar`, since the `Error` [word](#word) starts with capital letter. Use `i(err*)` for this case. See [these docs](#case-insensitive-filter) for details. -- `fooerror`, since the `fooerror` [word](#word) doesn't start with `err`. Use `~"err"` for this case. See [these docs](#regexp-filter) for details. +- `fooerror`, since the `fooerror` [word](#word) doesn't start with `err`. Use `~"err"` for this case. See [these docs](#substring-filter) for details. Prefix filter can be applied to [phrases](#phrase-filter). 
For example, the following query matches [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) containing phrases with `unexpected fail` prefix: @@ -549,6 +550,32 @@ See also: - [Logical filter](#logical-filter) +### Substring filter + +To find logs containing some substring, use the `~"substring"` filter. For example, the following query matches log entries, +which contain `ampl` text in the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field): + +```logsql +~"ampl" +``` + +It matches the following messages: + +- `Example message` +- `This is a sample` + +It doesn't match `EXAMPLE message`, since the `AMPL` substring here is in uppercase. Use the `~"(?i)ampl"` filter instead. Note that a case-insensitive filter +may be much slower than a case-sensitive one. + +Performance tip: prefer using [word filter](#word-filter) and [phrase filter](#phrase-filter), since the substring filter may be quite slow. + +See also: + +- [Word filter](#word-filter) +- [Phrase filter](#phrase-filter) +- [Regexp filter](#regexp-filter) + + ### Empty value filter Sometimes it is needed to find log entries without the given [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). 
diff --git a/lib/logstorage/pipe_unroll.go b/lib/logstorage/pipe_unroll.go index e1e4e3be9..180b5687b 100644 --- a/lib/logstorage/pipe_unroll.go +++ b/lib/logstorage/pipe_unroll.go @@ -74,9 +74,10 @@ func (pu *pipeUnroll) updateNeededFields(neededFields, unneededFields fieldsSet) } } -func (pu *pipeUnroll) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor { +func (pu *pipeUnroll) newPipeProcessor(workersCount int, stopCh <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor { return &pipeUnrollProcessor{ pu: pu, + stopCh: stopCh, ppNext: ppNext, shards: make([]pipeUnrollProcessorShard, workersCount), @@ -85,6 +86,7 @@ func (pu *pipeUnroll) newPipeProcessor(workersCount int, _ <-chan struct{}, _ fu type pipeUnrollProcessor struct { pu *pipeUnroll + stopCh <-chan struct{} ppNext pipeProcessor shards []pipeUnrollProcessorShard @@ -139,6 +141,9 @@ func (pup *pipeUnrollProcessor) writeBlock(workerID uint, br *blockResult) { fields := shard.fields for rowIdx := range br.timestamps { if bm.isSetBit(rowIdx) { + if needStop(pup.stopCh) { + return + } shard.writeUnrolledFields(br, pu.fields, columnValues, rowIdx) } else { fields = fields[:0] diff --git a/lib/regexutil/regex_test.go b/lib/regexutil/regex_test.go index 2fedc2d81..a7208f962 100644 --- a/lib/regexutil/regex_test.go +++ b/lib/regexutil/regex_test.go @@ -165,6 +165,8 @@ func TestGetLiterals(t *testing.T) { f("foo.*bar(a|b)baz.+", []string{"foo", "bar", "baz"}) f("(foo[ab](?:bar))", []string{"foo", "bar"}) f("foo|bar", nil) + f("(?i)foo", nil) + f("foo((?i)bar)baz", []string{"foo", "baz"}) f("((foo|bar)baz xxx(?:yzabc))", []string{"baz xxxyzabc"}) f("((foo|bar)baz xxx(?:yzabc)*)", []string{"baz xxx"}) f("((foo|bar)baz? xxx(?:yzabc)*)", []string{"ba", " xxx"})