lib/logstorage: work-in-progress

This commit is contained in:
Aliaksandr Valialkin 2024-05-25 22:59:13 +02:00
parent 9edbeca46b
commit 6427b3c3c0
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
3 changed files with 36 additions and 2 deletions

View file

@ -254,6 +254,7 @@ The list of LogsQL filters:
- [Word filter](#word-filter) - matches logs with the given [word](#word) - [Word filter](#word-filter) - matches logs with the given [word](#word)
- [Phrase filter](#phrase-filter) - matches logs with the given phrase - [Phrase filter](#phrase-filter) - matches logs with the given phrase
- [Prefix filter](#prefix-filter) - matches logs with the given word prefix or phrase prefix - [Prefix filter](#prefix-filter) - matches logs with the given word prefix or phrase prefix
- [Substring filter](#substring-filter) - matches logs with the given substring
- [Empty value filter](#empty-value-filter) - matches logs without the given [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) - [Empty value filter](#empty-value-filter) - matches logs without the given [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
- [Any value filter](#any-value-filter) - matches logs with the given non-empty [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) - [Any value filter](#any-value-filter) - matches logs with the given non-empty [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
- [Exact filter](#exact-filter) - matches logs with the exact value - [Exact filter](#exact-filter) - matches logs with the exact value
@ -490,7 +491,7 @@ This query matches the following [log messages](https://docs.victoriametrics.com
This query doesn't match the following log messages: This query doesn't match the following log messages:
- `Error: foobar`, since the `Error` [word](#word) starts with capital letter. Use `i(err*)` for this case. See [these docs](#case-insensitive-filter) for details. - `Error: foobar`, since the `Error` [word](#word) starts with capital letter. Use `i(err*)` for this case. See [these docs](#case-insensitive-filter) for details.
- `fooerror`, since the `fooerror` [word](#word) doesn't start with `err`. Use `~"err"` for this case. See [these docs](#regexp-filter) for details. - `fooerror`, since the `fooerror` [word](#word) doesn't start with `err`. Use `~"err"` for this case. See [these docs](#substring-filter) for details.
Prefix filter can be applied to [phrases](#phrase-filter). For example, the following query matches Prefix filter can be applied to [phrases](#phrase-filter). For example, the following query matches
[log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) containing phrases with `unexpected fail` prefix: [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) containing phrases with `unexpected fail` prefix:
@ -549,6 +550,32 @@ See also:
- [Logical filter](#logical-filter) - [Logical filter](#logical-filter)
### Substring filter
To find logs containing a given substring, use the `~"substring"` filter. For example, the following query matches log entries
that contain the `ampl` text in the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field):
```logsql
~"ampl"
```
It matches the following messages:
- `Example message`
- `This is a sample`
It doesn't match `EXAMPLE message`, since the `AMPL` substring there is in uppercase. Use the `~"(?i)ampl"` filter instead. Note that a case-insensitive filter
may be much slower than a case-sensitive one.
Performance tip: prefer the [word filter](#word-filter) and the [phrase filter](#phrase-filter) where possible, since the substring filter may be quite slow.
See also:
- [Word filter](#word-filter)
- [Phrase filter](#phrase-filter)
- [Regexp filter](#regexp-filter)
### Empty value filter ### Empty value filter
Sometimes it is needed to find log entries without the given [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). Sometimes it is needed to find log entries without the given [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).

View file

@ -74,9 +74,10 @@ func (pu *pipeUnroll) updateNeededFields(neededFields, unneededFields fieldsSet)
} }
} }
func (pu *pipeUnroll) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor { func (pu *pipeUnroll) newPipeProcessor(workersCount int, stopCh <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
return &pipeUnrollProcessor{ return &pipeUnrollProcessor{
pu: pu, pu: pu,
stopCh: stopCh,
ppNext: ppNext, ppNext: ppNext,
shards: make([]pipeUnrollProcessorShard, workersCount), shards: make([]pipeUnrollProcessorShard, workersCount),
@ -85,6 +86,7 @@ func (pu *pipeUnroll) newPipeProcessor(workersCount int, _ <-chan struct{}, _ fu
type pipeUnrollProcessor struct { type pipeUnrollProcessor struct {
pu *pipeUnroll pu *pipeUnroll
stopCh <-chan struct{}
ppNext pipeProcessor ppNext pipeProcessor
shards []pipeUnrollProcessorShard shards []pipeUnrollProcessorShard
@ -139,6 +141,9 @@ func (pup *pipeUnrollProcessor) writeBlock(workerID uint, br *blockResult) {
fields := shard.fields fields := shard.fields
for rowIdx := range br.timestamps { for rowIdx := range br.timestamps {
if bm.isSetBit(rowIdx) { if bm.isSetBit(rowIdx) {
if needStop(pup.stopCh) {
return
}
shard.writeUnrolledFields(br, pu.fields, columnValues, rowIdx) shard.writeUnrolledFields(br, pu.fields, columnValues, rowIdx)
} else { } else {
fields = fields[:0] fields = fields[:0]

View file

@ -165,6 +165,8 @@ func TestGetLiterals(t *testing.T) {
f("foo.*bar(a|b)baz.+", []string{"foo", "bar", "baz"}) f("foo.*bar(a|b)baz.+", []string{"foo", "bar", "baz"})
f("(foo[ab](?:bar))", []string{"foo", "bar"}) f("(foo[ab](?:bar))", []string{"foo", "bar"})
f("foo|bar", nil) f("foo|bar", nil)
f("(?i)foo", nil)
f("foo((?i)bar)baz", []string{"foo", "baz"})
f("((foo|bar)baz xxx(?:yzabc))", []string{"baz xxxyzabc"}) f("((foo|bar)baz xxx(?:yzabc))", []string{"baz xxxyzabc"})
f("((foo|bar)baz xxx(?:yzabc)*)", []string{"baz xxx"}) f("((foo|bar)baz xxx(?:yzabc)*)", []string{"baz xxx"})
f("((foo|bar)baz? xxx(?:yzabc)*)", []string{"ba", " xxx"}) f("((foo|bar)baz? xxx(?:yzabc)*)", []string{"ba", " xxx"})