From 7c1955d7c3585a5da20ba3b72a38aca9506bd245 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Thu, 9 May 2024 20:14:48 +0200 Subject: [PATCH] wip --- docs/VictoriaLogs/CHANGELOG.md | 1 + lib/logstorage/tokenizer.go | 51 +++------------------------------- 2 files changed, 5 insertions(+), 47 deletions(-) diff --git a/docs/VictoriaLogs/CHANGELOG.md b/docs/VictoriaLogs/CHANGELOG.md index 2b7eedbba..65cf35c96 100644 --- a/docs/VictoriaLogs/CHANGELOG.md +++ b/docs/VictoriaLogs/CHANGELOG.md @@ -28,6 +28,7 @@ according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/QuickSta * FEATURE: allow using `_` inside numbers. For example, `score:range[1_000, 5_000_000]` for [`range` filter](https://docs.victoriametrics.com/victorialogs/logsql/#range-filter). * FEATURE: allow numbers in hexadecimal and binary form. For example, `response_size:range[0xff, 0b10001101101]` for [`range` filter](https://docs.victoriametrics.com/victorialogs/logsql/#range-filter). * FEATURE: allow using duration and byte size suffixes in numeric values inside LogsQL queries. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#numeric-values). +* FEATURE: improve data ingestion performance by up to 50%. * FEATURE: optimize performance for [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/), which contains multiple filters for [words](https://docs.victoriametrics.com/victorialogs/logsql/#word-filter) or [phrases](https://docs.victoriametrics.com/victorialogs/logsql/#phrase-filter) delimited with [`AND` operator](https://docs.victoriametrics.com/victorialogs/logsql/#logical-filter). For example, `foo AND bar` query must find [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) with `foo` and `bar` words at faster speed. * BUGFIX: prevent from additional CPU usage for up to a few seconds after canceling the query. 
diff --git a/lib/logstorage/tokenizer.go b/lib/logstorage/tokenizer.go index 433b3978f..e54bf841a 100644 --- a/lib/logstorage/tokenizer.go +++ b/lib/logstorage/tokenizer.go @@ -1,12 +1,13 @@ package logstorage import ( - "sort" "sync" "unicode" ) // tokenizeStrings extracts word tokens from a, appends them to dst and returns the result. +// +// The order of returned tokens is unspecified, since they are collected by iterating over a map. func tokenizeStrings(dst, a []string) []string { t := getTokenizer() m := t.m @@ -17,17 +18,11 @@ func tokenizeStrings(dst, a []string) []string { } tokenizeString(m, s) } - dstLen := len(dst) for k := range t.m { dst = append(dst, k) } putTokenizer(t) - // Sort tokens with zero memory allocations - ss := getStringsSorter(dst[dstLen:]) - sort.Sort(ss) - putStringsSorter(ss) - return dst } @@ -90,51 +85,13 @@ func putTokenizer(t *tokenizer) { var tokenizerPool sync.Pool -type stringsSorter struct { - a []string -} - -func (ss *stringsSorter) Len() int { - return len(ss.a) -} -func (ss *stringsSorter) Swap(i, j int) { - a := ss.a - a[i], a[j] = a[j], a[i] -} -func (ss *stringsSorter) Less(i, j int) bool { - a := ss.a - return a[i] < a[j] -} - -func getStringsSorter(a []string) *stringsSorter { - v := stringsSorterPool.Get() - if v == nil { - return &stringsSorter{ - a: a, - } - } - ss := v.(*stringsSorter) - ss.a = a - return ss -} - -func putStringsSorter(ss *stringsSorter) { - ss.a = nil - stringsSorterPool.Put(ss) -} - -var stringsSorterPool sync.Pool - type tokensBuf struct { A []string } func (tb *tokensBuf) reset() { - a := tb.A - for i := range a { - a[i] = "" - } - tb.A = a[:0] + clear(tb.A) + tb.A = tb.A[:0] } func getTokensBuf() *tokensBuf {