This commit is contained in:
Aliaksandr Valialkin 2024-04-27 21:15:56 +02:00
parent 56d6496319
commit f9d0b21bb9
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
6 changed files with 71 additions and 14 deletions

View file

@ -24,6 +24,8 @@ according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/QuickSta
* FEATURE: add support for calculating the number of matching logs and the number of logs with non-empty [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). Grouping by arbitrary set of [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) is supported. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#stats) for details. * FEATURE: add support for calculating the number of matching logs and the number of logs with non-empty [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). Grouping by arbitrary set of [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) is supported. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#stats) for details.
* FEATURE: add support for limiting the number of returned results. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#limiters). * FEATURE: add support for limiting the number of returned results. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#limiters).
* FEATURE: optimize performance for [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/), which contains multiple filters for [words](https://docs.victoriametrics.com/victorialogs/logsql/#word-filter) or [phrases](https://docs.victoriametrics.com/victorialogs/logsql/#phrase-filter) delimited with [`AND` operator](https://docs.victoriametrics.com/victorialogs/logsql/#logical-filter). For example, `foo AND bar` query must find [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) with `foo` and `bar` words at faster speed. * FEATURE: optimize performance for [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/), which contains multiple filters for [words](https://docs.victoriametrics.com/victorialogs/logsql/#word-filter) or [phrases](https://docs.victoriametrics.com/victorialogs/logsql/#phrase-filter) delimited with [`AND` operator](https://docs.victoriametrics.com/victorialogs/logsql/#logical-filter). For example, `foo AND bar` query must find [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) with `foo` and `bar` words at faster speed.
* FEATURE: allow using `_` inside numbers. For example, `score:range[1_000, 5_000_000]` for [`range` filter](https://docs.victoriametrics.com/victorialogs/logsql/#range-filter).
* FEATURE: allow numbers in hexadecimal and binary form. For example, `response_size:range[0xff, 0b10001101101]` for [`range` filter](https://docs.victoriametrics.com/victorialogs/logsql/#range-filter).
* BUGFIX: prevent from additional CPU usage for up to a few seconds after canceling the query. * BUGFIX: prevent from additional CPU usage for up to a few seconds after canceling the query.
* BUGFIX: prevent from returning log entries with empty `_stream` field in the form `"_stream":""` in [search query results](https://docs.victoriametrics.com/victorialogs/querying/). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6042). * BUGFIX: prevent from returning log entries with empty `_stream` field in the form `"_stream":""` in [search query results](https://docs.victoriametrics.com/victorialogs/querying/). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6042).

View file

@ -827,6 +827,12 @@ parentheses with square brackets. For example:
- `range(1, 10]` includes `10` in the matching range - `range(1, 10]` includes `10` in the matching range
- `range[1, 10]` includes `1` and `10` in the matching range - `range[1, 10]` includes `1` and `10` in the matching range
The range boundaries can be expressed in the following forms:
- Hexadecimal form. For example, `range(0xff, 0xABCD)`.
- Binary form. For example, `range(0b100110, 0b11111101)`.
- Integer form with `_` delimiters for better readability. For example, `range(1_000, 2_345_678)`.
Note that the `range()` filter doesn't match [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) Note that the `range()` filter doesn't match [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
with non-numeric values alongside numeric values. For example, `range(1, 10)` doesn't match `the request took 4.2 seconds` with non-numeric values alongside numeric values. For example, `range(1, 10)` doesn't match `the request took 4.2 seconds`
[log message](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), since the `4.2` number is surrounded by other text. [log message](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), since the `4.2` number is surrounded by other text.
@ -938,6 +944,19 @@ This query doesn't match the following log messages:
- `foo`, since it is too short - `foo`, since it is too short
- `foo bar baz abc`, since it is too long - `foo bar baz abc`, since it is too long
It is possible to use `inf` as the upper bound. For example, the following query matches [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field)
with the length bigger or equal to 5 chars:
```logsql
len_range(5, inf)
```
The range boundaries can be expressed in the following forms:
- Hexadecimal form. For example, `len_range(0xff, 0xABCD)`.
- Binary form. For example, `len_range(0b100110, 0b11111101)`.
- Integer form with `_` delimiters for better readability. For example, `len_range(1_000, 2_345_678)`.
By default the `len_range()` is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field). By default the `len_range()` is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
Put the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the `len_range()` in order to apply Put the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the `len_range()` in order to apply
the filter to the needed field. For example, the following query matches log entries with the `foo` field length in the range `[10, 20]` chars: the filter to the needed field. For example, the following query matches log entries with the `foo` field length in the range `[10, 20]` chars:

View file

@ -1083,10 +1083,12 @@ type lenRangeFilter struct {
fieldName string fieldName string
minLen uint64 minLen uint64
maxLen uint64 maxLen uint64
stringRepr string
} }
func (rf *lenRangeFilter) String() string { func (rf *lenRangeFilter) String() string {
return quoteFieldNameIfNeeded(rf.fieldName) + fmt.Sprintf("len_range(%d,%d)", rf.minLen, rf.maxLen) return quoteFieldNameIfNeeded(rf.fieldName) + "len_range" + rf.stringRepr
} }
func (rf *lenRangeFilter) apply(bs *blockSearch, bm *filterBitmap) { func (rf *lenRangeFilter) apply(bs *blockSearch, bm *filterBitmap) {

View file

@ -522,18 +522,21 @@ func parseLenRangeFilter(lex *lexer, fieldName string) (filter, error) {
if len(args) != 2 { if len(args) != 2 {
return nil, fmt.Errorf("unexpected number of args for %s(); got %d; want 2", funcName, len(args)) return nil, fmt.Errorf("unexpected number of args for %s(); got %d; want 2", funcName, len(args))
} }
minLen, err := strconv.ParseUint(args[0], 10, 64) minLen, err := parseUint(args[0])
if err != nil { if err != nil {
return nil, fmt.Errorf("cannot parse minLen at %s(): %w", funcName, err) return nil, fmt.Errorf("cannot parse minLen at %s(): %w", funcName, err)
} }
maxLen, err := strconv.ParseUint(args[1], 10, 64) maxLen, err := parseUint(args[1])
if err != nil { if err != nil {
return nil, fmt.Errorf("cannot parse maxLen at %s(): %w", funcName, err) return nil, fmt.Errorf("cannot parse maxLen at %s(): %w", funcName, err)
} }
stringRepr := "(" + args[0] + ", " + args[1] + ")"
rf := &lenRangeFilter{ rf := &lenRangeFilter{
fieldName: fieldName, fieldName: fieldName,
minLen: minLen, minLen: minLen,
maxLen: maxLen, maxLen: maxLen,
stringRepr: stringRepr,
} }
return rf, nil return rf, nil
}) })
@ -737,6 +740,12 @@ func parseFloat64(lex *lexer) (float64, string, error) {
s := getCompoundToken(lex) s := getCompoundToken(lex)
f, err := strconv.ParseFloat(s, 64) f, err := strconv.ParseFloat(s, 64)
if err != nil { if err != nil {
// Try parsing s as integer.
// This handles 0x..., 0b... and 0... prefixes.
n, err := parseInt(s)
if err == nil {
return float64(n), s, nil
}
return 0, "", fmt.Errorf("cannot parse %q as float64: %w", lex.token, err) return 0, "", fmt.Errorf("cannot parse %q as float64: %w", lex.token, err)
} }
return f, s, nil return f, s, nil
@ -1170,3 +1179,21 @@ var reservedKeywords = func() map[string]struct{} {
} }
return m return m
}() }()
// parseUint parses s as an unsigned 64-bit integer.
//
// The case-insensitive literals "inf" and "+inf" are mapped to math.MaxUint64.
// Any other input is handed to strconv.ParseUint with base 0, so the base is
// taken from the prefix of s ("0x", "0b", "0o" or a leading "0") and `_`
// separators are accepted as in Go integer literals.
func parseUint(s string) (uint64, error) {
	// Dropping at most one leading "+" lets a single comparison cover both "inf" and "+inf".
	if strings.EqualFold(strings.TrimPrefix(s, "+"), "inf") {
		return math.MaxUint64, nil
	}
	return strconv.ParseUint(s, 0, 64)
}
// parseInt parses s as a signed 64-bit integer.
//
// The case-insensitive literals "inf" and "+inf" are mapped to math.MaxInt64,
// while "-inf" is mapped to math.MinInt64. Any other input is handed to
// strconv.ParseInt with base 0, so the base is taken from the prefix of s
// ("0x", "0b", "0o" or a leading "0") and `_` separators are accepted
// as in Go integer literals.
func parseInt(s string) (int64, error) {
	if strings.EqualFold(s, "-inf") {
		return math.MinInt64, nil
	}
	// Dropping at most one leading "+" lets a single comparison cover both "inf" and "+inf".
	if strings.EqualFold(strings.TrimPrefix(s, "+"), "inf") {
		return math.MaxInt64, nil
	}
	return strconv.ParseInt(s, 0, 64)
}

View file

@ -745,6 +745,10 @@ func TestParseQuerySuccess(t *testing.T) {
// len_range filter // len_range filter
f(`len_range(10, 20)`, `len_range(10, 20)`) f(`len_range(10, 20)`, `len_range(10, 20)`)
f(`foo:len_range("10", 20, )`, `foo:len_range(10, 20)`) f(`foo:len_range("10", 20, )`, `foo:len_range(10, 20)`)
f(`len_RANGe(10, inf)`, `len_range(10, inf)`)
f(`len_range(10, +InF)`, `len_range(10, +InF)`)
f(`len_range(10, 1_000_000)`, `len_range(10, 1_000_000)`)
f(`len_range(0x10,0b100101)`, `len_range(0x10, 0b100101)`)
// range filter // range filter
f(`range(1.234, 5656.43454)`, `range(1.234, 5656.43454)`) f(`range(1.234, 5656.43454)`, `range(1.234, 5656.43454)`)
@ -753,6 +757,9 @@ func TestParseQuerySuccess(t *testing.T) {
f(`range[123, 456)`, `range[123, 456)`) f(`range[123, 456)`, `range[123, 456)`)
f(`range(123, 445]`, `range(123, 445]`) f(`range(123, 445]`, `range(123, 445]`)
f(`range("1.234e-4", -23)`, `range(1.234e-4, -23)`) f(`range("1.234e-4", -23)`, `range(1.234e-4, -23)`)
f(`range(1_000, 0o7532)`, `range(1_000, 0o7532)`)
f(`range(0x1ff, inf)`, `range(0x1ff, inf)`)
f(`range(-INF,+inF)`, `range(-INF, +inF)`)
// re filter // re filter
f("re('foo|ba(r.+)')", `re("foo|ba(r.+)")`) f("re('foo|ba(r.+)')", `re("foo|ba(r.+)")`)

View file

@ -829,7 +829,7 @@ func parseHeadPipe(lex *lexer) (*headPipe, error) {
if !lex.mustNextToken() { if !lex.mustNextToken() {
return nil, fmt.Errorf("missing the number of head rows to return") return nil, fmt.Errorf("missing the number of head rows to return")
} }
n, err := strconv.ParseUint(lex.token, 10, 64) n, err := parseUint(lex.token)
if err != nil { if err != nil {
return nil, fmt.Errorf("cannot parse the number of head rows to return %q: %w", lex.token, err) return nil, fmt.Errorf("cannot parse the number of head rows to return %q: %w", lex.token, err)
} }
@ -896,7 +896,7 @@ func parseSkipPipe(lex *lexer) (*skipPipe, error) {
if !lex.mustNextToken() { if !lex.mustNextToken() {
return nil, fmt.Errorf("missing the number of rows to skip") return nil, fmt.Errorf("missing the number of rows to skip")
} }
n, err := strconv.ParseUint(lex.token, 10, 64) n, err := parseUint(lex.token)
if err != nil { if err != nil {
return nil, fmt.Errorf("cannot parse the number of rows to skip %q: %w", lex.token, err) return nil, fmt.Errorf("cannot parse the number of rows to skip %q: %w", lex.token, err)
} }