This commit is contained in:
Aliaksandr Valialkin 2024-04-27 21:15:56 +02:00
parent 56d6496319
commit f9d0b21bb9
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
6 changed files with 71 additions and 14 deletions

View file

@ -24,6 +24,8 @@ according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/QuickSta
* FEATURE: add support for calculating the number of matching logs and the number of logs with non-empty [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). Grouping by arbitrary set of [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) is supported. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#stats) for details.
* FEATURE: add support for limiting the number of returned results. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#limiters).
* FEATURE: optimize performance for [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/), which contains multiple filters for [words](https://docs.victoriametrics.com/victorialogs/logsql/#word-filter) or [phrases](https://docs.victoriametrics.com/victorialogs/logsql/#phrase-filter) delimited with [`AND` operator](https://docs.victoriametrics.com/victorialogs/logsql/#logical-filter). For example, `foo AND bar` query must find [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) with `foo` and `bar` words at faster speed.
* FEATURE: allow using `_` inside numbers. For example, `score:range[1_000, 5_000_000]` for [`range` filter](https://docs.victoriametrics.com/victorialogs/logsql/#range-filter).
* FEATURE: allow numbers in hexadecimal and binary form. For example, `response_size:range[0xff, 0b10001101101]` for [`range` filter](https://docs.victoriametrics.com/victorialogs/logsql/#range-filter).
* BUGFIX: prevent additional CPU usage for up to a few seconds after canceling the query.
* BUGFIX: prevent from returning log entries with empty `_stream` field in the form `"_stream":""` in [search query results](https://docs.victoriametrics.com/victorialogs/querying/). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6042).

View file

@ -827,6 +827,12 @@ parentheses with square brackets. For example:
- `range(1, 10]` includes `10` in the matching range
- `range[1, 10]` includes `1` and `10` in the matching range
The range boundaries can be expressed in the following forms:
- Hexadecimal form. For example, `range(0xff, 0xABCD)`.
- Binary form. For example, `range(0b100110, 0b11111101)`.
- Integer form with `_` delimiters for better readability. For example, `range(1_000, 2_345_678)`.
Note that the `range()` filter doesn't match [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
with non-numeric values alongside numeric values. For example, `range(1, 10)` doesn't match `the request took 4.2 seconds`
[log message](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), since the `4.2` number is surrounded by other text.
@ -938,6 +944,19 @@ This query doesn't match the following log messages:
- `foo`, since it is too short
- `foo bar baz abc`, since it is too long
It is possible to use `inf` as the upper bound. For example, the following query matches [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field)
with the length greater than or equal to 5 chars:
```logsql
len_range(5, inf)
```
The range boundaries can be expressed in the following forms:
- Hexadecimal form. For example, `len_range(0xff, 0xABCD)`.
- Binary form. For example, `len_range(0b100110, 0b11111101)`.
- Integer form with `_` delimiters for better readability. For example, `len_range(1_000, 2_345_678)`.
By default the `len_range()` is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
Put the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the `len_range()` in order to apply
the filter to the needed field. For example, the following query matches log entries with the `foo` field length in the range `[10, 20]` chars:

View file

@ -1083,10 +1083,12 @@ type lenRangeFilter struct {
fieldName string
minLen uint64
maxLen uint64
stringRepr string
}
// String returns the LogsQL representation of the len_range filter.
//
// The arguments are emitted from rf.stringRepr instead of being re-formatted
// from rf.minLen and rf.maxLen, so the form supplied in the query
// (for example `inf`, `0x10` or `1_000_000`) is kept in the output.
func (rf *lenRangeFilter) String() string {
	return quoteFieldNameIfNeeded(rf.fieldName) + "len_range" + rf.stringRepr
}
func (rf *lenRangeFilter) apply(bs *blockSearch, bm *filterBitmap) {

View file

@ -522,18 +522,21 @@ func parseLenRangeFilter(lex *lexer, fieldName string) (filter, error) {
if len(args) != 2 {
return nil, fmt.Errorf("unexpected number of args for %s(); got %d; want 2", funcName, len(args))
}
minLen, err := strconv.ParseUint(args[0], 10, 64)
minLen, err := parseUint(args[0])
if err != nil {
return nil, fmt.Errorf("cannot parse minLen at %s(): %w", funcName, err)
}
maxLen, err := strconv.ParseUint(args[1], 10, 64)
maxLen, err := parseUint(args[1])
if err != nil {
return nil, fmt.Errorf("cannot parse maxLen at %s(): %w", funcName, err)
}
stringRepr := "(" + args[0] + ", " + args[1] + ")"
rf := &lenRangeFilter{
fieldName: fieldName,
minLen: minLen,
maxLen: maxLen,
stringRepr: stringRepr,
}
return rf, nil
})
@ -737,6 +740,12 @@ func parseFloat64(lex *lexer) (float64, string, error) {
s := getCompoundToken(lex)
f, err := strconv.ParseFloat(s, 64)
if err != nil {
// Try parsing s as integer.
// This handles 0x..., 0b... and 0... prefixes.
n, err := parseInt(s)
if err == nil {
return float64(n), s, nil
}
return 0, "", fmt.Errorf("cannot parse %q as float64: %w", lex.token, err)
}
return f, s, nil
@ -1170,3 +1179,21 @@ var reservedKeywords = func() map[string]struct{} {
}
return m
}()
// parseUint parses s as an unsigned 64-bit integer.
//
// The case-insensitive values "inf" and "+inf" are mapped to math.MaxUint64.
// All other values are handed to strconv.ParseUint with base 0, so the base
// is derived from the prefix of s (0x, 0b, 0o or a leading 0) and `_`
// delimiters between digits are accepted.
func parseUint(s string) (uint64, error) {
	// Drop at most one leading plus sign, so "inf" and "+inf" share a single check.
	unsigned := strings.TrimPrefix(s, "+")
	if !strings.EqualFold(unsigned, "inf") {
		return strconv.ParseUint(s, 0, 64)
	}
	return math.MaxUint64, nil
}
// parseInt parses s as a signed 64-bit integer.
//
// The case-insensitive values "inf" and "+inf" are mapped to math.MaxInt64,
// while "-inf" is mapped to math.MinInt64. All other values are handed to
// strconv.ParseInt with base 0, so the base is derived from the prefix of s
// (0x, 0b, 0o or a leading 0) and `_` delimiters between digits are accepted.
func parseInt(s string) (int64, error) {
	if strings.EqualFold(s, "-inf") {
		return math.MinInt64, nil
	}
	if strings.EqualFold(s, "inf") || strings.EqualFold(s, "+inf") {
		return math.MaxInt64, nil
	}
	return strconv.ParseInt(s, 0, 64)
}

View file

@ -745,6 +745,10 @@ func TestParseQuerySuccess(t *testing.T) {
// len_range filter
f(`len_range(10, 20)`, `len_range(10, 20)`)
f(`foo:len_range("10", 20, )`, `foo:len_range(10, 20)`)
f(`len_RANGe(10, inf)`, `len_range(10, inf)`)
f(`len_range(10, +InF)`, `len_range(10, +InF)`)
f(`len_range(10, 1_000_000)`, `len_range(10, 1_000_000)`)
f(`len_range(0x10,0b100101)`, `len_range(0x10, 0b100101)`)
// range filter
f(`range(1.234, 5656.43454)`, `range(1.234, 5656.43454)`)
@ -753,6 +757,9 @@ func TestParseQuerySuccess(t *testing.T) {
f(`range[123, 456)`, `range[123, 456)`)
f(`range(123, 445]`, `range(123, 445]`)
f(`range("1.234e-4", -23)`, `range(1.234e-4, -23)`)
f(`range(1_000, 0o7532)`, `range(1_000, 0o7532)`)
f(`range(0x1ff, inf)`, `range(0x1ff, inf)`)
f(`range(-INF,+inF)`, `range(-INF, +inF)`)
// re filter
f("re('foo|ba(r.+)')", `re("foo|ba(r.+)")`)

View file

@ -829,7 +829,7 @@ func parseHeadPipe(lex *lexer) (*headPipe, error) {
if !lex.mustNextToken() {
return nil, fmt.Errorf("missing the number of head rows to return")
}
n, err := strconv.ParseUint(lex.token, 10, 64)
n, err := parseUint(lex.token)
if err != nil {
return nil, fmt.Errorf("cannot parse the number of head rows to return %q: %w", lex.token, err)
}
@ -896,7 +896,7 @@ func parseSkipPipe(lex *lexer) (*skipPipe, error) {
if !lex.mustNextToken() {
return nil, fmt.Errorf("missing the number of rows to skip")
}
n, err := strconv.ParseUint(lex.token, 10, 64)
n, err := parseUint(lex.token)
if err != nil {
return nil, fmt.Errorf("cannot parse the number of rows to skip %q: %w", lex.token, err)
}