diff --git a/docs/VictoriaLogs/CHANGELOG.md b/docs/VictoriaLogs/CHANGELOG.md index eec03ea26..9ad8b073b 100644 --- a/docs/VictoriaLogs/CHANGELOG.md +++ b/docs/VictoriaLogs/CHANGELOG.md @@ -20,6 +20,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta ## tip * FEATURE: allow [`head` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#limit-pipe) without number. For example, `error | head`. In this case 10 last values are returned as `head` Unix command does by default. +* FEATURE: allow using [comparison filters](https://docs.victoriametrics.com/victorialogs/logsql/#range-comparison-filter) with strings. For example, `some_text_field:>="foo"` matches [log entries](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) with `some_text_field` field values bigger or equal to `foo`. ## [v0.12.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.12.1-victorialogs) diff --git a/docs/VictoriaLogs/LogsQL.md b/docs/VictoriaLogs/LogsQL.md index b1455e9e0..3a8c41da6 100644 --- a/docs/VictoriaLogs/LogsQL.md +++ b/docs/VictoriaLogs/LogsQL.md @@ -255,6 +255,7 @@ The list of LogsQL filters: - [Phrase filter](#phrase-filter) - matches logs with the given phrase - [Prefix filter](#prefix-filter) - matches logs with the given word prefix or phrase prefix - [Substring filter](#substring-filter) - matches logs with the given substring +- [Range comparison filter](#range-comparison-filter) - matches logs with field values in the provided range - [Empty value filter](#empty-value-filter) - matches logs without the given [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) - [Any value filter](#any-value-filter) - matches logs with the given non-empty [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) - [Exact filter](#exact-filter) - matches logs with the exact value @@ -576,6 +577,26 @@ See also: - [Regexp filter](#regexp-filter) +### Range comparison 
filter + +LogsQL supports `field:>X`, `field:>=X`, `field:<X` and `field:<=X` filters, where `field` is the name of [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) +and `X` is either a numeric value or a string. For example, the following query returns logs with numeric `response_size` field values bigger than `10KiB`: + +```logsql +response_size:>10KiB +``` + +The following query returns logs with `username` field containing string values smaller than `John`: + +```logsql +username:<"John" +``` + +See also: + +- [String range filter](#string-range-filter) +- [Range filter](#range-filter) + ### Empty value filter Sometimes it is needed to find log entries without the given [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). @@ -906,18 +927,12 @@ for searching for log entries with request durations exceeding 4.2 seconds: request.duration:range(4.2, Inf) ``` -This query can be shortened to: +This query can be shortened by using the [range comparison filter](#range-comparison-filter): ```logsql request.duration:>4.2 ``` -The following query returns logs with request durations smaller or equal to 1.5 seconds: - -```logsql -request.duration:<=1.5 -``` - The lower and the upper bounds of the `range(lower, upper)` are excluded by default. If they must be included, then substitute the corresponding parentheses with square brackets. 
For example: @@ -941,6 +956,7 @@ Performance tips: See also: +- [Range comparison filter](#range-comparison-filter) - [IPv4 range filter](#ipv4-range-filter) - [String range filter](#string-range-filter) - [Length range filter](#length-range-filter) @@ -1012,6 +1028,7 @@ For example, the `user.name:string_range(C, E)` would match `user.name` fields, See also: +- [Range comparison filter](#range-comparison-filter) - [Range filter](#range-filter) - [IPv4 range filter](#ipv4-range-filter) - [Length range filter](#length-range-filter) diff --git a/lib/logstorage/filter_string_range.go b/lib/logstorage/filter_string_range.go index 4ab081b60..095159715 100644 --- a/lib/logstorage/filter_string_range.go +++ b/lib/logstorage/filter_string_range.go @@ -1,11 +1,11 @@ package logstorage import ( - "fmt" - "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" ) +var maxStringRangeValue = string([]byte{255, 255, 255, 255}) + // filterStringRange matches tie given string range [minValue..maxValue) // // Note that the minValue is included in the range, while the maxValue isn't included in the range. 
@@ -16,10 +16,12 @@ type filterStringRange struct { fieldName string minValue string maxValue string + + stringRepr string } func (fr *filterStringRange) String() string { - return fmt.Sprintf("%sstring_range(%s, %s)", quoteFieldNameIfNeeded(fr.fieldName), quoteTokenIfNeeded(fr.minValue), quoteTokenIfNeeded(fr.maxValue)) + return quoteFieldNameIfNeeded(fr.fieldName) + fr.stringRepr } func (fr *filterStringRange) updateNeededFields(neededFields fieldsSet) { diff --git a/lib/logstorage/parser.go b/lib/logstorage/parser.go index 0220d9655..5ea4e164e 100644 --- a/lib/logstorage/parser.go +++ b/lib/logstorage/parser.go @@ -74,6 +74,11 @@ func (lex *lexer) isQuotedToken() bool { return lex.token != lex.rawToken } +func (lex *lexer) isNumber() bool { + s := lex.rawToken + lex.s + return isNumberPrefix(s) +} + func (lex *lexer) isPrevToken(tokens ...string) bool { for _, token := range tokens { if token == lex.prevToken { @@ -855,6 +860,8 @@ func parseFilterStringRange(lex *lexer, fieldName string) (filter, error) { fieldName: fieldName, minValue: args[0], maxValue: args[1], + + stringRepr: fmt.Sprintf("string_range(%s, %s)", quoteTokenIfNeeded(args[0]), quoteTokenIfNeeded(args[1])), } return fr, nil }) @@ -1091,6 +1098,15 @@ func parseFilterGT(lex *lexer, fieldName string) (filter, error) { op = ">=" } + if !lex.isNumber() { + lexState := lex.backupState() + fr := tryParseFilterGTString(lex, fieldName, op, includeMinValue) + if fr != nil { + return fr, nil + } + lex.restoreState(lexState) + } + minValue, fStr, err := parseFloat64(lex) if err != nil { return nil, fmt.Errorf("cannot parse number after '%s': %w", op, err) @@ -1120,6 +1136,15 @@ func parseFilterLT(lex *lexer, fieldName string) (filter, error) { op = "<=" } + if !lex.isNumber() { + lexState := lex.backupState() + fr := tryParseFilterLTString(lex, fieldName, op, includeMaxValue) + if fr != nil { + return fr, nil + } + lex.restoreState(lexState) + } + maxValue, fStr, err := parseFloat64(lex) if err != nil { 
return nil, fmt.Errorf("cannot parse number after '%s': %w", op, err) @@ -1138,6 +1163,43 @@ func parseFilterLT(lex *lexer, fieldName string) (filter, error) { return fr, nil } +func tryParseFilterGTString(lex *lexer, fieldName, op string, includeMinValue bool) filter { + minValueOrig, err := getCompoundToken(lex) + if err != nil { + return nil + } + minValue := minValueOrig + if !includeMinValue { + minValue = string(append([]byte(minValue), 0)) + } + fr := &filterStringRange{ + fieldName: fieldName, + minValue: minValue, + maxValue: maxStringRangeValue, + + stringRepr: op + quoteStringTokenIfNeeded(minValueOrig), + } + return fr +} + +func tryParseFilterLTString(lex *lexer, fieldName, op string, includeMaxValue bool) filter { + maxValueOrig, err := getCompoundToken(lex) + if err != nil { + return nil + } + maxValue := maxValueOrig + if includeMaxValue { + maxValue = string(append([]byte(maxValue), 0)) + } + fr := &filterStringRange{ + fieldName: fieldName, + maxValue: maxValue, + + stringRepr: op + quoteStringTokenIfNeeded(maxValueOrig), + } + return fr +} + func parseFilterRange(lex *lexer, fieldName string) (filter, error) { funcName := lex.token lex.nextToken() @@ -1495,6 +1557,13 @@ func parseTime(lex *lexer) (int64, string, error) { return int64(math.Round(t*1e3)) * 1e6, s, nil } +func quoteStringTokenIfNeeded(s string) string { + if !needQuoteStringToken(s) { + return s + } + return strconv.Quote(s) +} + func quoteTokenIfNeeded(s string) string { if !needQuoteToken(s) { return s @@ -1502,6 +1571,23 @@ func quoteTokenIfNeeded(s string) string { return strconv.Quote(s) } +func needQuoteStringToken(s string) bool { + return isNumberPrefix(s) || needQuoteToken(s) +} + +func isNumberPrefix(s string) bool { + if len(s) == 0 { + return false + } + if s[0] == '-' || s[0] == '+' { + s = s[1:] + if len(s) == 0 { + return false + } + } + return s[0] >= '0' && s[0] <= '9' +} + func needQuoteToken(s string) bool { sLower := strings.ToLower(s) if _, ok := 
reservedKeywords[sLower]; ok { diff --git a/lib/logstorage/parser_test.go b/lib/logstorage/parser_test.go index 4ba801927..e4ec7cc0d 100644 --- a/lib/logstorage/parser_test.go +++ b/lib/logstorage/parser_test.go @@ -353,6 +353,10 @@ func TestParseFilterStringRange(t *testing.T) { f("string_range(foo, bar)", ``, "foo", "bar") f(`abc:string_range("foo,bar", "baz) !")`, `abc`, `foo,bar`, `baz) !`) + f(">foo", ``, "foo\x00", maxStringRangeValue) + f("x:>=foo", `x`, "foo", maxStringRangeValue) + f("x:=10.43`, `foo`, 10.43, inf) f(`foo: >= -10.43`, `foo`, -10.43, inf) - f(`foo:<10.43`, `foo`, -inf, nextafter(10.43, -inf)) + f(`foo:<10.43K`, `foo`, -inf, nextafter(10_430, -inf)) f(`foo: < -10.43`, `foo`, -inf, nextafter(-10.43, -inf)) - f(`foo:<=10.43`, `foo`, -inf, 10.43) + f(`foo:<=10.43ms`, `foo`, -inf, 10_430_000) f(`foo: <= 10.43`, `foo`, -inf, 10.43) } @@ -802,6 +806,12 @@ func TestParseQuerySuccess(t *testing.T) { // string_range filter f(`string_range(foo, bar)`, `string_range(foo, bar)`) f(`foo:string_range("foo, bar", baz)`, `foo:string_range("foo, bar", baz)`) + f(`foo:>bar`, `foo:>bar`) + f(`foo:>"1234"`, `foo:>"1234"`) + f(`>="abc"`, `>=abc`) + f(`foo:(`) // missing filter f(`| fields *`)