mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-20 15:16:42 +00:00
wip
This commit is contained in:
parent
ecd51e48ec
commit
09e81cb5aa
6 changed files with 239 additions and 28 deletions
|
@ -1175,7 +1175,10 @@ See also:
|
|||
|
||||
### sort pipe
|
||||
|
||||
By default logs are selected in arbitrary order because of performance reasons. If logs must be sorted, then `| sort by (field1, ..., fieldN)` [pipe](#pipes) must be used.
|
||||
By default, logs are selected in arbitrary order for performance reasons. If logs must be sorted, then the `| sort by (field1, ..., fieldN)` [pipe](#pipes) can be used.
|
||||
The returned logs are sorted by the given [fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
|
||||
using [natural sorting](https://en.wikipedia.org/wiki/Natural_sort_order).
|
||||
|
||||
For example, the following query returns logs for the last 5 minutes sorted by [`_stream`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields)
|
||||
and then by [`_time`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field):
|
||||
|
||||
|
@ -1210,7 +1213,7 @@ See also:
|
|||
### uniq pipe
|
||||
|
||||
`| uniq ...` pipe allows returning only unique results over the selected logs. For example, the following LogsQL query
|
||||
returns uniq values for `ip` [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
|
||||
returns unique values for `ip` [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
|
||||
over logs for the last 5 minutes:
|
||||
|
||||
```logsql
|
||||
|
@ -1536,7 +1539,7 @@ See also:
|
|||
|
||||
`uniq_values(field1, ..., fieldN)` [stats pipe](#stats-pipe) returns the unique non-empty values across
|
||||
the mentioned [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||
The returned values are sorted and encoded in JSON array.
|
||||
The returned values are encoded as a JSON array. The order of the returned values is arbitrary.
|
||||
|
||||
For example, the following query returns unique non-empty values for the `ip` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
|
||||
over logs for the last 5 minutes:
|
||||
|
|
|
@ -13,6 +13,7 @@ import (
|
|||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil"
|
||||
)
|
||||
|
||||
// pipeSort processes '| sort ...' queries.
|
||||
|
@ -639,9 +640,9 @@ func sortBlockLess(shardA *pipeSortProcessorShard, rowIdxA int, shardB *pipeSort
|
|||
continue
|
||||
}
|
||||
if isDesc {
|
||||
return sB < sA
|
||||
return stringsutil.LessNatural(sB, sA)
|
||||
}
|
||||
return sA < sB
|
||||
return stringsutil.LessNatural(sA, sB)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
|
|
@ -202,12 +202,10 @@ func (sup *statsUniqValuesProcessor) finalizeStats() string {
|
|||
return "[]"
|
||||
}
|
||||
|
||||
// Sort unique items
|
||||
items := make([]string, 0, len(sup.m))
|
||||
for k := range sup.m {
|
||||
items = append(items, k)
|
||||
}
|
||||
slices.SortFunc(items, compareValues)
|
||||
|
||||
if limit := sup.su.limit; limit > 0 && uint64(len(items)) > limit {
|
||||
items = items[:limit]
|
||||
|
@ -242,27 +240,6 @@ func marshalJSONArray(items []string) string {
|
|||
return bytesutil.ToUnsafeString(b)
|
||||
}
|
||||
|
||||
func compareValues(a, b string) int {
|
||||
fA, okA := tryParseFloat64(a)
|
||||
fB, okB := tryParseFloat64(b)
|
||||
if okA && okB {
|
||||
if fA == fB {
|
||||
return 0
|
||||
}
|
||||
if fA < fB {
|
||||
return -1
|
||||
}
|
||||
return 1
|
||||
}
|
||||
if okA {
|
||||
return -1
|
||||
}
|
||||
if okB {
|
||||
return 1
|
||||
}
|
||||
return strings.Compare(a, b)
|
||||
}
|
||||
|
||||
func parseStatsUniqValues(lex *lexer) (*statsUniqValues, error) {
|
||||
fields, err := parseFieldNamesForStatsFunc(lex, "uniq_values")
|
||||
if err != nil {
|
||||
|
|
111
lib/stringsutil/less_natural.go
Normal file
111
lib/stringsutil/less_natural.go
Normal file
|
@ -0,0 +1,111 @@
|
|||
package stringsutil
|
||||
|
||||
import (
|
||||
"math"
|
||||
)
|
||||
|
||||
// LessNatural reports whether a sorts before b in natural sort order.
//
// Runs of decimal digits are compared by their numeric value, while all the other
// bytes are compared one by one. A decimal digit always sorts before a non-digit byte,
// and a string sorts before any longer string it is a prefix of.
// If two digit runs have identical numeric values, then the run with fewer
// leading zeroes sorts first.
//
// Digit runs, which are too long to be accumulated in uint64 without overflow,
// are compared as plain strings together with the rest of the input.
// NOTE(review): because of this fallback the ordering isn't transitive when oversized
// and regular integers are mixed, e.g. "5" < "10" < "1000000000000000000000" < "5".
//
// See https://en.wikipedia.org/wiki/Natural_sort_order
func LessNatural(a, b string) bool {
	// Keep the shorter string in a, so b can be indexed safely at every position valid for a.
	// Both strings are always trimmed by the same number of bytes below,
	// so it is enough to do this once.
	swapped := len(a) > len(b)
	if swapped {
		a, b = b, a
	}

	for {
		// Walk over the common prefix until both strings hit a decimal digit.
		pos := 0
		for ; pos < len(a); pos++ {
			x, y := a[pos], b[pos]
			xDigit := x >= '0' && x <= '9'
			yDigit := y >= '0' && y <= '9'
			if xDigit && yDigit {
				break
			}
			if xDigit != yDigit {
				// The string with the digit at this position goes first.
				return xDigit != swapped
			}
			if x != y {
				// Bytewise comparison is fine for utf8 too: bytes of multi-byte chars
				// are bigger than decimal digits, and utf8-encoded strings sort properly bytewise.
				return (x < y) != swapped
			}
		}
		a, b = a[pos:], b[pos:]
		if len(a) == 0 {
			// The shorter string is exhausted. It is smaller only if something is left
			// in the longer one and the shorter one is the original a.
			return !swapped && len(b) > 0
		}

		// Both a and b start with decimal digits here. Parse them and compare as integers.
		numA, endA, fits := parseNaturalDigitRun(a)
		var numB uint64
		var endB int
		if fits {
			numB, endB, fits = parseNaturalDigitRun(b)
		}
		if !fits {
			// Too big integer. Fall back to string comparison.
			if swapped {
				return b < a
			}
			return a < b
		}

		if numA != numB {
			return (numA < numB) != swapped
		}
		if endA != endB {
			// Identical numbers with distinct number of leading zeroes.
			return (endA < endB) != swapped
		}

		a, b = a[endA:], b[endB:]
	}
}

// parseNaturalDigitRun parses the run of decimal digits at the beginning of s.
//
// s must start with a decimal digit. The function returns the parsed number and
// the length of the digit run. ok is false if the number may not fit uint64.
func parseNaturalDigitRun(s string) (n uint64, end int, ok bool) {
	const maxSafe = (math.MaxUint64 - 9) / 10

	n = uint64(s[0] - '0')
	for end = 1; end < len(s); end++ {
		c := s[end]
		if c < '0' || c > '9' {
			break
		}
		if n > maxSafe {
			return 0, 0, false
		}
		n = n*10 + uint64(c-'0')
	}
	return n, end, true
}
|
90
lib/stringsutil/less_natural_test.go
Normal file
90
lib/stringsutil/less_natural_test.go
Normal file
|
@ -0,0 +1,90 @@
|
|||
package stringsutil
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestLessNatural(t *testing.T) {
|
||||
f := func(a, b string, resultExpected bool) {
|
||||
t.Helper()
|
||||
|
||||
result := LessNatural(a, b)
|
||||
if result != resultExpected {
|
||||
t.Fatalf("unexpected result for LessNatural(%q, %q); got %v; want %v", a, b, result, resultExpected)
|
||||
}
|
||||
}
|
||||
|
||||
// comparison with empty string
|
||||
f("", "", false)
|
||||
f("", "foo", true)
|
||||
f("foo", "", false)
|
||||
f("", "123", true)
|
||||
f("123", "", false)
|
||||
|
||||
// identical values
|
||||
f("foo", "foo", false)
|
||||
f("123", "123", false)
|
||||
f("foo123", "foo123", false)
|
||||
f("123foo", "123foo", false)
|
||||
f("000", "000", false)
|
||||
f("00123", "00123", false)
|
||||
f("00foo", "00foo", false)
|
||||
f("abc00foo0123", "abc00foo0123", false)
|
||||
|
||||
// identical values with different number of zeroes in front of them
|
||||
f("00123", "0123", false)
|
||||
f("0123", "00123", true)
|
||||
|
||||
// numeric comparsion
|
||||
f("123", "99", false)
|
||||
f("99", "123", true)
|
||||
|
||||
// floating-point comparsion (works unexpectedly - this is OK for natural sort order)
|
||||
f("1.23", "1.123", true)
|
||||
f("1.123", "1.23", false)
|
||||
|
||||
// non-numeric comparison
|
||||
f("foo", "bar", false)
|
||||
f("fo", "bar", false)
|
||||
f("bar", "foo", true)
|
||||
f("bar", "fo", true)
|
||||
|
||||
// comparison with common non-numeric prefix
|
||||
f("abc_foo", "abc_bar", false)
|
||||
f("abc_bar", "abc_foo", true)
|
||||
f("abc_foo", "abc_", false)
|
||||
f("abc_", "abc_foo", true)
|
||||
f("abc_123", "abc_foo", true)
|
||||
f("abc_foo", "abc_123", false)
|
||||
|
||||
// comparison with common numeric prefix
|
||||
f("123foo", "123bar", false)
|
||||
f("123bar", "123foo", true)
|
||||
f("123", "123bar", true)
|
||||
f("123bar", "123", false)
|
||||
f("123_456", "123_78", false)
|
||||
f("123_78", "123_456", true)
|
||||
|
||||
// too big integers - fall back to string order
|
||||
f("1234567890123456789012345", "1234567890123456789012345", false)
|
||||
f("1234567890123456789012345", "123456789012345678901234", false)
|
||||
f("123456789012345678901234", "1234567890123456789012345", true)
|
||||
f("193456789012345678901234", "1234567890123456789012345", false)
|
||||
f("123456789012345678901234", "1934567890123456789012345", true)
|
||||
f("1934", "1234567890123456789012345", false)
|
||||
f("1234567890123456789012345", "1934", true)
|
||||
|
||||
// integers with many zeroes in front
|
||||
f("00000000000000000000000000123", "0000000000000000000000000045", false)
|
||||
f("0000000000000000000000000045", "00000000000000000000000000123", true)
|
||||
|
||||
// unicode strings
|
||||
f("бвг", "мирг", true)
|
||||
f("мирг", "бвг", false)
|
||||
f("abcde", "мирг", true)
|
||||
f("мирг", "abcde", false)
|
||||
f("123", "мирг", true)
|
||||
f("мирг", "123", false)
|
||||
f("12345", "мирг", true)
|
||||
f("мирг", "12345", false)
|
||||
}
|
29
lib/stringsutil/less_natural_timing_test.go
Normal file
29
lib/stringsutil/less_natural_timing_test.go
Normal file
|
@ -0,0 +1,29 @@
|
|||
package stringsutil
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func BenchmarkLessNatural(b *testing.B) {
|
||||
b.Run("distinct_string_prefixes", func(b *testing.B) {
|
||||
benchmarkLessNatural(b, []string{
|
||||
"aaa", "bbb", "ccc", "ddd", "eee", "fff",
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func benchmarkLessNatural(b *testing.B, a []string) {
|
||||
b.ReportAllocs()
|
||||
b.SetBytes(int64(len(a) - 1))
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
n := uint64(0)
|
||||
for pb.Next() {
|
||||
for i := 1; i < len(a); i++ {
|
||||
if LessNatural(a[i-1], a[i]) {
|
||||
n++
|
||||
}
|
||||
}
|
||||
}
|
||||
GlobalSink.Add(n)
|
||||
})
|
||||
}
|
Loading…
Reference in a new issue